### 🖋 **Notebook Contents**

0. Initial Setup
1. Modelling
2. Conclusion
3. Recommendation

****

## `Initial Setup`

In [236]:
# Data Manipulation
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Model Algorithm (modeling)
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost.sklearn import XGBRegressor
from sklearn.pipeline import Pipeline
import statsmodels.api as sm

# Data Preparation
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, cross_validate, RandomizedSearchCV, GridSearchCV, StratifiedKFold, KFold
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, LabelEncoder
import category_encoders as ce
from sklearn.compose import TransformedTargetRegressor

# Evaluation metrics
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score
# function to calculate adjusted R2
def adj_r2(val, rowCount, featureCount):
    return 1 - (1-val)*(rowCount-1)/(rowCount-featureCount-1)
import statistics as stats

# ignore warning
import warnings
warnings.filterwarnings("ignore")

In [237]:
# load the data
data = pd.read_csv('..\data\processed\salaries_clean.csv')

# convert data type into category
# objectint_columns = data.select_dtypes(include=['object', 'int64']).columns
# data[objectint_columns] = data[objectint_columns].astype('category')

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4250 entries, 0 to 4249
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   work_year           4250 non-null   int64 
 1   experience_level    4250 non-null   object
 2   employment_type     4250 non-null   object
 3   salary_in_usd       4250 non-null   int64 
 4   employee_residence  4250 non-null   object
 5   remote_ratio        4250 non-null   int64 
 6   company_location    4250 non-null   object
 7   company_size        4250 non-null   object
 8   job_position        4250 non-null   object
 9   job_scope           4250 non-null   object
dtypes: int64(3), object(7)
memory usage: 332.2+ KB


## `Modelling`

### Feature Engineering: 

1. is_similar_loc

In [238]:
# def issimilarloc(data: pd.DataFrame):
#     if data['employee_residence'] == data['company_location']:
#         return 1
#     else:
#         return 0
    
# data['is_similar_loc'] = data.apply(issimilarloc, axis=1)

# drop employee_residence and company_location columns
# data = data.drop(columns=['employee_residence','company_location'])

2. continent

In [239]:
continent_code =pd.read_csv("..\data\raw\ISO-3166-Countries-with-Regional-Codes.csv")
continent_code = continent_code.drop_duplicates(subset='alpha-2')

data = pd.merge(data, continent_code, left_on='employee_residence', right_on='alpha-2', how='left')
data = data[['work_year', 'experience_level', 'employment_type', 'salary_in_usd',
        'remote_ratio', 'employee_residence', 'company_location',
       'company_size', 'job_position', 'job_scope', 'region']]
data

Unnamed: 0,work_year,experience_level,employment_type,salary_in_usd,remote_ratio,employee_residence,company_location,company_size,job_position,job_scope,region
0,2023,SE,FT,132000,100,US,US,M,STAFF,DATA ENGINEER,Americas
1,2023,MI,FT,81206,0,GB,GB,M,STAFF,ML/AI ENGINEER,Europe
2,2023,EX,FT,330000,0,US,US,M,HEAD,ML/AI ENGINEER,Americas
3,2023,EX,FT,188000,0,US,US,M,HEAD,ML/AI ENGINEER,Americas
4,2023,MI,FT,140000,0,US,US,M,STAFF,BUSINESS INTELLIGENCE,Americas
...,...,...,...,...,...,...,...,...,...,...,...
4245,2020,SE,FT,412000,100,US,US,L,STAFF,DATA SCIENTIST,Americas
4246,2021,MI,FT,151000,100,US,US,L,STAFF,DATA SCIENTIST,Americas
4247,2020,EN,FT,105000,100,US,US,S,STAFF,DATA SCIENTIST,Americas
4248,2020,EN,CT,100000,100,US,US,L,STAFF,DATA ANALYST,Americas


3. Encoding the columns

In [240]:
# check initial info of data
pd.DataFrame({
    'column': data.columns,
    'type': data.dtypes,
    'n_unique': data.nunique(),
    'sample_unique': [data[col].sort_values().unique() for col in data.columns]
}).reset_index(drop=True)

Unnamed: 0,column,type,n_unique,sample_unique
0,work_year,int64,4,"[2020, 2021, 2022, 2023]"
1,experience_level,object,4,"[EN, EX, MI, SE]"
2,employment_type,object,4,"[CT, FL, FT, PT]"
3,salary_in_usd,int64,1500,"[15000, 15680, 15809, 15897, 15966, 16000, 162..."
4,remote_ratio,int64,3,"[0, 50, 100]"
5,employee_residence,object,84,"[AD, AE, AM, AR, AS, AT, AU, BA, BE, BG, BO, B..."
6,company_location,object,72,"[AD, AE, AM, AR, AS, AT, AU, BA, BE, BR, BS, C..."
7,company_size,object,3,"[L, M, S]"
8,job_position,object,5,"[DIRECTOR, HEAD, LEAD, MANAGER, STAFF]"
9,job_scope,object,9,"[ANALYTICS ENGINEER, BUSINESS INTELLIGENCE, DA..."


**_Insight_**:
- OneHot --> employement_type, remote_ratio
- LeaveOneOut --> job_scope
- OrdinalEncoder --> experience_level, company_size, job_position
- TargetEncoder --> employee_residence, company_location

References: [Encoding Cheat Sheet](https://raw.githubusercontent.com/alteryx/categorical_encoding/090e8d207aa14dd278e03209b4663cf9af0cad45/guides/flowchart/Categorical%20Encoding%20Flowchart.png)

In [241]:
# define columns that we want to encode and how the way we encode them
onehot = ['employment_type', 'region']
ordinal = ['experience_level', 'company_size', 'job_position']
leaveoneout = 'job_scope'
targetencod = ['employee_residence', 'company_location']

# ordinal mapping
orenMap = [{'col': 'experience_level', 'mapping': {'EN': 0, 'MI': 1, 'SE': 2, 'EX': 3}}, 
     {'col': 'company_size', 'mapping': {'S': 0, 'M': 1, 'L': 2}},
     {'col': 'job_position', 'mapping': {'STAFF': 0, 'LEAD': 1, 'MANAGER': 2, 'HEAD': 3, 'DIRECTOR': 4}}]

In [242]:
# create encoder using Column Transformer from sklearn
ct = ColumnTransformer([
    ('One Hot Encoder', ce.OneHotEncoder(), onehot),
    ('Ordinal Encoder', ce.OrdinalEncoder(cols=ordinal, mapping=orenMap), ordinal),
    ('LeaveOneOut encoder', ce.LeaveOneOutEncoder(), leaveoneout),
    ('Target Encoder', ce.TargetEncoder(), targetencod),
    # ('remote_ratio', Preprocces_remote_ratio(), 'remote_ratio')
], remainder='passthrough')

### Split the data into data train and data test

In [243]:
# define features and target
target = 'salary_in_usd'
feature = data.drop(columns=target).columns

# random_state
RANDOM_STATE = 7

# split the data into train and test
dfTrain, dfTest = train_test_split(data, test_size=0.2, random_state=RANDOM_STATE)

display(dfTrain.shape, dfTest.shape)

(3400, 11)

(850, 11)

### Modelling Benchmark

In [244]:
# Model without transformed target
from sklearn.neural_network import MLPRegressor

# mlp_regressor = MLPRegressor(hidden_layer_sizes=(1000, 1000, 1000, 1000, 1000), random_state=RANDOM_STATE, max_iter=10000, warm_start=True, activation='relu', verbose=True)

# Stand Alone Model
lr = LinearRegression()
lasso = Lasso(random_state=RANDOM_STATE)
ridge = Ridge(random_state=RANDOM_STATE)
elastic = ElasticNet(random_state=RANDOM_STATE)
knn = KNeighborsRegressor()
dt = DecisionTreeRegressor(random_state=RANDOM_STATE)

# Ensemble Model
rf = RandomForestRegressor(random_state=RANDOM_STATE)
xgb = XGBRegressor(random_state=RANDOM_STATE)
ada = AdaBoostRegressor(random_state=RANDOM_STATE)
catboost = CatBoostRegressor(random_state=RANDOM_STATE, verbose=False)
lightgbm = LGBMRegressor(random_state=RANDOM_STATE, n_jobs=-1, verbosity=0)

# Model with transformed target
# Stand Alone Model
log_lr = TransformedTargetRegressor(lr, func=np.log, inverse_func=np.exp)
log_lasso = TransformedTargetRegressor(lasso, func=np.log, inverse_func=np.exp)
log_ridge = TransformedTargetRegressor(ridge, func=np.log, inverse_func=np.exp)
log_elastic = TransformedTargetRegressor(elastic, func=np.log, inverse_func=np.exp)

In [245]:
# collect model
models = {
    'LinearRegression': lr,
    'Lasso' : lasso,
    'Ridge' : ridge,
    'Elastic' : elastic,
    'KNeighborsRegressor': knn,
    'DecisionTreeRegressor': dt,
    'RandomForestRegressor': rf,
    'XGBRegressor': xgb,
    'AdaBoostRegressor': ada,
    'CatBoostRegressor' : catboost,
    'LGBMRegressor' : lightgbm,
    'LinearRegression-logTarget': log_lr,
    'Lasso-logTarget' : log_lasso,
    'Ridge-logTarget' : log_ridge,
    'Elastic-logTarget' : log_elastic,
    # 'MLPRegressor' : mlp_regressor
    }

result1 = []

for name, est in models.items():
    
    crossval = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

    estimator = Pipeline([
        ('preprocess_encod', ct),
        ('model', est)
    ])

    # MAE, MAPE, and R2 cross-validation & fit time
    cv_result = cross_validate(
        estimator, 
        dfTrain[feature], 
        dfTrain[target], 
        cv=crossval, 
        scoring=['neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'r2'],
        return_train_score=True,
        n_jobs=-1
        )
    
    # Fit time
    fit_time = cv_result['fit_time']
    
    # MAE
    train_mae = -cv_result['train_neg_mean_absolute_error']
    val_mae = -cv_result['test_neg_mean_absolute_error']

    # MAPE
    train_mape = -cv_result['train_neg_mean_absolute_percentage_error']
    val_mape = -cv_result['test_neg_mean_absolute_percentage_error']

    # R2
    train_r2 = cv_result['train_r2']
    val_r2 = cv_result['test_r2']

    # Adjusted R2
    res_train = []
    res_val = []
    for train in train_r2:
        res_train.append(adj_r2(train, len(dfTrain), len(feature)))
    for val in val_r2:
        res_val.append(adj_r2(val, len(dfTrain), len(feature)))

    # Collect Result
    result1.append(
        {
            'Model': name,
            # score
            'MAE_train': abs(train_mae.mean()),
            'MAE_val' : abs(val_mae.mean()),
            'MAPE_train': abs(train_mape.mean()),
            'MAPE_val' : abs(val_mape.mean()),
            'r2_train': train_r2.mean(),
            'r2_val' : val_r2.mean(),
            'Adjusted-r2_train':np.array(res_train).mean(),
            'Adjusted-r2_val':np.array(res_val).mean(),
            # standard deviation
            'std-MAE_train': stats.stdev(train_mae),
            'std-MAE_val': stats.stdev(val_mae),
            'std-MAPE_train': stats.stdev(train_mape),
            'std-MAPE_val' : stats.stdev(val_mape),
            'std-r2_train' : stats.stdev(train_r2),
            'std-r2_val' : stats.stdev(val_r2),
            'std-adjusted-r2_train' : stats.stdev(res_train),
            'std-adjusted-r2_val' : stats.stdev(res_val),
            # fit time
            'mean_fit_time' : fit_time.mean(),
            'std_fit_time' : stats.stdev(fit_time)

        }
    )

resultDf1 = pd.DataFrame(result1).set_index('Model')

printedTab1 = resultDf1.sort_values(by='MAPE_val')\
    .style\
    .highlight_max(subset=['r2_val','Adjusted-r2_val'], color = 'yellow', axis = 0)\
    .highlight_min(subset=['MAE_val','MAPE_val'], color = 'yellow', axis = 0)

# display table
printedTab1

Unnamed: 0_level_0,MAE_train,MAE_val,MAPE_train,MAPE_val,r2_train,r2_val,Adjusted-r2_train,Adjusted-r2_val,std-MAE_train,std-MAE_val,std-MAPE_train,std-MAPE_val,std-r2_train,std-r2_val,std-adjusted-r2_train,std-adjusted-r2_val,mean_fit_time,std_fit_time
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Ridge-logTarget,39673.666304,40104.44924,0.324751,0.331695,0.364747,0.351358,0.362872,0.349444,302.668281,1578.400326,0.002119,0.009498,0.006806,0.046084,0.006826,0.04622,0.046878,2e-06
LinearRegression-logTarget,39673.95062,40108.901562,0.324745,0.331788,0.364932,0.351473,0.363058,0.349559,302.552315,1576.790189,0.002119,0.009384,0.006844,0.046111,0.006864,0.046247,0.07356,0.01003
Ridge,40409.904164,40841.116856,0.36791,0.374052,0.385162,0.37103,0.383348,0.369174,300.215707,1114.032669,0.002695,0.00424,0.006566,0.034876,0.006586,0.034979,0.078413,0.006697
Lasso,40416.943088,40853.668268,0.367996,0.374227,0.385185,0.370894,0.383371,0.369038,299.665789,1115.50539,0.0027,0.004143,0.006568,0.03497,0.006588,0.035073,0.072673,0.005729
LinearRegression,40417.845595,40855.149787,0.368016,0.374257,0.385185,0.370869,0.383371,0.369013,299.482294,1115.550871,0.002699,0.004124,0.006568,0.03498,0.006588,0.035083,0.068058,0.004389
Elastic-logTarget,43478.565023,44108.108132,0.375813,0.386768,0.24366,0.223299,0.241429,0.221007,316.095947,1541.948295,0.002871,0.010759,0.003249,0.026828,0.003259,0.026907,0.05947,0.020531
Lasso-logTarget,43455.612666,44091.156022,0.375942,0.386978,0.243271,0.222724,0.241038,0.220431,307.650523,1488.610082,0.002883,0.010929,0.003328,0.025565,0.003338,0.02564,0.050009,0.006994
Elastic,41421.143196,41924.114392,0.390018,0.397785,0.349801,0.333254,0.347882,0.331286,326.51416,1109.106258,0.003539,0.005714,0.004488,0.023982,0.004502,0.024052,0.072,0.009216
RandomForestRegressor,41698.025786,43877.153876,0.358628,0.397835,0.315138,0.256614,0.313117,0.25442,576.125173,1104.336254,0.013051,0.010235,0.007387,0.025263,0.007409,0.025338,0.539367,0.009695
KNeighborsRegressor,42809.217397,44575.604588,0.363207,0.398018,0.268794,0.22105,0.266637,0.218752,488.617278,1115.570159,0.009208,0.01183,0.013738,0.025169,0.013779,0.025243,0.068266,0.008499


### Permutation Importance

In [246]:
from sklearn.inspection import permutation_importance

# INITIALIZE UNTRAINED MODEL
estimator = Pipeline([
        ('preprocess_encod', ct),
        ('model', log_ridge)
    ])
estimator.fit(dfTrain[feature], dfTrain[target])

r = permutation_importance(estimator, dfTrain[feature], dfTrain[target], n_repeats=30, random_state=0, scoring='neg_root_mean_squared_error')
feature_rank = []
for i in r.importances_mean.argsort()[::-1]:
    if r.importances_mean[i] - 2 * r.importances_std[i] > 0:
        print(f"{dfTrain[feature].columns[i]:<20}"
        f"{r.importances_mean[i]:.3f}"
        f" +/- {r.importances_std[i]:.3f}")
        feature_rank.append(dfTrain[feature].columns[i])

experience_level    7377.756 +/- 440.255
job_scope           5802.093 +/- 277.419
employee_residence  2638.924 +/- 235.131
company_location    2020.511 +/- 230.512
region              1008.759 +/- 158.520
work_year           866.844 +/- 110.635
job_position        567.906 +/- 107.560
remote_ratio        160.747 +/- 50.718


In [247]:
mean_mae, std_mae = [], []
mean_mape, std_mape = [], []
feature_names = []

feature_rank_1 = ['experience_level',
 'job_scope',
 'employee_residence',
 'company_location',
 'region_1', 'region_2', 'region_3', 'region_4', 'region_5',
 'work_year',
 'job_position',
 'remote_ratio',
#  'employment_type_1','employment_type_2','employment_type_3','employment_type_4',
]

ct1 = ColumnTransformer([
    ('One Hot Encoder', ce.OneHotEncoder(), onehot),
    ('Ordinal Encoder', ce.OrdinalEncoder(cols=ordinal, mapping=orenMap), ordinal),
    ('LeaveOneOut encoder', ce.LeaveOneOutEncoder(), leaveoneout),
    ('Target Encoder', ce.TargetEncoder(), targetencod),
    # ('remote_ratio', Preprocces_remote_ratio(), 'remote_ratio')
], remainder='passthrough', verbose_feature_names_out=False)

preproccess = Pipeline([
        ('preprocess_encod', ct1)
    ]).set_output(transform='pandas')
X_train_encoded = preproccess.fit_transform(dfTrain[feature], dfTrain[target])
# X_train_encoded

for i in range(len(feature_rank_1)):
    feature_names.append(feature_rank_1[:i+1])

    # estimator
    estimator = Pipeline([
        ('model', log_ridge)
    ])

    # cross validaation technique
    crossval = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

    cv_result1 = cross_validate(
        estimator=estimator,
        X=X_train_encoded[feature_rank_1[:i+1]],
        y=dfTrain[target],
        cv=crossval,
        scoring=['neg_mean_absolute_error', 'neg_mean_absolute_percentage_error'],
    return_train_score=True
    )

    val_mae = -cv_result1['test_neg_mean_absolute_error']
    mean_mae.append(val_mae.mean())
    std_mae.append(val_mae.std())

    val_mape = -cv_result1['test_neg_mean_absolute_percentage_error']
    mean_mape.append(val_mape.mean())
    std_mape.append(val_mape.std())

pd.DataFrame({
    'features': feature_names,
    'mean_MAE': mean_mae,
    'std_MAE': std_mae,
    'mean_MAPE': mean_mape,
    'std_MAPE': std_mape
})

Unnamed: 0,features,mean_MAE,std_MAE,mean_MAPE,std_MAPE
0,[experience_level],47566.659154,1626.484718,0.434594,0.013702
1,"[experience_level, job_scope]",45449.088588,1378.471424,0.418699,0.011412
2,"[experience_level, job_scope, employee_residence]",40385.069829,1347.76914,0.336281,0.007809
3,"[experience_level, job_scope, employee_residen...",40410.11794,1340.912161,0.336587,0.0075
4,"[experience_level, job_scope, employee_residen...",40336.244676,1351.575492,0.33512,0.007258
5,"[experience_level, job_scope, employee_residen...",40212.587514,1266.023796,0.331594,0.005291
6,"[experience_level, job_scope, employee_residen...",40222.51042,1263.518833,0.33153,0.005241
7,"[experience_level, job_scope, employee_residen...",40195.52178,1262.606249,0.331113,0.005155
8,"[experience_level, job_scope, employee_residen...",40195.145799,1261.763575,0.33099,0.005071
9,"[experience_level, job_scope, employee_residen...",40041.409674,1335.028536,0.328714,0.005644


### Hyperparameter Tuning : Ridge

In [248]:
# Define a range of hyperparameters to search
param_grid_ridge = {
    'model__alpha': [0.001, 0.01, 0.1, 1, 10, 100],
    'model__fit_intercept': [True, False],
    'model__solver': ['auto', 'svd', 'cholesky', 'lsqr', 'lbfgs'],
    'model__positive' : [True, False],
    'model__max_iter': [100, 500, 1000],
    'model__tol': [1e-4, 1e-3, 1e-2]
}

estimator_ridge = Pipeline([
        ('preprocess_encod', ct),
        ('model', ridge)
        ])

crossval = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Hyperparameter tuning Ridge using grid search technique
gridsearch_ridge = GridSearchCV(
    estimator_ridge, 
    param_grid = param_grid_ridge,
    cv = crossval, 
    scoring = ['neg_mean_absolute_error', 'neg_mean_absolute_percentage_error'], 
    n_jobs = -1,
    refit = 'neg_mean_absolute_error'
)

gridsearch_ridge.fit(dfTrain[feature], dfTrain[target])

In [249]:
# best parameters on September 13, 2023
print('Ridge')
print('Best_score:', -gridsearch_ridge.best_score_)
print('Best_params:', gridsearch_ridge.best_params_)

Ridge
Best_score: 40823.592631369866
Best_params: {'model__alpha': 10, 'model__fit_intercept': True, 'model__max_iter': 100, 'model__positive': False, 'model__solver': 'svd', 'model__tol': 0.0001}


In [262]:
# compare before and after tuning

# model after tuning
ridge_tuning = gridsearch_ridge.best_estimator_[-1]

# collect ridge model before and after tuning
models2 = {
    'Ridge-before': ridge,
    'Ridge-after': ridge_tuning
    }

result2 = []

for name, est in models2.items():
    
    crossval1 = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

    estimator_ridge = Pipeline([
        ('preprocess_encod', ct),
        ('model', TransformedTargetRegressor(est, func=np.log, inverse_func=np.exp))
    ])#.set_output(transform='pandas')

    # MAE, MAPE, and R2 cross-validation & fit time
    cv_result = cross_validate(
        estimator_ridge, 
        dfTrain[feature], 
        dfTrain[target], 
        cv=crossval1, 
        scoring=['neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'r2'],
        return_train_score=True,
        n_jobs=-1
        )
    
    # Fit time
    fit_time = cv_result['fit_time']
    
    # MAE
    train_mae = -cv_result['train_neg_mean_absolute_error']
    val_mae = -cv_result['test_neg_mean_absolute_error']

    # MAPE
    train_mape = -cv_result['train_neg_mean_absolute_percentage_error']
    val_mape = -cv_result['test_neg_mean_absolute_percentage_error']

    # R2
    train_r2 = cv_result['train_r2']
    val_r2 = cv_result['test_r2']

    # Adjusted R2
    res_train = []
    res_val = []
    for train in train_r2:
        res_train.append(adj_r2(train, len(dfTrain), len(feature)))
    for val in val_r2:
        res_val.append(adj_r2(val, len(dfTrain), len(feature)))

    # Collect Result
    result2.append(
        {
            'Model': name,
            # score
            'MAE_train': abs(train_mae.mean()),
            'MAE_val' : abs(val_mae.mean()),
            'MAPE_train': abs(train_mape.mean()),
            'MAPE_val' : abs(val_mape.mean()),
            'r2_train': train_r2.mean(),
            'r2_val' : val_r2.mean(),
            'Adjusted-r2_train':np.array(res_train).mean(),
            'Adjusted-r2_val':np.array(res_val).mean(),
            # standard deviation
            'std-MAE_train': stats.stdev(train_mae),
            'std-MAE_val': stats.stdev(val_mae),
            'std-MAPE_train': stats.stdev(train_mape),
            'std-MAPE_val' : stats.stdev(val_mape),
            'std-r2_train' : stats.stdev(train_r2),
            'std-r2_val' : stats.stdev(val_r2),
            'std-adjusted-r2_train' : stats.stdev(res_train),
            'std-adjusted-r2_val' : stats.stdev(res_val),
            # fit time
            'mean_fit_time' : fit_time.mean(),
            'std_fit_time' : stats.stdev(fit_time)

        }
    )

resultDf2 = pd.DataFrame(result2).set_index('Model')

printedTab2 = resultDf2.sort_values(by='MAPE_val')\
    .style\
    .highlight_max(subset=['r2_val','Adjusted-r2_val'], color = 'yellow', axis = 0)\
    .highlight_min(subset=['MAE_val','MAPE_val'], color = 'yellow', axis = 0)

# display table
printedTab2

Unnamed: 0_level_0,MAE_train,MAE_val,MAPE_train,MAPE_val,r2_train,r2_val,Adjusted-r2_train,Adjusted-r2_val,std-MAE_train,std-MAE_val,std-MAPE_train,std-MAPE_val,std-r2_train,std-r2_val,std-adjusted-r2_train,std-adjusted-r2_val,mean_fit_time,std_fit_time
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Ridge-before,39673.666304,40104.44924,0.324751,0.331695,0.364747,0.351358,0.362872,0.349444,302.668281,1578.400326,0.002119,0.009498,0.006806,0.046084,0.006826,0.04622,0.077116,0.005838
Ridge-after,39678.725485,40103.317865,0.325053,0.331791,0.363639,0.350338,0.361761,0.348421,303.209318,1593.142822,0.002083,0.009917,0.006609,0.045761,0.006629,0.045896,0.086288,0.007983


### Conclusion:
- After compare with XGBoost model in 2.0.1 and/or 2.0.1.1 notebooks, XGboost model has better value of the metric evaluation. So we don't use this model anymore.