In [1]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline

import transformers as trans

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

pd.set_option('display.max_columns', None)

In [2]:
#%load_ext pycodestyle_magic

In [3]:
#%pycodestyle_on

In [4]:
#from yapf.yapflib.yapf_api import FormatCode
#import json

In [5]:
df_train = pd.read_csv('data/train-student-mat.csv')

X_train = df_train.drop(['Unnamed: 0', 'id', 'Final_Score', 'G1', 'G2', 'G3'],
                        axis=1)
y_train = df_train['Final_Score']

df_test = pd.read_csv('data/test-student-mat.csv')
X_test = df_test.drop(['Unnamed: 0', 'id', 'Final_Score', 'G1', 'G2', 'G3'],
                      axis=1)
y_test = df_test['Final_Score']

In [6]:
cat_features = ['school', 'sex', 'address', 'famsize', 'Pstatus', 'Mjob',
                'Fjob', 'reason', 'guardian', 'schoolsup', 'famsup', 'paid',
                'activities', 'nursery', 'higher', 'internet', 'romantic']

cat_ordinal_features = ['school', 'sex', 'address', 'famsize',
                        'Pstatus', 'Medu', 'Fedu', 'Mjob', 'Fjob',
                        'reason', 'guardian', 'traveltime', 'studytime',
                        'failures', 'schoolsup', 'famsup', 'paid',
                        'activities', 'nursery', 'higher', 'internet',
                        'romantic', 'famrel', 'freetime', 'goout', 'Dalc',
                        'Walc', 'health']
#at_binary_fetures = []

In [7]:
trans_dict = {
    'ohe_cat': trans.OneHotEncoderDF(cat_features),
    'ohe_cat_ord': trans.OneHotEncoderDF(cat_ordinal_features),
    'label_cat': trans.OrdinalEncoderDF(cat_features),
    'cof_cat': trans.CountOfFreqEncoder(cat_features),
    'cof_cat_ord': trans.CountOfFreqEncoder(cat_ordinal_features),
    'target_ord': trans.OrderedIntTargetEncoder(cat_features),
    'target_mean': trans.AggTargetEncoder(cat_features, 'mean'),
    'target_mean': trans.AggTargetEncoder(cat_ordinal_features, 'mean')
    }

In [8]:
def adj_r2_fun(r2, n, p):
    adj_rsquared = 1 - (1 - r2) * ((n - 1)/(n-p-1))
    return adj_rsquared

In [9]:
def metrics_calculation(y_true, y_pred, number_of_predictors):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    adj_r2 = adj_r2_fun(r2, len(y_pred), number_of_predictors)
    return mae, mse, r2, adj_r2

In [10]:
def add_results(results, model_name, trans, mae_train, mse_train, r2_train,
                adj_r2_train, mae_test, mse_test, r2_test, adj_r2_test):
    temp = pd.DataFrame({'Method': [model_name],
                         'Transformer': [trans],
                         'MAE train': [mae_train],
                         'RMSE train': [mse_train],
                         'R2 train': [r2_train],
                         'Adj R2 train': [adj_r2_train],
                         'MAE test': [mae_test],
                         'RMSE test': [mse_test],
                         'R2 test': [r2_test],
                         'Adj R2 test': [adj_r2_test]})

    results = pd.concat([results, temp])

    results = results[['Method', 'Transformer', 'MAE train', 'RMSE train',
                       'R2 train', 'Adj R2 train', 'MAE test', 'RMSE test',
                       'R2 test', 'Adj R2 test']]
    return results

## Linear Regression

In [11]:
#del results
results=pd.DataFrame()

In [12]:
for key, val in trans_dict.items():
    model = LinearRegression()
    X_train_trans = val.fit_transform(X_train, y_train)
    X_test_trans = val.transform(X_test)
    model.fit(X_train_trans, y_train)
    y_pred_train = model.predict(X_train_trans)
    y_pred_test = model.predict(X_test_trans)
    mae_train, mse_train, r2_train, adj_r2_train = metrics_calculation(
        y_train, y_pred_train, X_train_trans.shape[1])
    mae_test, mse_test, r2_test, adj_r2_test = metrics_calculation(
        y_test, y_pred_test, X_test_trans.shape[1])
    results = add_results(results, 'LinReg', key, mae_train, mse_train, r2_train, adj_r2_train,
                      mae_test, mse_test, r2_test, adj_r2_test)

In [13]:
results

Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,LinReg,ohe_cat,2.355628,9.252799,0.319504,0.146287,2.855511,12.064859,0.220054,-0.495961
0,LinReg,ohe_cat_ord,2.167674,7.783917,0.427533,0.117313,3.015075,14.61818,0.054992,-4.528296
0,LinReg,label_cat,2.458912,9.798957,0.279337,0.191451,2.912924,12.379502,0.199714,-0.076247
0,LinReg,cof_cat,2.474169,9.773287,0.281225,0.19357,2.891552,12.22716,0.209562,-0.063003
0,LinReg,cof_cat_ord,2.420525,9.608855,0.293318,0.207137,2.914275,12.578625,0.186841,-0.093558
0,LinReg,target_ord,2.445794,9.634068,0.291464,0.205057,2.901424,12.241308,0.208648,-0.064233
0,LinReg,target_mean,2.317015,8.74752,0.356665,0.278209,2.97966,13.350164,0.136964,-0.160634


Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,LinReg,ohe_cat,2.355628,9.252799,0.319504,0.146287,2.855511,12.064859,0.220054,-0.495961
0,LinReg,ohe_cat_ord,2.167674,7.783917,0.427533,0.117313,3.015075,14.61818,0.054992,-4.528296
0,LinReg,label_cat,2.458912,9.798957,0.279337,0.191451,2.912924,12.379502,0.199714,-0.076247
0,LinReg,cof_cat,2.474169,9.773287,0.281225,0.19357,2.891552,12.22716,0.209562,-0.063003
0,LinReg,cof_cat_ord,2.420525,9.608855,0.293318,0.207137,2.914275,12.578625,0.186841,-0.093558
0,LinReg,target_ord,2.445794,9.634068,0.291464,0.205057,2.901424,12.241308,0.208648,-0.064233
0,LinReg,target_mean,2.317015,8.74752,0.356665,0.278209,2.97966,13.350164,0.136964,-0.160634


### Support Vector Regression

In [14]:
results_svm=pd.DataFrame()

In [15]:
for key, val in trans_dict.items():
    pipe = Pipeline(
        steps=[
            ("step1", val),
            ('step2', MinMaxScaler()),
        ])
    X_train_trans = pipe.fit_transform(X_train, y_train)
    X_test_trans = pipe.transform(X_test)
    
    param_grid = {'C': [0.1, 1, 10, 100, 1000], 
                  'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                  'kernel': ['rbf']} 
    
    model = SVR()
    grid = GridSearchCV(model, param_grid, refit = True, verbose = 3)
    grid.fit(X_train_trans, y_train)
    model_best = grid.best_estimator_
    
    y_pred_train = model_best.predict(X_train_trans)
    y_pred_test = model_best.predict(X_test_trans)
    mae_train, mse_train, r2_train, adj_r2_train = metrics_calculation(
        y_train, y_pred_train, X_train_trans.shape[1])
    mae_test, mse_test, r2_test, adj_r2_test = metrics_calculation(
        y_test, y_pred_test, X_test_trans.shape[1])
    results_svm = add_results(results_svm, 'SVM', key, mae_train, mse_train, r2_train, adj_r2_train,
                      mae_test, mse_test, r2_test, adj_r2_test)
    

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.071 total time=   0.0s
[CV 2/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.090 total time=   0.0s
[CV 3/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.039 total time=   0.0s
[CV 4/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.010 total time=   0.0s
[CV 5/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.002 total time=   0.0s
[CV 1/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.059 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.072 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.021 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.019 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.002 total time=   0.0s
[CV 1/5] END ....C=0.1, gamma=0.01, kernel=rbf;, score=-0.069 total time=   0.0s
[CV 2/5] END ....C=0.1, gamma=0.01, kernel=rbf;

[CV 1/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.073 total time=   0.0s
[CV 2/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.095 total time=   0.0s
[CV 3/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.036 total time=   0.0s
[CV 4/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.013 total time=   0.0s
[CV 5/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.003 total time=   0.0s
[CV 1/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.129 total time=   0.0s
[CV 2/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.086 total time=   0.0s
[CV 3/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.005 total time=   0.0s
[CV 4/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.020 total time=   0.0s
[CV 5/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.001 total time=   0.0s
[CV 1/5] END ......C=10, gamma=0.1, kernel=rbf;, score=-0.224 total time=   0.0s
[CV 2/5] END ......C=10, gamma=0.1, kernel=rbf;, score=-0.067 total time=   0.0s
[CV 3/5] END ......C=10, gam

[CV 2/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.178 total time=   0.0s
[CV 3/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.498 total time=   0.0s
[CV 4/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.401 total time=   0.0s
[CV 5/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.343 total time=   0.0s
[CV 1/5] END ...C=1000, gamma=0.01, kernel=rbf;, score=-0.645 total time=   0.0s
[CV 2/5] END ...C=1000, gamma=0.01, kernel=rbf;, score=-0.391 total time=   0.0s
[CV 3/5] END ...C=1000, gamma=0.01, kernel=rbf;, score=-0.590 total time=   0.0s
[CV 4/5] END ...C=1000, gamma=0.01, kernel=rbf;, score=-0.664 total time=   0.0s
[CV 5/5] END ...C=1000, gamma=0.01, kernel=rbf;, score=-0.519 total time=   0.0s
[CV 1/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=0.049 total time=   0.0s
[CV 2/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=0.164 total time=   0.0s
[CV 3/5] END ..C=1000, gamma=0.001, kernel=rbf;, score=-0.105 total time=   0.0s
[CV 4/5] END ..C=1000, gamma

[CV 1/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.071 total time=   0.0s
[CV 2/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.094 total time=   0.0s
[CV 3/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.036 total time=   0.0s
[CV 4/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.013 total time=   0.0s
[CV 5/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.002 total time=   0.0s
[CV 1/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.137 total time=   0.0s
[CV 2/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.090 total time=   0.0s
[CV 3/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.009 total time=   0.0s
[CV 4/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.023 total time=   0.0s
[CV 5/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.000 total time=   0.0s
[CV 1/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.043 total time=   0.0s
[CV 2/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.026 total time=   0.0s
[CV 3/5] END ......C=10, gam

[CV 3/5] END ......C=1000, gamma=1, kernel=rbf;, score=-0.009 total time=   0.0s
[CV 4/5] END ......C=1000, gamma=1, kernel=rbf;, score=-0.023 total time=   0.0s
[CV 5/5] END ......C=1000, gamma=1, kernel=rbf;, score=-0.000 total time=   0.0s
[CV 1/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.001 total time=   0.0s
[CV 2/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.019 total time=   0.0s
[CV 3/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.075 total time=   0.0s
[CV 4/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.070 total time=   0.0s
[CV 5/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.106 total time=   0.0s
[CV 1/5] END ...C=1000, gamma=0.01, kernel=rbf;, score=-0.303 total time=   0.0s
[CV 2/5] END ...C=1000, gamma=0.01, kernel=rbf;, score=-0.146 total time=   0.0s
[CV 3/5] END ...C=1000, gamma=0.01, kernel=rbf;, score=-0.507 total time=   0.0s
[CV 4/5] END ...C=1000, gamma=0.01, kernel=rbf;, score=-0.544 total time=   0.0s
[CV 3/5] END ......C=1000, g

[CV 1/5] END .........C=1, gamma=1, kernel=rbf;, score=-0.036 total time=   0.0s
[CV 2/5] END .........C=1, gamma=1, kernel=rbf;, score=-0.062 total time=   0.0s
[CV 3/5] END .........C=1, gamma=1, kernel=rbf;, score=-0.018 total time=   0.0s
[CV 4/5] END .........C=1, gamma=1, kernel=rbf;, score=-0.028 total time=   0.0s
[CV 5/5] END .........C=1, gamma=1, kernel=rbf;, score=-0.037 total time=   0.0s
[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.076 total time=   0.0s
[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.068 total time=   0.0s
[CV 3/5] END .......C=1, gamma=0.1, kernel=rbf;, score=-0.013 total time=   0.0s
[CV 4/5] END .......C=1, gamma=0.1, kernel=rbf;, score=-0.051 total time=   0.0s
[CV 5/5] END .......C=1, gamma=0.1, kernel=rbf;, score=-0.056 total time=   0.0s
[CV 1/5] END ......C=1, gamma=0.01, kernel=rbf;, score=-0.023 total time=   0.0s
[CV 2/5] END ......C=1, gamma=0.01, kernel=rbf;, score=-0.037 total time=   0.0s
[CV 3/5] END ......C=1, gamm

[CV 4/5] END ..C=100, gamma=0.0001, kernel=rbf;, score=-0.017 total time=   0.0s
[CV 5/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.001 total time=   0.0s
[CV 1/5] END ......C=1000, gamma=1, kernel=rbf;, score=-0.052 total time=   0.0s
[CV 2/5] END ......C=1000, gamma=1, kernel=rbf;, score=-0.077 total time=   0.0s
[CV 3/5] END ......C=1000, gamma=1, kernel=rbf;, score=-0.017 total time=   0.0s
[CV 4/5] END ......C=1000, gamma=1, kernel=rbf;, score=-0.078 total time=   0.0s
[CV 5/5] END ......C=1000, gamma=1, kernel=rbf;, score=-0.172 total time=   0.0s
[CV 1/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.454 total time=   0.0s
[CV 2/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.288 total time=   0.0s
[CV 3/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.988 total time=   0.0s
[CV 4/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-1.067 total time=   0.0s
[CV 5/5] END ....C=1000, gamma=0.1, kernel=rbf;, score=-0.729 total time=   0.0s
[CV 1/5] END ....C=1000, gam

[CV 1/5] END .........C=1, gamma=1, kernel=rbf;, score=-0.058 total time=   0.0s
[CV 2/5] END .........C=1, gamma=1, kernel=rbf;, score=-0.068 total time=   0.0s
[CV 3/5] END .........C=1, gamma=1, kernel=rbf;, score=-0.024 total time=   0.0s
[CV 4/5] END .........C=1, gamma=1, kernel=rbf;, score=-0.034 total time=   0.0s
[CV 5/5] END .........C=1, gamma=1, kernel=rbf;, score=-0.033 total time=   0.0s
[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.056 total time=   0.0s
[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.042 total time=   0.0s
[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.021 total time=   0.0s
[CV 4/5] END .......C=1, gamma=0.1, kernel=rbf;, score=-0.058 total time=   0.0s
[CV 5/5] END .......C=1, gamma=0.1, kernel=rbf;, score=-0.063 total time=   0.0s
[CV 1/5] END ......C=1, gamma=0.01, kernel=rbf;, score=-0.027 total time=   0.0s
[CV 2/5] END ......C=1, gamma=0.01, kernel=rbf;, score=-0.045 total time=   0.0s
[CV 3/5] END .......C=1, gam

[CV 4/5] END .......C=100, gamma=1, kernel=rbf;, score=-0.071 total time=   0.0s
[CV 5/5] END .......C=100, gamma=1, kernel=rbf;, score=-0.135 total time=   0.0s
[CV 1/5] END .....C=100, gamma=0.1, kernel=rbf;, score=-0.203 total time=   0.0s
[CV 2/5] END .....C=100, gamma=0.1, kernel=rbf;, score=-0.241 total time=   0.0s
[CV 3/5] END .....C=100, gamma=0.1, kernel=rbf;, score=-0.524 total time=   0.0s
[CV 4/5] END .....C=100, gamma=0.1, kernel=rbf;, score=-0.598 total time=   0.0s
[CV 5/5] END .....C=100, gamma=0.1, kernel=rbf;, score=-0.618 total time=   0.0s
[CV 1/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.107 total time=   0.0s
[CV 2/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.195 total time=   0.0s
[CV 3/5] END ....C=100, gamma=0.01, kernel=rbf;, score=-0.053 total time=   0.0s
[CV 4/5] END ....C=100, gamma=0.01, kernel=rbf;, score=-0.240 total time=   0.0s
[CV 5/5] END ....C=100, gamma=0.01, kernel=rbf;, score=-0.176 total time=   0.0s
[CV 1/5] END ....C=100, gamm

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.068 total time=   0.0s
[CV 2/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.086 total time=   0.0s
[CV 3/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.039 total time=   0.0s
[CV 4/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.012 total time=   0.0s
[CV 5/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.001 total time=   0.0s
[CV 1/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.033 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.059 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.016 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.010 total time=   0.0s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.016 total time=   0.0s
[CV 1/5] END ....C=0.1, gamma=0.01, kernel=rbf;, score=-0.066 total time=   0.0s
[CV 2/5] END ....C=0.1, gamma=0.01, kernel=rbf;

[CV 3/5] END .........C=10, gamma=1, kernel=rbf;, score=0.001 total time=   0.0s
[CV 4/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.028 total time=   0.0s
[CV 5/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.006 total time=   0.0s
[CV 1/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.037 total time=   0.0s
[CV 2/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.141 total time=   0.0s
[CV 3/5] END ......C=10, gamma=0.1, kernel=rbf;, score=-0.093 total time=   0.0s
[CV 4/5] END ......C=10, gamma=0.1, kernel=rbf;, score=-0.287 total time=   0.0s
[CV 5/5] END ......C=10, gamma=0.1, kernel=rbf;, score=-0.197 total time=   0.0s
[CV 1/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.138 total time=   0.0s
[CV 2/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.167 total time=   0.0s
[CV 3/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.051 total time=   0.0s
[CV 4/5] END .....C=10, gamma=0.01, kernel=rbf;, score=-0.025 total time=   0.0s
[CV 5/5] END ......C=10, gam

[CV 3/5] END ..C=1000, gamma=0.001, kernel=rbf;, score=-0.080 total time=   0.0s
[CV 4/5] END ..C=1000, gamma=0.001, kernel=rbf;, score=-0.147 total time=   0.0s
[CV 5/5] END ..C=1000, gamma=0.001, kernel=rbf;, score=-0.127 total time=   0.0s
[CV 1/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=0.144 total time=   0.0s
[CV 2/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=0.179 total time=   0.0s
[CV 3/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=0.043 total time=   0.0s
[CV 4/5] END .C=1000, gamma=0.0001, kernel=rbf;, score=-0.021 total time=   0.0s
[CV 5/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=0.025 total time=   0.0s
Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.065 total time=   0.0s
[CV 3/5] END ..C=1000, gamma=0.001, kernel=rbf;, score=-0.080 total time=   0.0s
[CV 4/5] END ..C=1000, gamma=0.001, kernel=rbf;, score=-0.147 total time=   0.0s
[CV 5/5] END ..C=1000, gamma=0.001, kernel=rbf;

[CV 3/5] END .....C=1, gamma=0.001, kernel=rbf;, score=-0.029 total time=   0.0s
[CV 4/5] END .....C=1, gamma=0.001, kernel=rbf;, score=-0.013 total time=   0.0s
[CV 5/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.000 total time=   0.0s
[CV 1/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.072 total time=   0.0s
[CV 2/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.095 total time=   0.0s
[CV 3/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.037 total time=   0.0s
[CV 4/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.013 total time=   0.0s
[CV 5/5] END ....C=1, gamma=0.0001, kernel=rbf;, score=-0.003 total time=   0.0s
[CV 1/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.081 total time=   0.0s
[CV 2/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.055 total time=   0.0s
[CV 3/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.006 total time=   0.0s
[CV 4/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.059 total time=   0.0s
[CV 5/5] END ........C=10, g

[CV 4/5] END .....C=100, gamma=0.1, kernel=rbf;, score=-0.421 total time=   0.0s
[CV 5/5] END .....C=100, gamma=0.1, kernel=rbf;, score=-0.424 total time=   0.0s
[CV 1/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.212 total time=   0.0s
[CV 2/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.247 total time=   0.0s
[CV 3/5] END ....C=100, gamma=0.01, kernel=rbf;, score=-0.089 total time=   0.0s
[CV 4/5] END ....C=100, gamma=0.01, kernel=rbf;, score=-0.262 total time=   0.0s
[CV 5/5] END ....C=100, gamma=0.01, kernel=rbf;, score=-0.153 total time=   0.0s
[CV 1/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.163 total time=   0.0s
[CV 2/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.158 total time=   0.0s
[CV 3/5] END ...C=100, gamma=0.001, kernel=rbf;, score=-0.002 total time=   0.0s
[CV 4/5] END ...C=100, gamma=0.001, kernel=rbf;, score=-0.073 total time=   0.0s
[CV 5/5] END ...C=100, gamma=0.001, kernel=rbf;, score=-0.032 total time=   0.0s
[CV 1/5] END ..C=100, gamma=

[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.206 total time=   0.0s
[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.106 total time=   0.0s
[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.112 total time=   0.0s
[CV 4/5] END .......C=1, gamma=0.1, kernel=rbf;, score=-0.012 total time=   0.0s
[CV 5/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.041 total time=   0.0s
[CV 1/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.037 total time=   0.0s
[CV 2/5] END ......C=1, gamma=0.01, kernel=rbf;, score=-0.013 total time=   0.0s
[CV 3/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.024 total time=   0.0s
[CV 4/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.011 total time=   0.0s
[CV 5/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.039 total time=   0.0s
[CV 1/5] END .....C=1, gamma=0.001, kernel=rbf;, score=-0.058 total time=   0.0s
[CV 2/5] END .....C=1, gamma=0.001, kernel=rbf;, score=-0.082 total time=   0.0s
[CV 3/5] END .....C=1, gamma

[CV 3/5] END .....C=100, gamma=0.1, kernel=rbf;, score=-0.231 total time=   0.0s
[CV 4/5] END .....C=100, gamma=0.1, kernel=rbf;, score=-0.487 total time=   0.0s
[CV 5/5] END .....C=100, gamma=0.1, kernel=rbf;, score=-0.454 total time=   0.0s
[CV 1/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.325 total time=   0.0s
[CV 2/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.251 total time=   0.0s
[CV 3/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.162 total time=   0.0s
[CV 4/5] END ....C=100, gamma=0.01, kernel=rbf;, score=-0.166 total time=   0.0s
[CV 5/5] END ....C=100, gamma=0.01, kernel=rbf;, score=-0.020 total time=   0.0s
[CV 1/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.238 total time=   0.0s
[CV 2/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.205 total time=   0.0s
[CV 3/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.134 total time=   0.0s
[CV 4/5] END ...C=100, gamma=0.001, kernel=rbf;, score=-0.014 total time=   0.0s
[CV 5/5] END ....C=100, gamm

In [16]:
results_svm

Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,SVM,ohe_cat,2.38102,10.343419,0.239295,0.045661,2.981667,12.620309,0.184147,-0.564833
0,SVM,ohe_cat_ord,2.153526,9.002381,0.337921,-0.020859,2.955927,12.841107,0.169873,-3.856243
0,SVM,label_cat,2.465462,10.746869,0.209623,0.113236,2.981308,12.556208,0.188291,-0.091609
0,SVM,cof_cat,2.48841,10.93661,0.195669,0.097579,2.969997,12.594314,0.185827,-0.094922
0,SVM,cof_cat_ord,2.409663,10.243264,0.246661,0.15479,2.962912,12.713384,0.17813,-0.105274
0,SVM,target_ord,2.454114,10.624693,0.218609,0.123317,2.958013,12.268921,0.206863,-0.066633
0,SVM,target_mean,2.204522,9.299195,0.316092,0.232689,3.120678,13.978032,0.096375,-0.21522


Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,SVM,ohe_cat,2.38102,10.343419,0.239295,0.045661,2.981667,12.620309,0.184147,-0.564833
0,SVM,ohe_cat_ord,2.153526,9.002381,0.337921,-0.020859,2.955927,12.841107,0.169873,-3.856243
0,SVM,label_cat,2.465462,10.746869,0.209623,0.113236,2.981308,12.556208,0.188291,-0.091609
0,SVM,cof_cat,2.48841,10.93661,0.195669,0.097579,2.969997,12.594314,0.185827,-0.094922
0,SVM,cof_cat_ord,2.409663,10.243264,0.246661,0.15479,2.962912,12.713384,0.17813,-0.105274
0,SVM,target_ord,2.454114,10.624693,0.218609,0.123317,2.958013,12.268921,0.206863,-0.066633
0,SVM,target_mean,2.204522,9.299195,0.316092,0.232689,3.120678,13.978032,0.096375,-0.21522


## Decision Tree Regression

In [17]:
results_dt=pd.DataFrame()

In [18]:
for key, val in trans_dict.items():
    X_train_trans = val.fit_transform(X_train, y_train)
    X_test_trans = val.transform(X_test)
    
    param_grid = {'criterion': ['squared_error', 'friedman_mse', 
                                'absolute_error', 'poisson'], 
                  'max_depth': [3, 5, 7, 9, 11, 13, 15],
                 } 
    
    model = DecisionTreeRegressor()
    grid = GridSearchCV(model, param_grid, refit = True, verbose = 3)
    grid.fit(X_train_trans, y_train)
    model_best = grid.best_estimator_
    
    y_pred_train = model_best.predict(X_train_trans)
    y_pred_test = model_best.predict(X_test_trans)
    mae_train, mse_train, r2_train, adj_r2_train = metrics_calculation(
        y_train, y_pred_train, X_train_trans.shape[1])
    mae_test, mse_test, r2_test, adj_r2_test = metrics_calculation(
        y_test, y_pred_test, X_test_trans.shape[1])
    results_dt = add_results(results_dt, 'Dec Tree', key, mae_train, mse_train,
                          r2_train, adj_r2_train, mae_test, mse_test,
                          r2_test, adj_r2_test)
    print(grid.best_params_)

Fitting 5 folds for each of 28 candidates, totalling 140 fits
[CV 1/5] END criterion=squared_error, max_depth=3;, score=-0.268 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=3;, score=0.185 total time=   0.0s
[CV 3/5] END criterion=squared_error, max_depth=3;, score=0.043 total time=   0.0s
[CV 4/5] END criterion=squared_error, max_depth=3;, score=-0.216 total time=   0.0s
[CV 5/5] END criterion=squared_error, max_depth=3;, score=-0.173 total time=   0.0s
[CV 1/5] END criterion=squared_error, max_depth=5;, score=-0.331 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=5;, score=0.102 total time=   0.0s
[CV 3/5] END criterion=squared_error, max_depth=5;, score=-0.136 total time=   0.0s
[CV 4/5] END criterion=squared_error, max_depth=5;, score=-0.114 total time=   0.0s
[CV 5/5] END criterion=squared_error, max_depth=5;, score=-0.640 total time=   0.0s
[CV 1/5] END criterion=squared_error, max_depth=7;, score=-0.935 total time=   0.0s
[CV 2/5] END crit

[CV 3/5] END criterion=friedman_mse, max_depth=11;, score=-0.813 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=11;, score=-0.547 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=11;, score=-0.782 total time=   0.0s
[CV 1/5] END criterion=friedman_mse, max_depth=13;, score=-1.243 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=13;, score=0.131 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=13;, score=-0.921 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=13;, score=-0.611 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=13;, score=-0.948 total time=   0.0s
[CV 1/5] END criterion=friedman_mse, max_depth=15;, score=-1.555 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=15;, score=0.122 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=15;, score=-0.790 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=15;, score=-0.468 total time=  

[CV 5/5] END criterion=absolute_error, max_depth=15;, score=-2.357 total time=   0.0s
[CV 1/5] END ...criterion=poisson, max_depth=3;, score=-0.184 total time=   0.0s
[CV 2/5] END ....criterion=poisson, max_depth=3;, score=0.151 total time=   0.0s
[CV 3/5] END ....criterion=poisson, max_depth=3;, score=0.048 total time=   0.0s
[CV 4/5] END ...criterion=poisson, max_depth=3;, score=-0.216 total time=   0.0s
[CV 5/5] END ...criterion=poisson, max_depth=3;, score=-0.202 total time=   0.0s
[CV 1/5] END ...criterion=poisson, max_depth=5;, score=-0.187 total time=   0.0s
[CV 2/5] END ....criterion=poisson, max_depth=5;, score=0.137 total time=   0.0s
[CV 3/5] END ....criterion=poisson, max_depth=5;, score=0.046 total time=   0.0s
[CV 4/5] END ...criterion=poisson, max_depth=5;, score=-0.114 total time=   0.0s
[CV 5/5] END ...criterion=poisson, max_depth=5;, score=-0.632 total time=   0.0s
[CV 1/5] END ...criterion=poisson, max_depth=7;, score=-0.630 total time=   0.0s
[CV 2/5] END ....criter

[CV 3/5] END criterion=squared_error, max_depth=11;, score=-1.509 total time=   0.0s
[CV 4/5] END criterion=squared_error, max_depth=11;, score=-1.033 total time=   0.0s
[CV 5/5] END criterion=squared_error, max_depth=11;, score=-0.492 total time=   0.0s
[CV 1/5] END criterion=squared_error, max_depth=13;, score=-1.164 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=13;, score=0.113 total time=   0.0s
[CV 3/5] END criterion=squared_error, max_depth=13;, score=-1.366 total time=   0.0s
[CV 4/5] END criterion=squared_error, max_depth=13;, score=-0.991 total time=   0.0s
[CV 5/5] END criterion=squared_error, max_depth=13;, score=-0.260 total time=   0.0s
[CV 1/5] END criterion=squared_error, max_depth=15;, score=-1.187 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=15;, score=0.032 total time=   0.0s
[CV 3/5] END criterion=squared_error, max_depth=15;, score=-1.554 total time=   0.0s
[CV 4/5] END criterion=squared_error, max_depth=15;, score=-0.951 t

[CV 5/5] END criterion=absolute_error, max_depth=3;, score=-0.025 total time=   0.0s
[CV 1/5] END criterion=absolute_error, max_depth=5;, score=-0.172 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=5;, score=0.025 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=5;, score=-0.522 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=5;, score=-0.363 total time=   0.0s
[CV 5/5] END criterion=absolute_error, max_depth=5;, score=-0.526 total time=   0.0s
[CV 1/5] END criterion=absolute_error, max_depth=7;, score=-0.317 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=7;, score=-0.084 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=7;, score=-0.835 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=7;, score=-0.534 total time=   0.0s
[CV 5/5] END criterion=absolute_error, max_depth=7;, score=-0.742 total time=   0.0s
[CV 1/5] END criterion=absolute_error, max_depth=9;, score=-0.795 

[CV 4/5] END ..criterion=poisson, max_depth=11;, score=-1.131 total time=   0.0s
[CV 5/5] END ..criterion=poisson, max_depth=11;, score=-0.706 total time=   0.0s
[CV 1/5] END ..criterion=poisson, max_depth=13;, score=-0.604 total time=   0.0s
[CV 2/5] END ..criterion=poisson, max_depth=13;, score=-0.099 total time=   0.0s
[CV 3/5] END ..criterion=poisson, max_depth=13;, score=-1.314 total time=   0.0s
[CV 4/5] END ..criterion=poisson, max_depth=13;, score=-1.079 total time=   0.0s
[CV 5/5] END ..criterion=poisson, max_depth=13;, score=-0.927 total time=   0.0s
[CV 1/5] END ..criterion=poisson, max_depth=15;, score=-0.857 total time=   0.0s
[CV 2/5] END ..criterion=poisson, max_depth=15;, score=-0.112 total time=   0.0s
[CV 3/5] END ..criterion=poisson, max_depth=15;, score=-1.337 total time=   0.0s
[CV 4/5] END ..criterion=poisson, max_depth=15;, score=-0.873 total time=   0.0s
[CV 4/5] END ..criterion=poisson, max_depth=11;, score=-1.131 total time=   0.0s
[CV 5/5] END ..criterion=poi

[CV 1/5] END criterion=friedman_mse, max_depth=5;, score=-0.503 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=5;, score=0.125 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=5;, score=-0.198 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=5;, score=-0.154 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=5;, score=-0.763 total time=   0.0s
[CV 1/5] END criterion=friedman_mse, max_depth=7;, score=-0.658 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=7;, score=0.216 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=7;, score=-0.636 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=7;, score=-0.533 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=7;, score=-0.748 total time=   0.0s
[CV 1/5] END criterion=friedman_mse, max_depth=9;, score=-0.884 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=9;, score=0.068 total time=   0.0s
[CV 3/5

[CV 2/5] END criterion=absolute_error, max_depth=11;, score=-0.251 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=11;, score=-1.061 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=11;, score=-1.038 total time=   0.0s
[CV 5/5] END criterion=absolute_error, max_depth=11;, score=-2.006 total time=   0.0s
[CV 1/5] END criterion=absolute_error, max_depth=13;, score=-1.993 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=13;, score=-0.322 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=13;, score=-0.901 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=13;, score=-1.080 total time=   0.0s
[CV 5/5] END criterion=absolute_error, max_depth=13;, score=-2.139 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=11;, score=-0.251 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=11;, score=-1.061 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=11;, 

[CV 2/5] END ..criterion=poisson, max_depth=15;, score=-0.010 total time=   0.0s
[CV 3/5] END ...criterion=poisson, max_depth=15;, score=0.006 total time=   0.0s
[CV 4/5] END ..criterion=poisson, max_depth=15;, score=-0.548 total time=   0.0s
[CV 5/5] END ..criterion=poisson, max_depth=15;, score=-0.974 total time=   0.0s
{'criterion': 'absolute_error', 'max_depth': 3}
Fitting 5 folds for each of 28 candidates, totalling 140 fits
[CV 1/5] END criterion=squared_error, max_depth=3;, score=0.083 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=3;, score=0.185 total time=   0.0s
[CV 3/5] END criterion=squared_error, max_depth=3;, score=0.043 total time=   0.0s
[CV 4/5] END criterion=squared_error, max_depth=3;, score=-0.216 total time=   0.0s
[CV 5/5] END criterion=squared_error, max_depth=3;, score=-0.173 total time=   0.0s
[CV 1/5] END criterion=squared_error, max_depth=5;, score=-0.199 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=5;, score=0.130 t

[CV 4/5] END criterion=friedman_mse, max_depth=11;, score=-0.595 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=11;, score=-0.789 total time=   0.0s
[CV 1/5] END criterion=friedman_mse, max_depth=13;, score=-1.335 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=13;, score=0.333 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=13;, score=-0.922 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=13;, score=-0.503 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=13;, score=-1.071 total time=   0.0s
[CV 1/5] END criterion=friedman_mse, max_depth=15;, score=-1.063 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=15;, score=0.295 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=15;, score=-1.093 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=15;, score=-0.781 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=15;, score=-0.938 total time=  

[CV 2/5] END criterion=absolute_error, max_depth=15;, score=-0.475 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=15;, score=-0.411 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=15;, score=-1.097 total time=   0.0s
[CV 5/5] END criterion=absolute_error, max_depth=15;, score=-0.943 total time=   0.0s
[CV 1/5] END ...criterion=poisson, max_depth=3;, score=-0.043 total time=   0.0s
[CV 2/5] END ....criterion=poisson, max_depth=3;, score=0.151 total time=   0.0s
[CV 3/5] END ....criterion=poisson, max_depth=3;, score=0.048 total time=   0.0s
[CV 4/5] END ...criterion=poisson, max_depth=3;, score=-0.216 total time=   0.0s
[CV 5/5] END ...criterion=poisson, max_depth=3;, score=-0.173 total time=   0.0s
[CV 1/5] END ...criterion=poisson, max_depth=5;, score=-0.032 total time=   0.0s
[CV 2/5] END ....criterion=poisson, max_depth=5;, score=0.129 total time=   0.0s
[CV 3/5] END ....criterion=poisson, max_depth=5;, score=0.008 total time=   0.0s
[CV 4/5]

[CV 1/5] END criterion=squared_error, max_depth=11;, score=-0.904 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=11;, score=-0.218 total time=   0.0s
[CV 3/5] END criterion=squared_error, max_depth=11;, score=-0.730 total time=   0.0s
[CV 4/5] END criterion=squared_error, max_depth=11;, score=-1.104 total time=   0.0s
[CV 5/5] END criterion=squared_error, max_depth=11;, score=-1.359 total time=   0.0s
[CV 1/5] END criterion=squared_error, max_depth=13;, score=-0.611 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=13;, score=-0.229 total time=   0.0s
[CV 3/5] END criterion=squared_error, max_depth=13;, score=-0.717 total time=   0.0s
[CV 4/5] END criterion=squared_error, max_depth=13;, score=-1.147 total time=   0.0s
[CV 5/5] END criterion=squared_error, max_depth=13;, score=-1.804 total time=   0.0s
[CV 1/5] END criterion=squared_error, max_depth=15;, score=-0.922 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=15;, score=-0.119

[CV 1/5] END criterion=absolute_error, max_depth=5;, score=-0.275 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=5;, score=-0.195 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=5;, score=-0.113 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=5;, score=-0.390 total time=   0.0s
[CV 5/5] END criterion=absolute_error, max_depth=5;, score=-0.398 total time=   0.0s
[CV 1/5] END criterion=absolute_error, max_depth=7;, score=-1.321 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=7;, score=-0.199 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=7;, score=-0.185 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=7;, score=-0.943 total time=   0.0s
[CV 5/5] END criterion=absolute_error, max_depth=7;, score=-0.754 total time=   0.0s
[CV 1/5] END criterion=absolute_error, max_depth=9;, score=-1.469 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=9;, score=-0.274

[CV 1/5] END ..criterion=poisson, max_depth=13;, score=-0.759 total time=   0.0s
[CV 2/5] END ..criterion=poisson, max_depth=13;, score=-0.024 total time=   0.0s
[CV 3/5] END ..criterion=poisson, max_depth=13;, score=-1.003 total time=   0.0s
[CV 4/5] END ..criterion=poisson, max_depth=13;, score=-0.870 total time=   0.0s
[CV 5/5] END ..criterion=poisson, max_depth=13;, score=-1.375 total time=   0.0s
[CV 1/5] END ..criterion=poisson, max_depth=15;, score=-0.726 total time=   0.0s
[CV 2/5] END ..criterion=poisson, max_depth=15;, score=-0.120 total time=   0.0s
[CV 3/5] END ..criterion=poisson, max_depth=15;, score=-0.973 total time=   0.0s
[CV 4/5] END ..criterion=poisson, max_depth=15;, score=-0.854 total time=   0.0s
[CV 5/5] END ..criterion=poisson, max_depth=15;, score=-1.400 total time=   0.0s
{'criterion': 'squared_error', 'max_depth': 3}
Fitting 5 folds for each of 28 candidates, totalling 140 fits
[CV 1/5] END criterion=squared_error, max_depth=3;, score=-0.268 total time=   0.

[CV 3/5] END criterion=friedman_mse, max_depth=5;, score=-0.193 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=5;, score=-0.125 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=5;, score=-0.699 total time=   0.0s
[CV 1/5] END criterion=friedman_mse, max_depth=7;, score=-0.661 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=7;, score=0.213 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=7;, score=-0.877 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=7;, score=-0.470 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=7;, score=-0.533 total time=   0.0s
[CV 1/5] END criterion=friedman_mse, max_depth=9;, score=-0.735 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=9;, score=0.206 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=9;, score=-0.945 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=9;, score=-0.436 total time=   0.0s
[CV 5/

[CV 1/5] END criterion=absolute_error, max_depth=13;, score=-1.245 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=13;, score=-0.103 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=13;, score=-0.735 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=13;, score=-1.030 total time=   0.0s
[CV 5/5] END criterion=absolute_error, max_depth=13;, score=-2.312 total time=   0.0s
[CV 1/5] END criterion=absolute_error, max_depth=15;, score=-1.411 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=15;, score=-0.154 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=15;, score=-0.845 total time=   0.0s
[CV 1/5] END criterion=absolute_error, max_depth=13;, score=-1.245 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=13;, score=-0.103 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=13;, score=-0.735 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=13;, 

Fitting 5 folds for each of 28 candidates, totalling 140 fits
[CV 1/5] END criterion=squared_error, max_depth=3;, score=-0.357 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=3;, score=0.198 total time=   0.0s
[CV 3/5] END criterion=squared_error, max_depth=3;, score=0.012 total time=   0.0s
[CV 4/5] END criterion=squared_error, max_depth=3;, score=-0.252 total time=   0.0s
[CV 5/5] END criterion=squared_error, max_depth=3;, score=-0.152 total time=   0.0s
[CV 1/5] END criterion=squared_error, max_depth=5;, score=-0.637 total time=   0.0s
[CV 2/5] END criterion=squared_error, max_depth=5;, score=0.099 total time=   0.0s
[CV 3/5] END criterion=squared_error, max_depth=5;, score=-0.066 total time=   0.0s
[CV 4/5] END criterion=squared_error, max_depth=5;, score=-0.220 total time=   0.0s
[CV 5/5] END criterion=squared_error, max_depth=5;, score=-0.716 total time=   0.0s
[CV 1/5] END criterion=squared_error, max_depth=7;, score=-1.291 total time=   0.0s
[CV 2/5] END crit

[CV 2/5] END criterion=friedman_mse, max_depth=11;, score=-0.059 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=11;, score=-1.067 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=11;, score=-0.848 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=11;, score=-1.501 total time=   0.0s
[CV 1/5] END criterion=friedman_mse, max_depth=13;, score=-0.869 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=13;, score=-0.162 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=13;, score=-1.081 total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=13;, score=-0.697 total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=13;, score=-2.357 total time=   0.0s
[CV 1/5] END criterion=friedman_mse, max_depth=15;, score=-1.148 total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=15;, score=-0.136 total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=15;, score=-0.906 total time=

[CV 5/5] END criterion=absolute_error, max_depth=13;, score=-1.751 total time=   0.0s
[CV 1/5] END criterion=absolute_error, max_depth=15;, score=-0.983 total time=   0.0s
[CV 2/5] END criterion=absolute_error, max_depth=15;, score=0.063 total time=   0.0s
[CV 3/5] END criterion=absolute_error, max_depth=15;, score=-1.419 total time=   0.0s
[CV 4/5] END criterion=absolute_error, max_depth=15;, score=-0.795 total time=   0.0s
[CV 5/5] END criterion=absolute_error, max_depth=15;, score=-1.770 total time=   0.0s
[CV 1/5] END ...criterion=poisson, max_depth=3;, score=-0.302 total time=   0.0s
[CV 2/5] END ....criterion=poisson, max_depth=3;, score=0.201 total time=   0.0s
[CV 3/5] END ....criterion=poisson, max_depth=3;, score=0.068 total time=   0.0s
[CV 4/5] END ...criterion=poisson, max_depth=3;, score=-0.252 total time=   0.0s
[CV 5/5] END ...criterion=poisson, max_depth=3;, score=-0.182 total time=   0.0s
[CV 1/5] END ...criterion=poisson, max_depth=5;, score=-0.293 total time=   0.0s

In [19]:
results_dt	

Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,Dec Tree,ohe_cat,2.406137,10.773574,0.207659,0.005972,2.949153,13.176398,0.148198,-0.633785
0,Dec Tree,ohe_cat_ord,2.391237,9.607252,0.293436,-0.089451,3.123268,14.853743,0.039764,-4.617381
0,Dec Tree,label_cat,2.406137,10.773574,0.207659,0.111032,2.949153,13.176398,0.148198,-0.145527
0,Dec Tree,cof_cat,2.384367,9.792614,0.279804,0.191975,2.972383,13.549218,0.124096,-0.177939
0,Dec Tree,cof_cat_ord,2.431866,9.681327,0.287988,0.201157,2.972556,13.682845,0.115458,-0.189557
0,Dec Tree,target_ord,2.406137,10.773574,0.207659,0.111032,2.949153,13.176398,0.148198,-0.145527
0,Dec Tree,target_mean,2.371331,9.675845,0.288391,0.20161,3.058747,14.277046,0.077045,-0.241215


Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,Dec Tree,ohe_cat,2.406137,10.773574,0.207659,0.005972,2.949153,13.176398,0.148198,-0.633785
0,Dec Tree,ohe_cat_ord,2.391237,9.607252,0.293436,-0.089451,3.123268,14.853743,0.039764,-4.617381
0,Dec Tree,label_cat,2.406137,10.773574,0.207659,0.111032,2.949153,13.176398,0.148198,-0.145527
0,Dec Tree,cof_cat,2.384367,9.792614,0.279804,0.191975,2.972383,13.549218,0.124096,-0.177939
0,Dec Tree,cof_cat_ord,2.431866,9.681327,0.287988,0.201157,2.972556,13.682845,0.115458,-0.189557
0,Dec Tree,target_ord,2.406137,10.773574,0.207659,0.111032,2.949153,13.176398,0.148198,-0.145527
0,Dec Tree,target_mean,2.371331,9.675845,0.288391,0.20161,3.058747,14.277046,0.077045,-0.241215


## Random Forest Regressor

In [20]:
results_rfr=pd.DataFrame()

In [21]:
for key, val in trans_dict.items():
    X_train_trans = val.fit_transform(X_train, y_train)
    X_test_trans = val.transform(X_test)
    
    param_grid = {'n_estimators': [50, 150, 250],
                  'max_depth': [3, 5, 7],
                 } 
    
    model = RandomForestRegressor()
    grid = GridSearchCV(model, param_grid, refit = True, verbose = 3)
    grid.fit(X_train_trans, y_train)
    model_best = grid.best_estimator_
    
    y_pred_train = model_best.predict(X_train_trans)
    y_pred_test = model_best.predict(X_test_trans)
    mae_train, mse_train, r2_train, adj_r2_train = metrics_calculation(
        y_train, y_pred_train, X_train_trans.shape[1])
    mae_test, mse_test, r2_test, adj_r2_test = metrics_calculation(
        y_test, y_pred_test, X_test_trans.shape[1])
    results_rfr = add_results(results_rfr, 'Random Forest', key, mae_train, mse_train,
                          r2_train, adj_r2_train, mae_test, mse_test,
                          r2_test, adj_r2_test)
    print(grid.best_params_)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ......max_depth=3, n_estimators=50;, score=0.022 total time=   0.0s
Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ......max_depth=3, n_estimators=50;, score=0.022 total time=   0.0s
[CV 2/5] END ......max_depth=3, n_estimators=50;, score=0.237 total time=   0.0s
[CV 3/5] END ......max_depth=3, n_estimators=50;, score=0.145 total time=   0.0s
[CV 2/5] END ......max_depth=3, n_estimators=50;, score=0.237 total time=   0.0s
[CV 3/5] END ......max_depth=3, n_estimators=50;, score=0.145 total time=   0.0s
[CV 4/5] END .....max_depth=3, n_estimators=50;, score=-0.062 total time=   0.0s
[CV 5/5] END .....max_depth=3, n_estimators=50;, score=-0.037 total time=   0.0s
[CV 4/5] END .....max_depth=3, n_estimators=50;, score=-0.062 total time=   0.0s
[CV 5/5] END .....max_depth=3, n_estimators=50;, score=-0.037 total time=   0.0s
[CV 1/5] END ....max_depth=3, n_estimators=150;, score=-0.002 total ti

[CV 4/5] END .....max_depth=3, n_estimators=50;, score=-0.133 total time=   0.0s
[CV 5/5] END ......max_depth=3, n_estimators=50;, score=0.069 total time=   0.0s
[CV 1/5] END ....max_depth=3, n_estimators=150;, score=-0.011 total time=   0.2s
[CV 1/5] END ....max_depth=3, n_estimators=150;, score=-0.011 total time=   0.2s
[CV 2/5] END .....max_depth=3, n_estimators=150;, score=0.227 total time=   0.3s
[CV 2/5] END .....max_depth=3, n_estimators=150;, score=0.227 total time=   0.3s
[CV 3/5] END .....max_depth=3, n_estimators=150;, score=0.155 total time=   0.3s
[CV 3/5] END .....max_depth=3, n_estimators=150;, score=0.155 total time=   0.3s
[CV 4/5] END ....max_depth=3, n_estimators=150;, score=-0.104 total time=   0.3s
[CV 4/5] END ....max_depth=3, n_estimators=150;, score=-0.104 total time=   0.3s
[CV 5/5] END .....max_depth=3, n_estimators=150;, score=0.126 total time=   0.4s
[CV 5/5] END .....max_depth=3, n_estimators=150;, score=0.126 total time=   0.4s
[CV 1/5] END .....max_depth=

[CV 4/5] END ....max_depth=3, n_estimators=150;, score=-0.098 total time=   0.2s
[CV 5/5] END ....max_depth=3, n_estimators=150;, score=-0.002 total time=   0.2s
[CV 5/5] END ....max_depth=3, n_estimators=150;, score=-0.002 total time=   0.2s
[CV 1/5] END ....max_depth=3, n_estimators=250;, score=-0.031 total time=   0.7s
[CV 1/5] END ....max_depth=3, n_estimators=250;, score=-0.031 total time=   0.7s
[CV 2/5] END .....max_depth=3, n_estimators=250;, score=0.263 total time=   0.9s
[CV 2/5] END .....max_depth=3, n_estimators=250;, score=0.263 total time=   0.9s
[CV 3/5] END .....max_depth=3, n_estimators=250;, score=0.126 total time=   1.1s
[CV 3/5] END .....max_depth=3, n_estimators=250;, score=0.126 total time=   1.1s
[CV 4/5] END ....max_depth=3, n_estimators=250;, score=-0.076 total time=   0.9s
[CV 4/5] END ....max_depth=3, n_estimators=250;, score=-0.076 total time=   0.9s
[CV 5/5] END .....max_depth=3, n_estimators=250;, score=0.019 total time=   0.5s
[CV 1/5] END .....max_depth=

[CV 4/5] END ....max_depth=3, n_estimators=250;, score=-0.077 total time=   0.6s
[CV 4/5] END ....max_depth=3, n_estimators=250;, score=-0.077 total time=   0.6s
[CV 5/5] END .....max_depth=3, n_estimators=250;, score=0.017 total time=   0.6s
[CV 1/5] END ......max_depth=5, n_estimators=50;, score=0.041 total time=   0.1s
[CV 5/5] END .....max_depth=3, n_estimators=250;, score=0.017 total time=   0.6s
[CV 1/5] END ......max_depth=5, n_estimators=50;, score=0.041 total time=   0.1s
[CV 2/5] END ......max_depth=5, n_estimators=50;, score=0.233 total time=   0.1s
[CV 3/5] END ......max_depth=5, n_estimators=50;, score=0.156 total time=   0.1s
[CV 2/5] END ......max_depth=5, n_estimators=50;, score=0.233 total time=   0.1s
[CV 3/5] END ......max_depth=5, n_estimators=50;, score=0.156 total time=   0.1s
[CV 4/5] END .....max_depth=5, n_estimators=50;, score=-0.130 total time=   0.1s
[CV 5/5] END .....max_depth=5, n_estimators=50;, score=-0.009 total time=   0.1s
[CV 4/5] END .....max_depth=

[CV 4/5] END .....max_depth=5, n_estimators=50;, score=-0.084 total time=   0.1s
[CV 5/5] END ......max_depth=5, n_estimators=50;, score=0.097 total time=   0.1s
[CV 4/5] END .....max_depth=5, n_estimators=50;, score=-0.084 total time=   0.1s
[CV 5/5] END ......max_depth=5, n_estimators=50;, score=0.097 total time=   0.1s
[CV 1/5] END .....max_depth=5, n_estimators=150;, score=0.072 total time=   0.4s
[CV 1/5] END .....max_depth=5, n_estimators=150;, score=0.072 total time=   0.4s
[CV 2/5] END .....max_depth=5, n_estimators=150;, score=0.262 total time=   0.3s
[CV 2/5] END .....max_depth=5, n_estimators=150;, score=0.262 total time=   0.3s
[CV 3/5] END .....max_depth=5, n_estimators=150;, score=0.122 total time=   0.4s
[CV 3/5] END .....max_depth=5, n_estimators=150;, score=0.122 total time=   0.4s
[CV 4/5] END ....max_depth=5, n_estimators=150;, score=-0.118 total time=   0.4s
[CV 4/5] END ....max_depth=5, n_estimators=150;, score=-0.118 total time=   0.4s
[CV 5/5] END .....max_depth=

[CV 3/5] END .....max_depth=5, n_estimators=150;, score=0.143 total time=   0.4s
[CV 4/5] END ....max_depth=5, n_estimators=150;, score=-0.109 total time=   0.4s
[CV 4/5] END ....max_depth=5, n_estimators=150;, score=-0.109 total time=   0.4s
[CV 5/5] END ....max_depth=5, n_estimators=150;, score=-0.053 total time=   0.4s
[CV 5/5] END ....max_depth=5, n_estimators=150;, score=-0.053 total time=   0.4s
[CV 1/5] END .....max_depth=5, n_estimators=250;, score=0.002 total time=   0.7s
[CV 1/5] END .....max_depth=5, n_estimators=250;, score=0.002 total time=   0.7s
[CV 2/5] END .....max_depth=5, n_estimators=250;, score=0.260 total time=   0.8s
[CV 2/5] END .....max_depth=5, n_estimators=250;, score=0.260 total time=   0.8s
[CV 3/5] END .....max_depth=5, n_estimators=250;, score=0.142 total time=   0.8s
[CV 3/5] END .....max_depth=5, n_estimators=250;, score=0.142 total time=   0.8s
[CV 4/5] END ....max_depth=5, n_estimators=250;, score=-0.086 total time=   0.8s
[CV 4/5] END ....max_depth=5

[CV 3/5] END .....max_depth=5, n_estimators=250;, score=0.111 total time=   0.7s
[CV 3/5] END .....max_depth=5, n_estimators=250;, score=0.111 total time=   0.7s
[CV 4/5] END ....max_depth=5, n_estimators=250;, score=-0.063 total time=   0.6s
[CV 4/5] END ....max_depth=5, n_estimators=250;, score=-0.063 total time=   0.6s
[CV 5/5] END ....max_depth=5, n_estimators=250;, score=-0.024 total time=   0.8s
[CV 5/5] END ....max_depth=5, n_estimators=250;, score=-0.024 total time=   0.8s
[CV 1/5] END .....max_depth=7, n_estimators=50;, score=-0.021 total time=   0.2s
[CV 1/5] END .....max_depth=7, n_estimators=50;, score=-0.021 total time=   0.2s
[CV 2/5] END ......max_depth=7, n_estimators=50;, score=0.220 total time=   0.2s
[CV 2/5] END ......max_depth=7, n_estimators=50;, score=0.220 total time=   0.2s
[CV 3/5] END ......max_depth=7, n_estimators=50;, score=0.098 total time=   0.2s
[CV 3/5] END ......max_depth=7, n_estimators=50;, score=0.098 total time=   0.2s
[CV 4/5] END .....max_depth=

In [22]:
results_rfr

Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,Random Forest,ohe_cat,2.35209,8.860686,0.348342,0.182466,2.887731,12.304613,0.204555,-0.525689
0,Random Forest,ohe_cat_ord,2.315307,8.519487,0.373436,0.033901,2.952549,12.48414,0.192949,-3.721246
0,Random Forest,label_cat,2.330609,8.724792,0.358336,0.280085,2.858023,12.175087,0.212929,-0.058475
0,Random Forest,cof_cat,2.342259,8.761346,0.355648,0.277069,2.891538,12.407902,0.197878,-0.078716
0,Random Forest,cof_cat_ord,1.90434,5.907486,0.565535,0.512551,2.795142,11.638422,0.247622,-0.011819
0,Random Forest,target_ord,2.335626,8.738981,0.357293,0.278914,2.838894,11.971444,0.226093,-0.040771
0,Random Forest,target_mean,1.852995,5.600619,0.588103,0.537872,2.860471,11.898233,0.230826,-0.034406


Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,Random Forest,ohe_cat,2.35209,8.860686,0.348342,0.182466,2.887731,12.304613,0.204555,-0.525689
0,Random Forest,ohe_cat_ord,2.315307,8.519487,0.373436,0.033901,2.952549,12.48414,0.192949,-3.721246
0,Random Forest,label_cat,2.330609,8.724792,0.358336,0.280085,2.858023,12.175087,0.212929,-0.058475
0,Random Forest,cof_cat,2.342259,8.761346,0.355648,0.277069,2.891538,12.407902,0.197878,-0.078716
0,Random Forest,cof_cat_ord,1.90434,5.907486,0.565535,0.512551,2.795142,11.638422,0.247622,-0.011819
0,Random Forest,target_ord,2.335626,8.738981,0.357293,0.278914,2.838894,11.971444,0.226093,-0.040771
0,Random Forest,target_mean,1.852995,5.600619,0.588103,0.537872,2.860471,11.898233,0.230826,-0.034406


## XGBoost

In [23]:
results_xgb=pd.DataFrame()

In [24]:
for key, val in trans_dict.items():
    X_train_trans = val.fit_transform(X_train, y_train)
    X_test_trans = val.transform(X_test)
    
    param_grid = {'max_depth': [3, 5, 7],
                  'n_estimators': [50, 100],
                  'learning_rate': [0.1, 0.01, 0.05]
                 } 
    
    model = XGBRegressor()
    grid = GridSearchCV(model, param_grid, refit = True, verbose = 3)
    grid.fit(X_train_trans, y_train)
    model_best = grid.best_estimator_
    
    y_pred_train = model_best.predict(X_train_trans)
    y_pred_test = model_best.predict(X_test_trans)
    mae_train, mse_train, r2_train, adj_r2_train = metrics_calculation(
        y_train, y_pred_train, X_train_trans.shape[1])
    mae_test, mse_test, r2_test, adj_r2_test = metrics_calculation(
        y_test, y_pred_test, X_test_trans.shape[1])
    results_xgb = add_results(results_xgb, 'XGBoost', key, mae_train, mse_train,
                          r2_train, adj_r2_train, mae_test, mse_test,
                          r2_test, adj_r2_test)
    print(grid.best_params_)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV 1/5] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=-0.172 total time=   0.6s
[CV 2/5] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.272 total time=   0.0s
[CV 3/5] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.080 total time=   0.0s
[CV 4/5] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=-0.183 total time=   0.0s
[CV 5/5] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=-0.056 total time=   0.0s
[CV 1/5] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=-0.172 total time=   0.6s
[CV 2/5] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.272 total time=   0.0s
[CV 3/5] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.080 total time=   0.0s
[CV 4/5] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=-0.183 total time=   0.0s
[CV 5/5] END learning_rate=

[CV 4/5] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=-3.234 total time=   0.0s
[CV 5/5] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=-3.287 total time=   0.0s
[CV 4/5] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=-3.234 total time=   0.0s
[CV 5/5] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=-3.287 total time=   0.0s
[CV 1/5] END learning_rate=0.01, max_depth=5, n_estimators=100;, score=-2.407 total time=   0.1s
[CV 2/5] END learning_rate=0.01, max_depth=5, n_estimators=100;, score=-0.367 total time=   0.0s
[CV 1/5] END learning_rate=0.01, max_depth=5, n_estimators=100;, score=-2.407 total time=   0.1s
[CV 2/5] END learning_rate=0.01, max_depth=5, n_estimators=100;, score=-0.367 total time=   0.0s
[CV 3/5] END learning_rate=0.01, max_depth=5, n_estimators=100;, score=-0.928 total time=   0.0s
[CV 4/5] END learning_rate=0.01, max_depth=5, n_estimators=100;, score=-1.374 total time=   0.0s
[CV 3/5] END learning_rate=0.01, m

[CV 1/5] END learning_rate=0.05, max_depth=7, n_estimators=100;, score=-0.144 total time=   0.1s
[CV 2/5] END learning_rate=0.05, max_depth=7, n_estimators=100;, score=0.167 total time=   0.1s
[CV 3/5] END learning_rate=0.05, max_depth=7, n_estimators=100;, score=0.077 total time=   0.1s
[CV 3/5] END learning_rate=0.05, max_depth=7, n_estimators=100;, score=0.077 total time=   0.1s
[CV 4/5] END learning_rate=0.05, max_depth=7, n_estimators=100;, score=-0.239 total time=   0.1s
[CV 4/5] END learning_rate=0.05, max_depth=7, n_estimators=100;, score=-0.239 total time=   0.1s
[CV 5/5] END learning_rate=0.05, max_depth=7, n_estimators=100;, score=-0.396 total time=   0.1s
{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50}
[CV 5/5] END learning_rate=0.05, max_depth=7, n_estimators=100;, score=-0.396 total time=   0.1s
{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50}
Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV 1/5] END learning_rate=0.1, max_depth=3, n_e

[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-0.937 total time=   0.0s
[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-1.275 total time=   0.0s
[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-1.216 total time=   0.0s
[CV 1/5] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=-5.311 total time=   0.0s
[CV 2/5] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=-1.376 total time=   0.0s
[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-1.216 total time=   0.0s
[CV 1/5] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=-5.311 total time=   0.0s
[CV 2/5] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=-1.376 total time=   0.0s
[CV 3/5] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=-3.197 total time=   0.0s
[CV 4/5] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=-3.211 total time=   0.0s
[CV 5/5] END learning_rate=0.01, max

[CV 2/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=0.235 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=-0.027 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=0.235 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=-0.027 total time=   0.0s
[CV 4/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=-0.463 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=-0.464 total time=   0.0s
[CV 4/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=-0.463 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=-0.464 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth=7, n_estimators=100;, score=-0.266 total time=   0.1s
[CV 1/5] END learning_rate=0.05, max_depth=7, n_estimators=100;, score=-0.266 total time=   0.1s
[CV 2/5] END learning_rate=0.05, max_dep

[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=-3.173 total time=   0.0s
[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-2.155 total time=   0.0s
[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=-3.173 total time=   0.0s
[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-2.155 total time=   0.0s
[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-0.340 total time=   0.1s
[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-0.838 total time=   0.0s
[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-0.340 total time=   0.1s
[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-0.838 total time=   0.0s
[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-1.284 total time=   0.0s
[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=-1.169 total time=   0.0s
[CV 4/5] END learning_rate=0.01,

[CV 3/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=0.124 total time=   0.0s
[CV 4/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=-0.190 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=0.124 total time=   0.0s
[CV 4/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=-0.190 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=-0.294 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=-0.460 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=0.189 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=0.072 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=-0.294 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth=7, n_estimators=50;, score=-0.460 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_d

[CV 5/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.260 total time=   0.0s
[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=-5.202 total time=   0.0s
[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=-1.368 total time=   0.0s
[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=-3.034 total time=   0.0s
[CV 5/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.260 total time=   0.0s
[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=-5.202 total time=   0.0s
[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=-1.368 total time=   0.0s
[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=-3.034 total time=   0.0s
[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=-3.091 total time=   0.0s
[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=-3.100 total time=   0.0s
[CV 1/5] END learning_rate=0.01, max_dep

[CV 4/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.330 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.162 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=-0.110 total time=   0.0s
[CV 4/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.330 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.162 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=-0.110 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=0.204 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=0.142 total time=   0.0s
[CV 4/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=-0.216 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=5, n_estimators=100;, score=0.204 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_

[CV 5/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.237 total time=   0.0s
[CV 1/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.242 total time=   0.1s
[CV 2/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=0.227 total time=   0.1s
[CV 3/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.229 total time=   0.1s
[CV 2/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=0.227 total time=   0.1s
[CV 3/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.229 total time=   0.1s
[CV 4/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.172 total time=   0.1s
[CV 5/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.234 total time=   0.1s
[CV 4/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.172 total time=   0.1s
[CV 5/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.234 total time=   0.1s
[CV 1/5] END learning_rate=0.01, max_depth=

[CV 5/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=0.109 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.436 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=0.219 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.032 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=0.109 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.436 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=0.219 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.032 total time=   0.0s
[CV 4/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.277 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.122 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth

[CV 3/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.003 total time=   0.0s
[CV 4/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.278 total time=   0.0s
[CV 5/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.416 total time=   0.0s
[CV 3/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.003 total time=   0.0s
[CV 4/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.278 total time=   0.0s
[CV 5/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.416 total time=   0.0s
[CV 1/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.228 total time=   0.0s
[CV 2/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=0.114 total time=   0.0s
[CV 1/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=-0.228 total time=   0.0s
[CV 2/5] END learning_rate=0.1, max_depth=7, n_estimators=100;, score=0.114 total time=   0.0s
[CV 3/5] END learning_rate=0.1, max_depth=7, n_e

[CV 2/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=0.281 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=0.118 total time=   0.0s
[CV 4/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=-0.197 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=0.281 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=0.118 total time=   0.0s
[CV 4/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=-0.197 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=-0.016 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=-0.305 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=0.192 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=5, n_estimators=50;, score=0.114 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_de

[CV 4/5] END learning_rate=0.1, max_depth=5, n_estimators=100;, score=-0.130 total time=   0.0s
[CV 5/5] END learning_rate=0.1, max_depth=5, n_estimators=100;, score=-0.318 total time=   0.0s
[CV 1/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.180 total time=   0.0s
[CV 4/5] END learning_rate=0.1, max_depth=5, n_estimators=100;, score=-0.130 total time=   0.0s
[CV 5/5] END learning_rate=0.1, max_depth=5, n_estimators=100;, score=-0.318 total time=   0.0s
[CV 1/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.180 total time=   0.0s
[CV 2/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=0.123 total time=   0.0s
[CV 3/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.169 total time=   0.0s
[CV 4/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=-0.272 total time=   0.0s
[CV 2/5] END learning_rate=0.1, max_depth=7, n_estimators=50;, score=0.123 total time=   0.0s
[CV 3/5] END learning_rate=0.1, max_depth=7, n_e

[CV 1/5] END learning_rate=0.05, max_depth=3, n_estimators=50;, score=-0.162 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=3, n_estimators=50;, score=0.253 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=3, n_estimators=50;, score=0.095 total time=   0.0s
[CV 4/5] END learning_rate=0.05, max_depth=3, n_estimators=50;, score=-0.247 total time=   0.0s
[CV 5/5] END learning_rate=0.05, max_depth=3, n_estimators=50;, score=-0.039 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=-0.001 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=0.247 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=0.059 total time=   0.0s
[CV 1/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=-0.001 total time=   0.0s
[CV 2/5] END learning_rate=0.05, max_depth=3, n_estimators=100;, score=0.247 total time=   0.0s
[CV 3/5] END learning_rate=0.05, max_dep

In [25]:
results_xgb

Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,XGBoost,ohe_cat,1.718106,5.064092,0.627562,0.53276,2.745231,11.155008,0.278873,-0.383146
0,XGBoost,ohe_cat_ord,1.723704,5.040912,0.629267,0.428367,2.864093,11.977925,0.225674,-3.529805
0,XGBoost,label_cat,1.68168,4.880431,0.641069,0.597297,2.777064,11.265214,0.271748,0.020627
0,XGBoost,cof_cat,1.709699,4.956512,0.635474,0.59102,2.761989,11.309377,0.268893,0.016788
0,XGBoost,cof_cat_ord,1.759849,5.196963,0.61779,0.571179,2.829321,12.014071,0.223338,-0.044477
0,XGBoost,target_ord,1.702839,5.044269,0.62902,0.583778,2.810711,11.562822,0.252509,-0.005246
0,XGBoost,target_mean,1.653907,4.701562,0.654224,0.612057,2.817289,11.715982,0.242608,-0.018562


Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,XGBoost,ohe_cat,1.718106,5.064092,0.627562,0.53276,2.745231,11.155008,0.278873,-0.383146
0,XGBoost,ohe_cat_ord,1.723704,5.040912,0.629267,0.428367,2.864093,11.977925,0.225674,-3.529805
0,XGBoost,label_cat,1.68168,4.880431,0.641069,0.597297,2.777064,11.265214,0.271748,0.020627
0,XGBoost,cof_cat,1.709699,4.956512,0.635474,0.59102,2.761989,11.309377,0.268893,0.016788
0,XGBoost,cof_cat_ord,1.759849,5.196963,0.61779,0.571179,2.829321,12.014071,0.223338,-0.044477
0,XGBoost,target_ord,1.702839,5.044269,0.62902,0.583778,2.810711,11.562822,0.252509,-0.005246
0,XGBoost,target_mean,1.653907,4.701562,0.654224,0.612057,2.817289,11.715982,0.242608,-0.018562


In [26]:
results_fin = pd.concat([results, results_svm, results_dt, results_rfr, results_xgb])

In [27]:
results_fin.to_csv('results_1.csv')

In [29]:
results_fin.sort_values('Adj R2 test', ascending=False)

Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,XGBoost,label_cat,1.68168,4.880431,0.641069,0.597297,2.777064,11.265214,0.271748,0.020627
0,XGBoost,cof_cat,1.709699,4.956512,0.635474,0.59102,2.761989,11.309377,0.268893,0.016788
0,XGBoost,target_ord,1.702839,5.044269,0.62902,0.583778,2.810711,11.562822,0.252509,-0.005246
0,Random Forest,cof_cat_ord,1.90434,5.907486,0.565535,0.512551,2.795142,11.638422,0.247622,-0.011819
0,XGBoost,target_mean,1.653907,4.701562,0.654224,0.612057,2.817289,11.715982,0.242608,-0.018562
0,Random Forest,target_mean,1.852995,5.600619,0.588103,0.537872,2.860471,11.898233,0.230826,-0.034406
0,Random Forest,target_ord,2.335626,8.738981,0.357293,0.278914,2.838894,11.971444,0.226093,-0.040771
0,XGBoost,cof_cat_ord,1.759849,5.196963,0.61779,0.571179,2.829321,12.014071,0.223338,-0.044477
0,Random Forest,label_cat,2.330609,8.724792,0.358336,0.280085,2.858023,12.175087,0.212929,-0.058475
0,LinReg,cof_cat,2.474169,9.773287,0.281225,0.19357,2.891552,12.22716,0.209562,-0.063003


Unnamed: 0,Method,Transformer,MAE train,RMSE train,R2 train,Adj R2 train,MAE test,RMSE test,R2 test,Adj R2 test
0,XGBoost,label_cat,1.68168,4.880431,0.641069,0.597297,2.777064,11.265214,0.271748,0.020627
0,XGBoost,cof_cat,1.709699,4.956512,0.635474,0.59102,2.761989,11.309377,0.268893,0.016788
0,XGBoost,target_ord,1.702839,5.044269,0.62902,0.583778,2.810711,11.562822,0.252509,-0.005246
0,Random Forest,cof_cat_ord,1.90434,5.907486,0.565535,0.512551,2.795142,11.638422,0.247622,-0.011819
0,XGBoost,target_mean,1.653907,4.701562,0.654224,0.612057,2.817289,11.715982,0.242608,-0.018562
0,Random Forest,target_mean,1.852995,5.600619,0.588103,0.537872,2.860471,11.898233,0.230826,-0.034406
0,Random Forest,target_ord,2.335626,8.738981,0.357293,0.278914,2.838894,11.971444,0.226093,-0.040771
0,XGBoost,cof_cat_ord,1.759849,5.196963,0.61779,0.571179,2.829321,12.014071,0.223338,-0.044477
0,Random Forest,label_cat,2.330609,8.724792,0.358336,0.280085,2.858023,12.175087,0.212929,-0.058475
0,LinReg,cof_cat,2.474169,9.773287,0.281225,0.19357,2.891552,12.22716,0.209562,-0.063003
