## Import Libraries

In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

## Load Data

In [2]:
tr = pd.read_csv('train.csv')
ts = pd.read_csv('test.csv')

In [3]:
tr.shape

(300000, 26)

In [4]:
tr.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,C,B,B,A,E,C,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,A,A,B,D,A,F,A,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,A,C,B,D,A,D,A,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,A,C,B,D,A,E,C,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,A,C,B,D,A,E,A,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [5]:
tr.describe()

Unnamed: 0,id,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
count,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0,300000.0
mean,250018.576947,0.527335,0.460926,0.490498,0.496689,0.491654,0.510526,0.467476,0.537119,0.498456,0.474872,0.474492,0.473216,0.494561,0.508273,8.241979
std,144450.15001,0.230599,0.214003,0.253346,0.219199,0.240074,0.228232,0.210331,0.21814,0.23992,0.218007,0.255949,0.222022,0.247292,0.22295,0.746555
min,1.0,-0.118039,-0.069309,-0.056104,0.130676,0.255908,0.045915,-0.224689,0.203763,-0.260275,0.117896,0.048732,0.052608,-0.074208,0.15105,0.140329
25%,124772.5,0.405965,0.310494,0.300604,0.329783,0.284188,0.354141,0.342873,0.355825,0.332486,0.306874,0.276017,0.308151,0.289074,0.300669,7.742071
50%,250002.5,0.497053,0.427903,0.502462,0.465026,0.39047,0.488865,0.429383,0.504661,0.439151,0.43462,0.459975,0.433812,0.422887,0.4724,8.191373
75%,375226.5,0.66806,0.615113,0.647512,0.664451,0.696599,0.669625,0.573383,0.703441,0.606056,0.614333,0.691579,0.642057,0.714502,0.758447,8.728634
max,499999.0,1.058443,0.887253,1.034704,1.03956,1.055424,1.067649,1.111552,1.032837,1.040229,0.982922,1.05596,1.071444,0.975035,0.905992,10.411992


In [6]:
tr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300000 entries, 0 to 299999
Data columns (total 26 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   id      300000 non-null  int64  
 1   cat0    300000 non-null  object 
 2   cat1    300000 non-null  object 
 3   cat2    300000 non-null  object 
 4   cat3    300000 non-null  object 
 5   cat4    300000 non-null  object 
 6   cat5    300000 non-null  object 
 7   cat6    300000 non-null  object 
 8   cat7    300000 non-null  object 
 9   cat8    300000 non-null  object 
 10  cat9    300000 non-null  object 
 11  cont0   300000 non-null  float64
 12  cont1   300000 non-null  float64
 13  cont2   300000 non-null  float64
 14  cont3   300000 non-null  float64
 15  cont4   300000 non-null  float64
 16  cont5   300000 non-null  float64
 17  cont6   300000 non-null  float64
 18  cont7   300000 non-null  float64
 19  cont8   300000 non-null  float64
 20  cont9   300000 non-null  float64
 21  cont10  30

## Preprocess Data

In [7]:
## Drop id Column
train = tr.drop('id', axis = 'columns')
test = ts.drop('id', axis = 'columns')
print('Train Data shape before dropping "id" Column {} \nTrain Data shape after dropping "id" Column {}'.format(tr.shape,train.shape))
print('\nTest Data shape before dropping "id" Column {} \nTest Data shape after dropping "id" Column {}'.format(ts.shape,test.shape))

## Separate Features and Target
y = tr['target']
features = train.drop('target', axis = 'columns')
print('\nTarget Shape {} \nFeatures Data shape {}'.format(y.shape,features.shape))

Train Data shape before dropping "id" Column (300000, 26) 
Train Data shape after dropping "id" Column (300000, 25)

Test Data shape before dropping "id" Column (200000, 25) 
Test Data shape after dropping "id" Column (200000, 24)

Target Shape (300000,) 
Features Data shape (300000, 24)


In [8]:
categorical_cols = [col for col in features.columns if features[col].dtype == 'O']
num_cols = [col for col in features.columns if features[col].dtype in ('int64', 'float64')]

In [9]:
i = 0
for col in categorical_cols:
    print('Number of Categories in {} column are {}'.format(col, len(features[col].unique())))
    i += len(features[col].unique())
print('Total Unique entries in Categorical columns {}'.format(i))

Number of Categories in cat0 column are 2
Number of Categories in cat1 column are 2
Number of Categories in cat2 column are 2
Number of Categories in cat3 column are 4
Number of Categories in cat4 column are 4
Number of Categories in cat5 column are 4
Number of Categories in cat6 column are 8
Number of Categories in cat7 column are 8
Number of Categories in cat8 column are 7
Number of Categories in cat9 column are 15
Total Unique entries in Categorical columns 56


In [10]:
# categorical_cols = categorical_cols[0:-1]
# high_cardinality_cols = ['cat9']
# print('Categorical Columns {}\nHigh Cardinality Categorical Columns {}\nNumerical Columns {}'.format(len(categorical_cols),
#                                                                                                     len(high_cardinality_cols),
#                                                                                                     len(num_cols)))

## Split Train and Validation Data

In [11]:
## Train test split
y = tr['target']
X = features.copy()
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=0)

In [12]:
print('X_train {}'.format(X_train.shape))
print('X_valid {}'.format(X_valid.shape))
print('y_train {}'.format(y_train.shape))
print('y_valid {}'.format(y_valid.shape))

X_train (240000, 24)
X_valid (60000, 24)
y_train (240000,)
y_valid (60000,)


## OnehotEncode all traindata

In [19]:
X1 = pd.get_dummies(X, drop_first=True)
print(X1.shape)

(300000, 60)


## Check if all Categorical columns are matching in Train and Test Files

In [13]:
(np.sort(train['cat9'].unique()) == np.sort(test['cat9'].unique()))

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

In [14]:
for col in categorical_cols:
    print( (np.sort(train[col].unique()) == np.sort(test[col].unique())).all() )

True
True
True
True
True
True
True
True
True
True


## OneHotEncode Categorical Columns using Scikit Learn

In [15]:
ohe = OneHotEncoder(drop = 'first', sparse=False)

oh_cat_cols_train = pd.DataFrame(ohe.fit_transform(X_train[categorical_cols]))
oh_cat_cols_train.index = X_train.index
num_X_train = X_train.drop(categorical_cols, axis=1)
OH_X_train = pd.concat([num_X_train, oh_cat_cols_train], axis=1)

oh_cat_cols_val = pd.DataFrame(ohe.transform(X_valid[categorical_cols]))
oh_cat_cols_val.index = X_valid.index
num_X_val = X_valid.drop(categorical_cols, axis=1)
OH_X_val = pd.concat([num_X_val, oh_cat_cols_val], axis=1)

oh_cat_cols_test = pd.DataFrame(ohe.transform(test[categorical_cols]))
oh_cat_cols_test.index = test.index
num_X_test = test.drop(categorical_cols, axis=1)
OH_X_test = pd.concat([num_X_test, oh_cat_cols_test], axis=1)

In [16]:
print('X_train OHE {}'.format(OH_X_train.shape))
print('X_valid OHE {}'.format(OH_X_val.shape))
print('X_test OHE {}'.format(OH_X_test.shape))

X_train OHE (240000, 60)
X_valid OHE (60000, 60)
X_test OHE (200000, 60)


## OneHotEncode Categorical Columns using Pandas Dummies

In [20]:
X_train_dum = pd.get_dummies(X_train, drop_first=True)
X_valid_dum = pd.get_dummies(X_valid, drop_first=True)
X_test_dum = pd.get_dummies(test, drop_first=True)

X_train_dum, X_valid_dum = X_train_dum.align(X_valid_dum, join='left', axis=1)
X_train_dum, X_test_dum = X_train_dum.align(X_test_dum, join='left', axis=1)

In [21]:
print('X_train Hot Encode Pandas {}'.format(X_train_dum.shape))
print('X_valid Hot Encode Pandas {}'.format(X_valid_dum.shape))
print('X_test Hot Encode Pandas {}'.format(X_test_dum.shape))

X_train Hot Encode Pandas (240000, 60)
X_valid Hot Encode Pandas (60000, 60)
X_test Hot Encode Pandas (200000, 60)


## 1. Decision Tree with Ordinal Encoding

In [19]:
## Ordinal Encoding
y1 = tr['target']
X1 = tr.drop('target', axis = 'columns')
X_train1, X_valid1, y_train1, y_valid1 = train_test_split(X1, y1, test_size=0.2, random_state=0)

ordinal_encoder = OrdinalEncoder()
X_train1[categorical_cols] = ordinal_encoder.fit_transform(X_train1[categorical_cols])
X_valid1[categorical_cols] = ordinal_encoder.transform(X_valid1[categorical_cols])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentati

In [20]:
dt_model = DecisionTreeRegressor()
dt_model.fit(X_train1, y_train1)
dt_pred = dt_model.predict(X_valid1)
dt_mse = mean_squared_error(y_valid1, dt_pred, squared=False)
dt_mae = mean_absolute_error(y_valid1, dt_pred)
print('Root Mean Squared Error {}\nMean Absolute Error {}'.format(dt_mse, dt_mae))

Root Mean Squared Error 1.0622367622628097
Mean Absolute Error 0.8406284951997104


## 2. Decision Tree with OneHot Encoding (Sklean)

In [21]:
dt_model = DecisionTreeRegressor()
dt_model.fit(OH_X_train, y_train)
dt_pred = dt_model.predict(OH_X_val)
dt_mse = mean_squared_error(y_valid, dt_pred, squared=False)
dt_mae = mean_absolute_error(y_valid, dt_pred)
print('Root Mean Squared Error {}\nMean Absolute Error {}'.format(dt_mse, dt_mae))

Root Mean Squared Error 1.059524553186826
Mean Absolute Error 0.8381901174891734


## 3. Decision Tree with OneHot Encoding (Pandas)

In [22]:
dt_model = DecisionTreeRegressor()
dt_model.fit(X_train_dum, y_train)
dt_pred = dt_model.predict(X_valid_dum)
dt_mse = mean_squared_error(y_valid, dt_pred, squared=False)
dt_mae = mean_absolute_error(y_valid, dt_pred)
print('Root Mean Squared Error {}\nMean Absolute Error {}'.format(dt_mse, dt_mae))

Root Mean Squared Error 1.0569472471458274
Mean Absolute Error 0.8361762911653638


## 4. RandomForest with OneHot Encoding

In [23]:
rf_model = RandomForestRegressor(random_state=1)
rf_model.fit(OH_X_train, y_train)
rf_pred = rf_model.predict(OH_X_val)
rf_mse = mean_squared_error(y_valid, rf_pred, squared = False)
rf_mae = mean_absolute_error(y_valid, rf_pred)
print('Root Mean Squared Error {}\nMean Absolute Error {}'.format(rf_mse, rf_mae))

Root Mean Squared Error 0.7371244199109364
Mean Absolute Error 0.584682034507699


## 5. XGBoost with OneHot Encoding

In [24]:
xg_model = XGBRegressor(n_estimators = 500, n_jobs = -1)
xg_model.fit(OH_X_train, y_train)
xg_pred = xg_model.predict(OH_X_val)
xg_mse = mean_squared_error(y_valid, xg_pred, squared = False)
xg_mae = mean_absolute_error(y_valid, xg_pred)
print('Root Mean Squared Error {}\nMean Absolute Error {}'.format(xg_mse, xg_mae))

Root Mean Squared Error 0.7346368612780398
Mean Absolute Error 0.5838970737118078


## 6. XGBoost with OneHot Encoding (Optimized - A)

In [25]:
xg_model = XGBRegressor(n_estimators = 1000, learning_rate=0.05, n_jobs = -1)
xg_model.fit(OH_X_train, y_train)
xg_pred = xg_model.predict(OH_X_val)
xg_mse = mean_squared_error(y_valid, xg_pred, squared = False)
xg_mae = mean_absolute_error(y_valid, xg_pred)
print('Root Mean Squared Error {}\nMean Absolute Error {}'.format(xg_mse, xg_mae))

Root Mean Squared Error 0.7224471520010992
Mean Absolute Error 0.574430716823482


## 7. XGBoost Multiple Runs

In [21]:
def xgb_boost_reg(est, lr, x_train, y_train, x_valid, y_valid):
    xg_model = XGBRegressor(n_estimators = est, learning_rate=lr, n_jobs = -1, random_state=0)
    xg_model.fit(x_train, y_train)
    xg_pred = xg_model.predict(x_valid)
    xg_mse = mean_squared_error(y_valid, xg_pred, squared = False)
    xg_mae = mean_absolute_error(y_valid, xg_pred)
    #print('Root Mean Squared Error {}\nMean Absolute Error {}'.format(xg_mse, xg_mae))
    return xg_mse, xg_mae

In [22]:
# mse , mae = xgb_boost_reg(est = 1000, lr = 0.05, x_train = OH_X_train, y_train = y_train, x_valid = OH_X_val, y_valid = y_valid)
# print(mse, mae)

In [23]:
n_est = [750, 1000, 1250]
lr = [0.04, 0.05, 0.06, 0.07, 0.08]
lr.sort()
results_mae = pd.DataFrame(index=n_est, columns=lr)
results_mse = pd.DataFrame(index=n_est, columns=lr)
results_mae

Unnamed: 0,0.04,0.05,0.06,0.07,0.08
750,,,,,
1000,,,,,
1250,,,,,


In [None]:
for i,c in enumerate(results_mae):
    #print(i, c)
    for r in range(len(results_mae)):
        #print(n_est[r])
        mse , mae = xgb_boost_reg(est = n_est[r], lr = c, x_train = OH_X_train, y_train = y_train, x_valid = OH_X_val, y_valid = y_valid)
        results_mse.iloc[r, i] = mse
        results_mae.iloc[r, i] = mae

In [None]:
results_mse

In [None]:
results_mse.min()

In [65]:
results_mse.min().min()

0.722139979052513

## 8. XGBoost Multiple Runs (Optimized XGB Parameters)

In [66]:
params = {'learning_rate': 0.07853392035787837, 'reg_lambda': 1.7549293092194938e-05, 
          'reg_alpha': 14.68267919457715, 'subsample': 0.8031450486786944, 
          'colsample_bytree': 0.170759104940733, 'max_depth': 3}
xg_model = XGBRegressor(n_estimators = 5000, n_jobs = -1, random_state = 0, **params)
xg_model.fit(OH_X_train, y_train, early_stopping_rounds=300, eval_set=[(OH_X_val, y_valid)], verbose=1000)
xg_pred = xg_model.predict(OH_X_val)
xg_mse = mean_squared_error(y_valid, xg_pred, squared = False)
xg_mae = mean_absolute_error(y_valid, xg_pred)
print('Root Mean Squared Error {}\nMean Absolute Error {}'.format(xg_mse, xg_mae))

[0]	validation_0-rmse:7.16997
[1000]	validation_0-rmse:0.72059
[2000]	validation_0-rmse:0.71879
[3000]	validation_0-rmse:0.71863
[3207]	validation_0-rmse:0.71867
Root Mean Squared Error 0.7186118486288169
Mean Absolute Error 0.5718444099106272


## 9. Default Model Performance from pycaret

In [22]:
from pycaret.regression import *

In [27]:
train = tr.drop('id', axis = 'columns')
X2 = pd.get_dummies(train, drop_first=True)
print(X2.shape)

(300000, 61)


In [28]:
# Provididing Clean Data to setup
s = setup(X2, target = 'target', session_id = 123)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,target
2,Original Data,"(300000, 61)"
3,Missing Values,False
4,Numeric Features,60
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(209999, 60)"


In [29]:
best = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
catboost,CatBoost Regressor,0.5733,0.5199,0.721,0.0653,0.0784,0.0705,20.93
lightgbm,Light Gradient Boosting Machine,0.5757,0.5255,0.7249,0.0553,0.0789,0.0708,1.906
xgboost,Extreme Gradient Boosting,0.5767,0.5261,0.7253,0.0541,0.0789,0.0709,30.983
gbr,Gradient Boosting Regressor,0.5799,0.5337,0.7306,0.0405,0.0795,0.0714,42.508
rf,Random Forest Regressor,0.5836,0.5404,0.7351,0.0286,0.08,0.0719,165.788
lr,Linear Regression,0.585,0.5438,0.7374,0.0224,0.0803,0.072,1.186
ridge,Ridge Regression,0.585,0.5438,0.7374,0.0224,0.0803,0.072,0.131
br,Bayesian Ridge,0.5849,0.5438,0.7374,0.0224,0.0803,0.072,1.857
lar,Least Angle Regression,0.585,0.5438,0.7374,0.0223,0.0803,0.072,0.145
huber,Huber Regressor,0.5843,0.5451,0.7383,0.0201,0.0803,0.0717,11.851


In [41]:
model = create_model('lightgbm')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,0.5772,0.5246,0.7243,0.0566,0.0784,0.0705
1,0.5717,0.5147,0.7174,0.0535,0.0774,0.0697
2,0.5796,0.5295,0.7276,0.0539,0.0789,0.0709
3,0.5777,0.5309,0.7286,0.0502,0.0796,0.0709
4,0.5797,0.5343,0.731,0.0543,0.0797,0.0712
5,0.5753,0.5261,0.7253,0.0545,0.0786,0.0704
6,0.5744,0.5231,0.7233,0.0595,0.0788,0.0706
7,0.5736,0.5258,0.7251,0.0571,0.0798,0.0729
8,0.5755,0.5259,0.7252,0.0588,0.0792,0.0709
9,0.5724,0.5198,0.721,0.0549,0.0784,0.0703


In [42]:
print(model)

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.1, max_depth=-1,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
              random_state=123, reg_alpha=0.0, reg_lambda=0.0, silent=True,
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)


In [47]:
lg_model = LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.1, max_depth=-1,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
              random_state=123, reg_alpha=0.0, reg_lambda=0.0, silent=True,
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)
lg_model.fit(OH_X_train, y_train)
lg_pred = lg_model.predict(OH_X_val)
lg_mse = mean_squared_error(y_valid, lg_pred, squared = False)
lg_mae = mean_absolute_error(y_valid, lg_pred)
print('Root Mean Squared Error {}\nMean Absolute Error {}'.format(lg_mse, lg_mae))

Root Mean Squared Error 0.7259878625204318
Mean Absolute Error 0.5759559951762175


## 10. Optimized Model from pycaret

In [48]:
tuned_model_1 = tune_model(model, search_library = 'optuna')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,0.5771,0.5237,0.7237,0.0582,0.0783,0.0705
1,0.5732,0.5161,0.7184,0.0508,0.0775,0.0699
2,0.5803,0.5306,0.7284,0.0519,0.079,0.071
3,0.5777,0.5304,0.7283,0.0511,0.0795,0.0709
4,0.5806,0.5348,0.7313,0.0534,0.0797,0.0713
5,0.5747,0.524,0.7239,0.0582,0.0784,0.0703
6,0.5742,0.5217,0.7223,0.0621,0.0787,0.0705
7,0.5738,0.5242,0.724,0.0599,0.0797,0.0729
8,0.5756,0.5246,0.7243,0.0612,0.079,0.0709
9,0.572,0.5197,0.7209,0.0551,0.0784,0.0702


In [49]:
print(tuned_model_1)

LGBMRegressor(bagging_fraction=0.43679733794174347, bagging_freq=4,
              boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              feature_fraction=0.9001841001160923, importance_type='split',
              learning_rate=0.05439296712851782, max_depth=-1,
              min_child_samples=81, min_child_weight=0.001,
              min_split_gain=0.7394984245515656, n_estimators=264, n_jobs=-1,
              num_leaves=109, objective=None, random_state=123,
              reg_alpha=0.1587450786338632, reg_lambda=8.859254362320617e-10,
              silent=True, subsample=1.0, subsample_for_bin=200000,
              subsample_freq=0)


In [50]:
tuned_model_2 = tune_model(model, search_library = 'scikit-learn')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,0.5741,0.5187,0.7202,0.0672,0.078,0.0701
1,0.5703,0.5107,0.7146,0.0608,0.0771,0.0695
2,0.5785,0.5262,0.7254,0.0598,0.0786,0.0707
3,0.5758,0.525,0.7245,0.0608,0.0791,0.0707
4,0.5792,0.5295,0.7276,0.0628,0.0793,0.0711
5,0.5742,0.5218,0.7224,0.0622,0.0782,0.0702
6,0.5726,0.5184,0.72,0.0681,0.0784,0.0703
7,0.5714,0.5206,0.7216,0.0663,0.0794,0.0726
8,0.5734,0.5205,0.7214,0.0685,0.0787,0.0706
9,0.5708,0.5155,0.718,0.0628,0.0781,0.07


In [51]:
tuned_model_3 = tune_model(model, search_library = 'scikit-optimize')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,0.5777,0.5265,0.7256,0.0533,0.0786,0.0706
1,0.5723,0.5163,0.7186,0.0504,0.0775,0.0698
2,0.5807,0.5317,0.7292,0.0499,0.0791,0.071
3,0.5782,0.532,0.7294,0.0483,0.0797,0.071
4,0.5812,0.5362,0.7322,0.051,0.0799,0.0714
5,0.5758,0.5278,0.7265,0.0515,0.0787,0.0705
6,0.5761,0.5262,0.7254,0.0541,0.0791,0.0708
7,0.5751,0.5282,0.7268,0.0526,0.08,0.0732
8,0.5763,0.5283,0.7268,0.0546,0.0793,0.071
9,0.5734,0.522,0.7225,0.051,0.0786,0.0704
