# Step 1: Import helpful libraries

In [2]:
# Familiar imports

#basic tools 
import time
import sys
import os
import numpy as np
import pandas as pd

#graph, plots
import matplotlib.pyplot as plt
import seaborn as sns

# For ordinal encoding categorical variables, splitting data
import sklearn as sk
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from sklearn.model_selection import KFold, GridSearchCV, train_test_split, StratifiedKFold, cross_val_score
from sklearn.metrics import mean_squared_error, roc_auc_score, roc_curve

# For training LGBM
from lightgbm import LGBMRegressor
from tqdm import tqdm
import xgboost as xgb

#tuning hyperparameters
from skopt  import BayesSearchCV 

import optuna
from functools import partial
from termcolor import colored

import shap
import warnings
#warnings.filterwarnings('ignore')
warnings.simplefilter(action='ignore', category=FutureWarning)

print("set up complete")

set up complete


In [3]:
#Python libraries and their versions used for this problem
print('SciKit Learn:',sk.__version__)
print('Pandas:',pd.__version__)
print('Numpy:',np.__version__)
print('Seaborn:',sns.__version__)

SciKit Learn: 0.23.2
Pandas: 1.3.1
Numpy: 1.20.3
Seaborn: 0.11.2


# Step 2: Load the data

In [4]:
#https://towardsdatascience.com/make-working-with-large-dataframes-easier-at-least-for-your-memory-6f52b5f4b5c4
    
def reduce_mem_usage(df, verbose=True):
    numerics = ['int8','int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [66]:
%%time
#local
train = reduce_mem_usage(pd.read_csv("./dataset/train.csv",encoding='utf-8', index_col=0, low_memory=False))
test = reduce_mem_usage(pd.read_csv("./dataset/test.csv",encoding='utf-8', index_col=0, low_memory=False))

#Internet
#train = reduce_mem_usage(pd.read_csv("../input/30-days-of-ml/train.csv",encoding='utf-8', index_col=0, low_memory=False))
#test = reduce_mem_usage(pd.read_csv("../input/30-days-of-ml/test.csv",encoding='utf-8', index_col=0, low_memory=False))

#Sem redução de espaço
#train = pd.read_csv("../input/30-days-of-ml/train.csv",encoding='utf-8', index_col=0, low_memory=False)
#test = pd.read_csv("../input/30-days-of-ml/test.csv",encoding='utf-8', index_col=0, low_memory=False)

print("\nShape of train set: ",train.shape)
print("Shape of test set: ",test.shape)

print("\nload complete")

Mem. usage decreased to 33.76 Mb (43.3% reduction)
Mem. usage decreased to 22.13 Mb (42.0% reduction)

Shape of train set:  (300000, 25)
Shape of test set:  (200000, 24)

load complete
Wall time: 2.05 s


In [6]:
# Preview data
train.head(3)

Unnamed: 0_level_0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,B,B,B,C,B,B,A,E,C,N,...,0.400391,0.160278,0.311035,0.389404,0.267578,0.237305,0.37793,0.32251,0.869629,8.117188
2,B,B,A,A,B,D,A,F,A,O,...,0.533203,0.559082,0.516113,0.594727,0.341553,0.90625,0.921875,0.261963,0.465088,8.484375
3,A,A,A,C,B,D,A,D,A,F,...,0.650391,0.375244,0.902344,0.555176,0.84375,0.749023,0.620117,0.541504,0.763672,8.367188


In [7]:
train.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
Int64Index: 300000 entries, 1 to 499999
Data columns (total 25 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   cat0    300000 non-null  object 
 1   cat1    300000 non-null  object 
 2   cat2    300000 non-null  object 
 3   cat3    300000 non-null  object 
 4   cat4    300000 non-null  object 
 5   cat5    300000 non-null  object 
 6   cat6    300000 non-null  object 
 7   cat7    300000 non-null  object 
 8   cat8    300000 non-null  object 
 9   cat9    300000 non-null  object 
 10  cont0   300000 non-null  float16
 11  cont1   300000 non-null  float16
 12  cont2   300000 non-null  float16
 13  cont3   300000 non-null  float16
 14  cont4   300000 non-null  float16
 15  cont5   300000 non-null  float16
 16  cont6   300000 non-null  float16
 17  cont7   300000 non-null  float16
 18  cont8   300000 non-null  float16
 19  cont9   300000 non-null  float16
 20  cont10  300000 non-null  float16
 21  cont11  30

In [8]:
print('Info about train data: ')
print('Number of rows:',colored(train.shape[0],'green'))
print('Number of columns:',colored(train.shape[1],'green'))
print('Number of missing values:',colored(sum(train.isna().sum()),'green'))

Info about train data: 
Number of rows: [32m300000[0m
Number of columns: [32m25[0m
Number of missing values: [32m0[0m


In [9]:
num_col = list(train.select_dtypes(include='float16').columns)
cat_cols = list(train.select_dtypes(include='object').columns)
num_col.remove('target')
print('Number of numerical columns is:',colored(len(num_col),'green'),
      '\nNumber of categorical columsn is:',colored(len(cat_cols),'green'))

Number of numerical columns is: [32m14[0m 
Number of categorical columsn is: [32m10[0m


In [10]:
print('target column basic statistics:')
target=train['target'].copy()
target.describe()

target column basic statistics:


count    300000.000000
mean               NaN
std           0.000000
min           0.140381
25%           7.742188
50%           8.187500
75%           8.726562
max          10.414062
Name: target, dtype: float64

In [67]:
# Separate target from features
y_train = train['target'].copy()
X_train = train.drop(['target'], axis=1).copy()

In [68]:
# Assuring that test data and whether or not it has the same columns as the train
if list(test.columns) == list(X_train.columns):
    print(colored('True', 'green'))  
else:
    print(colored('False', 'red'))  


[32mTrue[0m


# Step 3: Prepare the data

In [69]:
# Checking if there are missing values in the datasets
#Train
print(f'Train null values:',colored(X_train.isna().sum().sum(), 'green'))

#Test
print(f'Test null values:',colored(test.isna().sum().sum(), 'green'))

Train null values: [32m0[0m
Test null values: [32m0[0m


In [70]:
categorical_feature = np.where(X_train.dtypes != 'float16')[0].tolist()
categorical_feature_columns = X_train.select_dtypes(exclude=['float16']).columns
#categorical_feature_columns = [feature for feature in train.columns if 'cat' in feature]

In [71]:
#Checking if test categorical unique values are all subsets of their train peers

lis = []
for i in X_train[categorical_feature_columns].columns:
    test_vals = set(test[i].unique())
    X_vals = set(X_train[i].unique())
    lis.append(test_vals.issubset(X_vals))

print(colored(all(lis),'green'))

[32mTrue[0m


### Features Standarization

In [72]:
#cat_cols = [feature for feature in train.columns if 'cat' in feature]
cat_cols = categorical_feature_columns.tolist()

def label_encoder(df):
    for feature in cat_cols:
        le = LabelEncoder()
        le.fit(df[feature])
        df[feature] = le.transform(df[feature])
    return df

X_train = label_encoder(X_train)
X_test = label_encoder(X_test)

print('Info about train data: ')
print('Number of rows:',colored(X_train.shape[0],'green'))
print('Number of columns:',colored(X_train.shape[1],'green'))

print('\nInfo about test data: ')
print('Number of rows:',colored(test.shape[0],'green'))
print('Number of columns:',colored(test.shape[1],'green'))

Info about train data: 
Number of rows: [32m300000[0m
Number of columns: [32m24[0m

Info about test data: 
Number of rows: [32m200000[0m
Number of columns: [32m24[0m


In [24]:
# Extreme Fine Tuning LGBM using 7-step training
# https://www.kaggle.com/awwalmalhi/extreme-fine-tuning-lgbm-using-7-step-training#Extreme-Fine-Tuning-of-LGBM-using-Incremental-training

def objective(trial, X, y, name='xgb'):
        
    params = {'max_depth':trial.suggest_int('max_depth', 5, 50),
              'n_estimators':200000,
              #'boosting':trial.suggest_categorical('boosting', ['gbdt', 'dart', 'goss']),
              'subsample': trial.suggest_uniform('subsample', 0.2, 1.0),
              'colsample_bytree':trial.suggest_uniform('colsample_bytree', 0.2, 1.0),
              'learning_rate':trial.suggest_uniform('learning_rate', 0.007, 0.02),
              'reg_lambda':trial.suggest_uniform('reg_lambda', 0.01, 50),
              'reg_alpha':trial.suggest_uniform('reg_alpha', 0.01, 50),
              'min_child_samples':trial.suggest_int('min_child_samples', 5, 100),
              'num_leaves':trial.suggest_int('num_leaves', 10, 200),
              'n_jobs' : -1,
              'metric':'rmse',
              'max_bin':trial.suggest_int('max_bin', 300, 1000),
              'cat_smooth':trial.suggest_int('cat_smooth', 5, 100),
              'cat_l2':trial.suggest_loguniform('cat_l2', 1e-3, 100)}

    model = LGBMRegressor(**params)
                  
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)
    

    model.fit(X_train, y_train, eval_set=[(X_val, y_val)],
              eval_metric=['rmse'],
              early_stopping_rounds=250, 
              categorical_feature==[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
              #callbacks=[optuna.integration.LightGBMPruningCallback(trial, metric='rmse')],
              verbose=0)

    train_score = np.round(np.sqrt(mean_squared_error(y_train, model.predict(X_train))), 5)
    test_score = np.round(np.sqrt(mean_squared_error(y_val, model.predict(X_val))), 5)
                  
    print(f'TRAIN RMSE : {train_score} || TEST RMSE : {test_score}')
                  
    return test_score

In [26]:
%%time

optimize = partial(objective, X=X_train, y=y_train)

study_lgbm = optuna.create_study(direction='minimize')
study_lgbm.optimize(optimize, n_trials=50)

# i have commented out the trials so as to cut short the notebook execution time.

[32m[I 2021-08-25 19:12:27,588][0m A new study created in memory with name: no-name-3f603163-b855-4681-bf74-9c40b0075d87[0m
Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:13:27,693][0m Trial 0 finished with value: 0.7235 and parameters: {'max_depth': 24, 'subsample': 0.650377538253578, 'colsample_bytree': 0.9739991188836374, 'learning_rate': 0.017736556613430553, 'reg_lambda': 17.060055750570093, 'reg_alpha': 43.13528717722097, 'min_child_samples': 96, 'num_leaves': 188, 'max_bin': 306, 'cat_smooth': 90, 'cat_l2': 0.0011258262212089634}. Best is trial 0 with value: 0.7235.[0m


TRAIN RMSE : 0.67568 || TEST RMSE : 0.7235


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:15:16,114][0m Trial 1 finished with value: 0.72209 and parameters: {'max_depth': 22, 'subsample': 0.5935044134076226, 'colsample_bytree': 0.8972929315673037, 'learning_rate': 0.0078060195541806154, 'reg_lambda': 11.88916361331115, 'reg_alpha': 7.134208679906236, 'min_child_samples': 15, 'num_leaves': 62, 'max_bin': 429, 'cat_smooth': 53, 'cat_l2': 2.8329429595243463}. Best is trial 1 with value: 0.72209.[0m


TRAIN RMSE : 0.681 || TEST RMSE : 0.72209


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:17:11,376][0m Trial 2 finished with value: 0.7216 and parameters: {'max_depth': 40, 'subsample': 0.20820416839057299, 'colsample_bytree': 0.8437891087708023, 'learning_rate': 0.012594872487482402, 'reg_lambda': 18.114208708321296, 'reg_alpha': 24.22601312953062, 'min_child_samples': 61, 'num_leaves': 129, 'max_bin': 950, 'cat_smooth': 90, 'cat_l2': 0.06772555618953868}. Best is trial 2 with value: 0.7216.[0m


TRAIN RMSE : 0.66712 || TEST RMSE : 0.7216


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:19:15,677][0m Trial 3 finished with value: 0.72021 and parameters: {'max_depth': 45, 'subsample': 0.4833929762981418, 'colsample_bytree': 0.6454825354900151, 'learning_rate': 0.009727983224596688, 'reg_lambda': 0.9093054197885521, 'reg_alpha': 15.685818505564512, 'min_child_samples': 56, 'num_leaves': 16, 'max_bin': 708, 'cat_smooth': 48, 'cat_l2': 0.5262628598083876}. Best is trial 3 with value: 0.72021.[0m


TRAIN RMSE : 0.69797 || TEST RMSE : 0.72021


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:20:30,243][0m Trial 4 finished with value: 0.72038 and parameters: {'max_depth': 32, 'subsample': 0.704280034057801, 'colsample_bytree': 0.5755978076361812, 'learning_rate': 0.015350453088061114, 'reg_lambda': 32.70088229476231, 'reg_alpha': 21.26449969455633, 'min_child_samples': 96, 'num_leaves': 20, 'max_bin': 447, 'cat_smooth': 79, 'cat_l2': 0.0010331149593779958}. Best is trial 3 with value: 0.72021.[0m


TRAIN RMSE : 0.69519 || TEST RMSE : 0.72038


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:21:18,555][0m Trial 5 finished with value: 0.7218 and parameters: {'max_depth': 15, 'subsample': 0.671523124305001, 'colsample_bytree': 0.9257161293256855, 'learning_rate': 0.01672738468113521, 'reg_lambda': 42.78049172420485, 'reg_alpha': 0.9977592587118951, 'min_child_samples': 85, 'num_leaves': 62, 'max_bin': 639, 'cat_smooth': 27, 'cat_l2': 22.525018532390792}. Best is trial 3 with value: 0.72021.[0m


TRAIN RMSE : 0.67916 || TEST RMSE : 0.7218


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:22:38,566][0m Trial 6 finished with value: 0.7232 and parameters: {'max_depth': 32, 'subsample': 0.8525388176145956, 'colsample_bytree': 0.8313762150122634, 'learning_rate': 0.015842981979030463, 'reg_lambda': 44.95130714594471, 'reg_alpha': 45.12460294707443, 'min_child_samples': 92, 'num_leaves': 181, 'max_bin': 327, 'cat_smooth': 52, 'cat_l2': 0.006053053353562807}. Best is trial 3 with value: 0.72021.[0m


TRAIN RMSE : 0.67605 || TEST RMSE : 0.7232


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:24:47,455][0m Trial 7 finished with value: 0.72206 and parameters: {'max_depth': 21, 'subsample': 0.4444726305266299, 'colsample_bytree': 0.9862106781951872, 'learning_rate': 0.008887171474770821, 'reg_lambda': 21.059672931098035, 'reg_alpha': 36.88671960798414, 'min_child_samples': 78, 'num_leaves': 96, 'max_bin': 832, 'cat_smooth': 8, 'cat_l2': 0.19672163705281415}. Best is trial 3 with value: 0.72021.[0m


TRAIN RMSE : 0.68355 || TEST RMSE : 0.72206


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:26:01,340][0m Trial 8 finished with value: 0.72112 and parameters: {'max_depth': 37, 'subsample': 0.6811220397863296, 'colsample_bytree': 0.8427387336344629, 'learning_rate': 0.012176349310086444, 'reg_lambda': 28.905983288111724, 'reg_alpha': 10.365858597121782, 'min_child_samples': 30, 'num_leaves': 29, 'max_bin': 520, 'cat_smooth': 18, 'cat_l2': 0.009528988642075624}. Best is trial 3 with value: 0.72021.[0m


TRAIN RMSE : 0.691 || TEST RMSE : 0.72112


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:27:24,317][0m Trial 9 finished with value: 0.72088 and parameters: {'max_depth': 8, 'subsample': 0.5508896096444098, 'colsample_bytree': 0.6025539731375689, 'learning_rate': 0.0160864882726673, 'reg_lambda': 16.621526843175662, 'reg_alpha': 26.99600790902615, 'min_child_samples': 65, 'num_leaves': 123, 'max_bin': 851, 'cat_smooth': 73, 'cat_l2': 85.8117652398949}. Best is trial 3 with value: 0.72021.[0m


TRAIN RMSE : 0.67738 || TEST RMSE : 0.72088


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:29:33,735][0m Trial 10 finished with value: 0.71954 and parameters: {'max_depth': 50, 'subsample': 0.2996673267123508, 'colsample_bytree': 0.30025440279015386, 'learning_rate': 0.010105407525495675, 'reg_lambda': 0.4046737702255515, 'reg_alpha': 15.023586768082899, 'min_child_samples': 39, 'num_leaves': 53, 'max_bin': 702, 'cat_smooth': 42, 'cat_l2': 1.9039426241047863}. Best is trial 10 with value: 0.71954.[0m


TRAIN RMSE : 0.68452 || TEST RMSE : 0.71954


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:32:48,595][0m Trial 11 finished with value: 0.7193 and parameters: {'max_depth': 50, 'subsample': 0.3090921013917277, 'colsample_bytree': 0.24300308463932585, 'learning_rate': 0.01029506625664597, 'reg_lambda': 2.0129407608212153, 'reg_alpha': 15.022139679421853, 'min_child_samples': 39, 'num_leaves': 10, 'max_bin': 700, 'cat_smooth': 41, 'cat_l2': 2.321197971374527}. Best is trial 11 with value: 0.7193.[0m


TRAIN RMSE : 0.70064 || TEST RMSE : 0.7193


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:34:19,243][0m Trial 12 finished with value: 0.71977 and parameters: {'max_depth': 49, 'subsample': 0.22777306435445055, 'colsample_bytree': 0.20948028871918734, 'learning_rate': 0.010602991408801067, 'reg_lambda': 0.09619103257989392, 'reg_alpha': 0.3960473825008144, 'min_child_samples': 34, 'num_leaves': 56, 'max_bin': 683, 'cat_smooth': 38, 'cat_l2': 4.3015636111598115}. Best is trial 11 with value: 0.7193.[0m


TRAIN RMSE : 0.67826 || TEST RMSE : 0.71977


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:35:47,737][0m Trial 13 finished with value: 0.71918 and parameters: {'max_depth': 50, 'subsample': 0.33768583704582744, 'colsample_bytree': 0.20501827858315472, 'learning_rate': 0.011099820588496049, 'reg_lambda': 7.003204391581924, 'reg_alpha': 13.997294228390965, 'min_child_samples': 39, 'num_leaves': 33, 'max_bin': 800, 'cat_smooth': 35, 'cat_l2': 2.9334241378243573}. Best is trial 13 with value: 0.71918.[0m


TRAIN RMSE : 0.69474 || TEST RMSE : 0.71918


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:38:45,431][0m Trial 14 finished with value: 0.72018 and parameters: {'max_depth': 44, 'subsample': 0.35276759384330053, 'colsample_bytree': 0.34929997344799546, 'learning_rate': 0.0071751640263610006, 'reg_lambda': 8.563318445208823, 'reg_alpha': 30.94525080832612, 'min_child_samples': 9, 'num_leaves': 88, 'max_bin': 823, 'cat_smooth': 30, 'cat_l2': 14.358722708744802}. Best is trial 13 with value: 0.71918.[0m


TRAIN RMSE : 0.68228 || TEST RMSE : 0.72018


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:41:05,634][0m Trial 15 finished with value: 0.719 and parameters: {'max_depth': 50, 'subsample': 0.34216474081254483, 'colsample_bytree': 0.2006118632739161, 'learning_rate': 0.013895107034426622, 'reg_lambda': 6.712527303261229, 'reg_alpha': 7.530782057595565, 'min_child_samples': 44, 'num_leaves': 10, 'max_bin': 974, 'cat_smooth': 66, 'cat_l2': 0.4895693860872336}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.69847 || TEST RMSE : 0.719


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:42:02,153][0m Trial 16 finished with value: 0.72 and parameters: {'max_depth': 43, 'subsample': 0.4006777497916289, 'colsample_bytree': 0.43023328501888547, 'learning_rate': 0.019682210707673127, 'reg_lambda': 8.359517176734585, 'reg_alpha': 5.801784316440268, 'min_child_samples': 21, 'num_leaves': 38, 'max_bin': 988, 'cat_smooth': 63, 'cat_l2': 0.392691103521174}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.67862 || TEST RMSE : 0.72


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:43:21,383][0m Trial 17 finished with value: 0.72085 and parameters: {'max_depth': 36, 'subsample': 0.20195505591749174, 'colsample_bytree': 0.4502183376359949, 'learning_rate': 0.013547072540770125, 'reg_lambda': 6.251730065981647, 'reg_alpha': 19.35906213463938, 'min_child_samples': 50, 'num_leaves': 150, 'max_bin': 917, 'cat_smooth': 62, 'cat_l2': 0.11427793256467242}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.66947 || TEST RMSE : 0.72085


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:44:40,053][0m Trial 18 finished with value: 0.71931 and parameters: {'max_depth': 49, 'subsample': 0.9817926282433634, 'colsample_bytree': 0.2027028609967189, 'learning_rate': 0.013924211094406258, 'reg_lambda': 36.42372761868906, 'reg_alpha': 9.111229067093285, 'min_child_samples': 48, 'num_leaves': 38, 'max_bin': 775, 'cat_smooth': 6, 'cat_l2': 0.01848490903103336}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.68887 || TEST RMSE : 0.71931


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:45:59,128][0m Trial 19 finished with value: 0.72037 and parameters: {'max_depth': 30, 'subsample': 0.5066359243494016, 'colsample_bytree': 0.34473378304688385, 'learning_rate': 0.011930929393246671, 'reg_lambda': 12.524949885685198, 'reg_alpha': 2.811944166479087, 'min_child_samples': 71, 'num_leaves': 81, 'max_bin': 907, 'cat_smooth': 74, 'cat_l2': 0.7479943324173777}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.66857 || TEST RMSE : 0.72037


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:47:52,617][0m Trial 20 finished with value: 0.71945 and parameters: {'max_depth': 8, 'subsample': 0.2801363546383644, 'colsample_bytree': 0.45756999285394434, 'learning_rate': 0.014468229061633853, 'reg_lambda': 25.500031863478565, 'reg_alpha': 11.840501374138679, 'min_child_samples': 26, 'num_leaves': 10, 'max_bin': 994, 'cat_smooth': 99, 'cat_l2': 38.83667899287722}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.69814 || TEST RMSE : 0.71945


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:50:22,838][0m Trial 21 finished with value: 0.71938 and parameters: {'max_depth': 50, 'subsample': 0.357503252248676, 'colsample_bytree': 0.2593198120011084, 'learning_rate': 0.01095735387132344, 'reg_lambda': 4.246209482891695, 'reg_alpha': 14.760738342312107, 'min_child_samples': 44, 'num_leaves': 13, 'max_bin': 595, 'cat_smooth': 32, 'cat_l2': 7.01621716821574}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.69872 || TEST RMSE : 0.71938


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:52:46,601][0m Trial 22 finished with value: 0.71904 and parameters: {'max_depth': 45, 'subsample': 0.3007128494118977, 'colsample_bytree': 0.21009541357835385, 'learning_rate': 0.00869625339872456, 'reg_lambda': 2.7212520776002838, 'reg_alpha': 16.5554712337564, 'min_child_samples': 36, 'num_leaves': 38, 'max_bin': 760, 'cat_smooth': 22, 'cat_l2': 1.1000408464523228}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.6896 || TEST RMSE : 0.71904


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:55:20,085][0m Trial 23 finished with value: 0.71929 and parameters: {'max_depth': 45, 'subsample': 0.4131972893655961, 'colsample_bytree': 0.2065876093358091, 'learning_rate': 0.008571134668218355, 'reg_lambda': 11.403640594284743, 'reg_alpha': 20.260632484290994, 'min_child_samples': 20, 'num_leaves': 35, 'max_bin': 778, 'cat_smooth': 20, 'cat_l2': 0.9229500952453041}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.69183 || TEST RMSE : 0.71929


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:56:37,570][0m Trial 24 finished with value: 0.72028 and parameters: {'max_depth': 40, 'subsample': 0.23855137680973965, 'colsample_bytree': 0.3657686108910122, 'learning_rate': 0.012429132822817, 'reg_lambda': 6.199814043441124, 'reg_alpha': 4.821676893158507, 'min_child_samples': 40, 'num_leaves': 74, 'max_bin': 764, 'cat_smooth': 16, 'cat_l2': 0.046018599885843524}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.67276 || TEST RMSE : 0.72028


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 19:59:10,096][0m Trial 25 finished with value: 0.71943 and parameters: {'max_depth': 48, 'subsample': 0.34443423096704395, 'colsample_bytree': 0.2814828244142844, 'learning_rate': 0.008929977579565013, 'reg_lambda': 13.453289601876792, 'reg_alpha': 28.63420530880592, 'min_child_samples': 31, 'num_leaves': 48, 'max_bin': 871, 'cat_smooth': 61, 'cat_l2': 1.1884611413181358}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.6888 || TEST RMSE : 0.71943


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:00:36,263][0m Trial 26 finished with value: 0.72063 and parameters: {'max_depth': 40, 'subsample': 0.40215523793332864, 'colsample_bytree': 0.713106288724184, 'learning_rate': 0.011360932343432495, 'reg_lambda': 4.194971408763891, 'reg_alpha': 12.453628120183076, 'min_child_samples': 54, 'num_leaves': 26, 'max_bin': 583, 'cat_smooth': 27, 'cat_l2': 10.704608986666877}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.6918 || TEST RMSE : 0.72063


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:01:41,077][0m Trial 27 finished with value: 0.72054 and parameters: {'max_depth': 45, 'subsample': 0.2666207280715861, 'colsample_bytree': 0.3979229934773975, 'learning_rate': 0.014759462354479117, 'reg_lambda': 21.966943441500547, 'reg_alpha': 23.76458629063557, 'min_child_samples': 8, 'num_leaves': 71, 'max_bin': 776, 'cat_smooth': 10, 'cat_l2': 0.1871790725564414}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.68566 || TEST RMSE : 0.72054


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:03:05,420][0m Trial 28 finished with value: 0.72074 and parameters: {'max_depth': 36, 'subsample': 0.4836265923416308, 'colsample_bytree': 0.5169651866605316, 'learning_rate': 0.01310822164797896, 'reg_lambda': 7.590093066011252, 'reg_alpha': 17.125255109912896, 'min_child_samples': 44, 'num_leaves': 112, 'max_bin': 956, 'cat_smooth': 35, 'cat_l2': 6.632605951617733}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.67118 || TEST RMSE : 0.72074


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:04:06,001][0m Trial 29 finished with value: 0.72029 and parameters: {'max_depth': 26, 'subsample': 0.5505074977689524, 'colsample_bytree': 0.30392546870902953, 'learning_rate': 0.018647234334410883, 'reg_lambda': 15.596265542623966, 'reg_alpha': 8.810023763474732, 'min_child_samples': 25, 'num_leaves': 198, 'max_bin': 901, 'cat_smooth': 46, 'cat_l2': 0.3396805641031043}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.66315 || TEST RMSE : 0.72029


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:07:06,087][0m Trial 30 finished with value: 0.71933 and parameters: {'max_depth': 47, 'subsample': 0.7654414438645958, 'colsample_bytree': 0.2017474057929701, 'learning_rate': 0.007667131655766272, 'reg_lambda': 3.733048946904602, 'reg_alpha': 49.92094982948064, 'min_child_samples': 60, 'num_leaves': 42, 'max_bin': 735, 'cat_smooth': 24, 'cat_l2': 1.3055122149499103}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.69686 || TEST RMSE : 0.71933


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:09:40,109][0m Trial 31 finished with value: 0.71919 and parameters: {'max_depth': 42, 'subsample': 0.41259778599053265, 'colsample_bytree': 0.20520038075454483, 'learning_rate': 0.008900799244108394, 'reg_lambda': 10.54003833286476, 'reg_alpha': 20.493490829872385, 'min_child_samples': 17, 'num_leaves': 31, 'max_bin': 822, 'cat_smooth': 18, 'cat_l2': 0.8931546419802718}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.69402 || TEST RMSE : 0.71919


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:11:51,334][0m Trial 32 finished with value: 0.7193 and parameters: {'max_depth': 42, 'subsample': 0.33599732380099856, 'colsample_bytree': 0.2474740732246089, 'learning_rate': 0.0094008045184289, 'reg_lambda': 10.100156071154919, 'reg_alpha': 18.62288271639465, 'min_child_samples': 16, 'num_leaves': 26, 'max_bin': 815, 'cat_smooth': 13, 'cat_l2': 3.9488775195984873}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.6954 || TEST RMSE : 0.7193


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:14:12,633][0m Trial 33 finished with value: 0.71982 and parameters: {'max_depth': 47, 'subsample': 0.4411890946018858, 'colsample_bytree': 0.31766162253623137, 'learning_rate': 0.008087139012689277, 'reg_lambda': 14.259217766877352, 'reg_alpha': 22.600984447824203, 'min_child_samples': 35, 'num_leaves': 48, 'max_bin': 873, 'cat_smooth': 22, 'cat_l2': 0.643134825255739}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.68931 || TEST RMSE : 0.71982


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:16:16,730][0m Trial 34 finished with value: 0.71909 and parameters: {'max_depth': 38, 'subsample': 0.37758486291648596, 'colsample_bytree': 0.2001530926045126, 'learning_rate': 0.011477011337424722, 'reg_lambda': 2.9496955350267013, 'reg_alpha': 32.7605850298968, 'min_child_samples': 14, 'num_leaves': 21, 'max_bin': 639, 'cat_smooth': 55, 'cat_l2': 0.20221569042903398}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.69959 || TEST RMSE : 0.71909


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:18:55,794][0m Trial 35 finished with value: 0.71941 and parameters: {'max_depth': 38, 'subsample': 0.27938724366885326, 'colsample_bytree': 0.2537487510753467, 'learning_rate': 0.011498612787230583, 'reg_lambda': 0.06445896767355741, 'reg_alpha': 33.698286319206765, 'min_child_samples': 46, 'num_leaves': 20, 'max_bin': 622, 'cat_smooth': 54, 'cat_l2': 0.05075132040622935}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.69662 || TEST RMSE : 0.71941


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:21:32,844][0m Trial 36 finished with value: 0.71982 and parameters: {'max_depth': 46, 'subsample': 0.3674339313245385, 'colsample_bytree': 0.3810799318283806, 'learning_rate': 0.012932561436666028, 'reg_lambda': 3.6273530426689438, 'reg_alpha': 38.872469156248144, 'min_child_samples': 56, 'num_leaves': 11, 'max_bin': 563, 'cat_smooth': 68, 'cat_l2': 0.11321415838759923}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.70181 || TEST RMSE : 0.71982


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:23:29,287][0m Trial 37 finished with value: 0.71974 and parameters: {'max_depth': 39, 'subsample': 0.6056669514185613, 'colsample_bytree': 0.2347796200571446, 'learning_rate': 0.009711381115959606, 'reg_lambda': 19.41062910446518, 'reg_alpha': 26.163366422178164, 'min_child_samples': 11, 'num_leaves': 68, 'max_bin': 663, 'cat_smooth': 56, 'cat_l2': 0.23586643545054786}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.68771 || TEST RMSE : 0.71974


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:25:22,018][0m Trial 38 finished with value: 0.72116 and parameters: {'max_depth': 33, 'subsample': 0.24797594511362775, 'colsample_bytree': 0.5057605875065739, 'learning_rate': 0.01118470791014212, 'reg_lambda': 2.082637078279319, 'reg_alpha': 32.926844309490576, 'min_child_samples': 26, 'num_leaves': 151, 'max_bin': 542, 'cat_smooth': 85, 'cat_l2': 1.9200684282966698}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.67448 || TEST RMSE : 0.72116


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:26:58,843][0m Trial 39 finished with value: 0.72057 and parameters: {'max_depth': 21, 'subsample': 0.4588367666126144, 'colsample_bytree': 0.7084275665915543, 'learning_rate': 0.013831747551054218, 'reg_lambda': 5.923769680022978, 'reg_alpha': 41.051900183458585, 'min_child_samples': 63, 'num_leaves': 21, 'max_bin': 487, 'cat_smooth': 49, 'cat_l2': 0.4702274395686788}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.69764 || TEST RMSE : 0.72057


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:27:46,283][0m Trial 40 finished with value: 0.71989 and parameters: {'max_depth': 42, 'subsample': 0.20921296434199443, 'colsample_bytree': 0.2658458840993267, 'learning_rate': 0.017413012313986825, 'reg_lambda': 9.650557068362472, 'reg_alpha': 6.906443962776353, 'min_child_samples': 5, 'num_leaves': 58, 'max_bin': 653, 'cat_smooth': 70, 'cat_l2': 0.02676077126268952}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.68801 || TEST RMSE : 0.71989


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:30:27,928][0m Trial 41 finished with value: 0.7192 and parameters: {'max_depth': 41, 'subsample': 0.31590788379241946, 'colsample_bytree': 0.20000693463243632, 'learning_rate': 0.007142291805146709, 'reg_lambda': 10.43271418773328, 'reg_alpha': 12.451138675783046, 'min_child_samples': 13, 'num_leaves': 32, 'max_bin': 745, 'cat_smooth': 56, 'cat_l2': 0.10406662914431655}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.69232 || TEST RMSE : 0.7192


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:32:54,963][0m Trial 42 finished with value: 0.71986 and parameters: {'max_depth': 34, 'subsample': 0.36997848146856216, 'colsample_bytree': 0.3229910661352174, 'learning_rate': 0.008204383518694408, 'reg_lambda': 0.0430479076990542, 'reg_alpha': 16.947539037084518, 'min_child_samples': 17, 'num_leaves': 43, 'max_bin': 729, 'cat_smooth': 15, 'cat_l2': 0.9936951740449463}. Best is trial 15 with value: 0.719.[0m


TRAIN RMSE : 0.68468 || TEST RMSE : 0.71986


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:35:55,589][0m Trial 43 finished with value: 0.71893 and parameters: {'max_depth': 44, 'subsample': 0.394545907670217, 'colsample_bytree': 0.20198138209747638, 'learning_rate': 0.009310766402801046, 'reg_lambda': 6.237661450596901, 'reg_alpha': 22.879691155166864, 'min_child_samples': 32, 'num_leaves': 17, 'max_bin': 797, 'cat_smooth': 81, 'cat_l2': 3.716241852773303}. Best is trial 43 with value: 0.71893.[0m


TRAIN RMSE : 0.6977 || TEST RMSE : 0.71893


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:38:23,920][0m Trial 44 finished with value: 0.71936 and parameters: {'max_depth': 47, 'subsample': 0.3127043579678655, 'colsample_bytree': 0.28539867315846956, 'learning_rate': 0.010367599438060869, 'reg_lambda': 2.473765675278724, 'reg_alpha': 28.218707623961762, 'min_child_samples': 34, 'num_leaves': 19, 'max_bin': 624, 'cat_smooth': 86, 'cat_l2': 3.2075042744034965}. Best is trial 43 with value: 0.71893.[0m


TRAIN RMSE : 0.69762 || TEST RMSE : 0.71936


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:40:55,917][0m Trial 45 finished with value: 0.7196 and parameters: {'max_depth': 44, 'subsample': 0.4956386880102689, 'colsample_bytree': 0.22628261825721158, 'learning_rate': 0.009373719349874458, 'reg_lambda': 5.9142702966057135, 'reg_alpha': 25.35754541109696, 'min_child_samples': 40, 'num_leaves': 19, 'max_bin': 337, 'cat_smooth': 99, 'cat_l2': 34.37058227811838}. Best is trial 43 with value: 0.71893.[0m


TRAIN RMSE : 0.69997 || TEST RMSE : 0.7196


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:43:09,244][0m Trial 46 finished with value: 0.71921 and parameters: {'max_depth': 50, 'subsample': 0.525070518497827, 'colsample_bytree': 0.2801394820943529, 'learning_rate': 0.012230470130642346, 'reg_lambda': 18.13031134948581, 'reg_alpha': 22.652852361191183, 'min_child_samples': 29, 'num_leaves': 12, 'max_bin': 801, 'cat_smooth': 79, 'cat_l2': 1.6421255047466654}. Best is trial 43 with value: 0.71893.[0m


TRAIN RMSE : 0.70165 || TEST RMSE : 0.71921


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:45:16,098][0m Trial 47 finished with value: 0.71917 and parameters: {'max_depth': 28, 'subsample': 0.38403472873789624, 'colsample_bytree': 0.20098486474014152, 'learning_rate': 0.010754100006837323, 'reg_lambda': 49.147284257318084, 'reg_alpha': 31.046818657712663, 'min_child_samples': 52, 'num_leaves': 27, 'max_bin': 679, 'cat_smooth': 43, 'cat_l2': 0.0019710705402769765}. Best is trial 43 with value: 0.71893.[0m


TRAIN RMSE : 0.69746 || TEST RMSE : 0.71917


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:47:21,164][0m Trial 48 finished with value: 0.71953 and parameters: {'max_depth': 18, 'subsample': 0.4452153671306576, 'colsample_bytree': 0.23557959160762562, 'learning_rate': 0.010022685952593374, 'reg_lambda': 46.94927257769855, 'reg_alpha': 34.15623516085317, 'min_child_samples': 69, 'num_leaves': 49, 'max_bin': 685, 'cat_smooth': 78, 'cat_l2': 0.0026550046029441324}. Best is trial 43 with value: 0.71893.[0m


TRAIN RMSE : 0.69275 || TEST RMSE : 0.71953


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.
[32m[I 2021-08-25 20:50:13,400][0m Trial 49 finished with value: 0.71949 and parameters: {'max_depth': 28, 'subsample': 0.5807950384301813, 'colsample_bytree': 0.33002006796193084, 'learning_rate': 0.010641056628858293, 'reg_lambda': 36.06690443440692, 'reg_alpha': 30.009217036543262, 'min_child_samples': 52, 'num_leaves': 10, 'max_bin': 716, 'cat_smooth': 44, 'cat_l2': 0.004829284369929262}. Best is trial 43 with value: 0.71893.[0m


TRAIN RMSE : 0.70157 || TEST RMSE : 0.71949
Wall time: 1h 37min 45s


In [73]:
#From the above optuna trials the best parameters i could find were the following ones!

study_lgbm.best_params

{'max_depth': 44,
 'subsample': 0.394545907670217,
 'colsample_bytree': 0.20198138209747638,
 'learning_rate': 0.009310766402801046,
 'reg_lambda': 6.237661450596901,
 'reg_alpha': 22.879691155166864,
 'min_child_samples': 32,
 'num_leaves': 17,
 'max_bin': 797,
 'cat_smooth': 81,
 'cat_l2': 3.716241852773303}

In [74]:
lgbm_params = {
 'max_depth': 44,
 'subsample': 0.394545907670217,
 'colsample_bytree': 0.20198138209747638,
 'learning_rate': 0.009310766402801046,
 'reg_lambda': 6.237661450596901,
 'reg_alpha': 22.879691155166864,
 'min_child_samples': 32,
 'num_leaves': 17,
 'max_bin': 797,
 'cat_smooth': 81,
 'cat_l2': 3.716241852773303,
 'metric': 'rmse', 
 'n_jobs': -1, 
 'n_estimators': 20000
}

In [75]:
%%time
# creating a pre trained model to use in objective.
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)
lgbm = LGBMRegressor(**lgbm_params).fit(X_tr, y_tr, eval_set=[(X_val, y_val)],
                          eval_metric=['rmse'],
                          early_stopping_rounds=250, 
                          categorical_feature=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                          #categorical_feature=categorical_feature,
                          verbose=0)

Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


Wall time: 1min 52s


In [89]:
def objective_extremefine(trial, X, y, model, iterations=5):

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)
    
    f1 = trial.suggest_uniform('f1', 0.1, 1.0)
    f2 = trial.suggest_uniform('f2', 0.1, 3)
    f3 = trial.suggest_int('f3', 20, 100)
    f4 = trial.suggest_int('f4', 20, 50)
    f5 = trial.suggest_int('f5', 1, 5)
    lr_factor = trial.suggest_uniform('lr_factor', 0.1, 0.7)
    
    params = lgbm_params.copy()
        
    #print(f'RMSE for base model is {np.sqrt(mean_squared_error(y_val, model.predict(X_val)))}')

    for i in range(1, iterations):
        if i > 2:
            params['reg_lambda'] *=  f1
            params['reg_alpha'] += f2
            params['num_leaves'] += f3
            params['min_child_samples'] -= f4
            params['cat_smooth'] -= f5
            params['learning_rate'] *= lr_factor
            #params['max_depth'] += f5

       
        params['learning_rate'] = params['learning_rate'] if params['learning_rate'] > 0.0009 else 0.0009
        # need to stop learning rate to reduce to a very insignificant value, hence we use this threshold

        model = LGBMRegressor(**params).fit(X_train, y_train, eval_set=[(X_val, y_val)],
                          eval_metric=['rmse'],
                          early_stopping_rounds=200, 
                          categorical_feature=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                          verbose=1000,
                          init_model=model if i > 1 else lgbm)# we will use pre trained model for first iteration
     
        print(f'RMSE for {i}th model is {np.sqrt(mean_squared_error(y_val, model.predict(X_val)))}')
           
              
    RMSE = mean_squared_error(y_val, model.predict(X_val), squared=False)
    return RMSE

In [90]:
study_ef = optuna.create_study(direction='minimize')
optimize = partial(objective_extremefine, X=X_train, y=y_train, model=LGBMRegressor)
study_ef.optimize(optimize, n_trials=50)

[32m[I 2021-08-26 09:15:33,951][0m A new study created in memory with name: no-name-9b720c0a-feea-4d08-be01-78d2d73b1033[0m
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[12677]	valid_0's rmse: 0.718929
RMSE for 1th model is 0.7189292573511086


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[12761]	valid_0's rmse: 0.718927
RMSE for 2th model is 0.7189274958430608


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[12784]	valid_0's rmse: 0.718927


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[33m[W 2021-08-26 09:19:23,503][0m Trial 0 failed because of the following error: LightGBMError('Check failed: (min_data_in_leaf) >= (0) at D:\\a\\1\\s\\python-package\\compile\\src\\io\\config_auto.cpp, line 340 .\n')
Traceback (most recent call last):
  File "C:\Users\johnny.horita\AppData\Roaming\Python\Python38\site-packages\optuna\_optimize.py", line 216, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-89-54b979f31a13>", line 30, in objective_extremefine
    model = LGBMRegressor(**params).fit(X_train, y_train, eval_set=[(X_val, y_val)],
  File "C:\Users\johnny.horita\AppData\Roaming\Python\Python38\site-packages\lightgbm\sklearn.py", line 818, in fit
    super().fit(X, y, sample_weight=sample_weight, init_score=init_score,
  File "C:\Users\johnny.horita\AppData\Roaming\Python\Python38\site-packages\lightgbm\sklearn.

RMSE for 3th model is 0.7189272709164225


LightGBMError: Check failed: (min_data_in_leaf) >= (0) at D:\a\1\s\python-package\compile\src\io\config_auto.cpp, line 340 .


In [None]:
study_ef.best_params

# Step 4: Train a model

In [None]:
lgbm_params = {'max_depth': 16, 
                'subsample': 0.8032697250789377, 
                'colsample_bytree': 0.21067140508531404, 
                'learning_rate': 0.009867383057779643,
                'reg_lambda': 10.987474846877767, 
                'reg_alpha': 17.335285595031994, 
                'min_child_samples': 31, 
                'num_leaves': 66, 
                'max_bin': 522, 
                'cat_smooth': 81, 
                'cat_l2': 0.029690334194270022, 
                'metric': 'rmse', 
                'n_jobs': -1, 
                'n_estimators': 20000}

In [53]:
%%time
split = KFold(n_splits=10, shuffle=True)
#split = KFold(n_splits=5, random_state=2, shuffle=True)

preds_list_base = []
preds_list_final_iteration = []
preds_list_all = []

for train_idx, val_idx in split.split(X_train):
            X_tr = X_train.iloc[train_idx]
            X_val = X_train.iloc[val_idx]
            y_tr = y_train.iloc[train_idx]
            y_val = y_train.iloc[val_idx]
            
            Model = LGBMRegressor(**lgbm_params).fit(X_tr, y_tr, eval_set=[(X_val, y_val)],
                          eval_metric=['rmse'],
                          early_stopping_rounds=250, 
                          categorical_feature=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                          #callbacks=[optuna.integration.LightGBMPruningCallback(trial, metric='rmse')],
                          verbose=0)
            
            preds_list_base.append(Model.predict(X_test))
            preds_list_all.append(Model.predict(X_test))
            print(f'RMSE for Base model is {np.sqrt(mean_squared_error(y_val, Model.predict(X_val)))}')
            first_rmse = np.sqrt(mean_squared_error(y_val, Model.predict(X_val)))
            params = lgbm_params.copy()
            
            for i in range(1, 8):
                if i >2:    
                    
                    # reducing regularizing params if 
                    
                    params['reg_lambda'] *= 0.9
                    params['reg_alpha'] *= 0.9
                    params['num_leaves'] += 40
                    
                params['learning_rate'] = 0.003
                Model = LGBMRegressor(**params).fit(X_tr, y_tr, eval_set=[(X_val, y_val)],
                          eval_metric=['rmse'],
                          early_stopping_rounds=200, 
                          categorical_feature=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                          #callbacks=[optuna.integration.LightGBMPruningCallback(trial, metric='rmse')],
                          verbose=0,
                          init_model=Model)
                
                preds_list_all.append(Model.predict(X_test))
                print(f'RMSE for Incremental trial {i} model is {np.sqrt(mean_squared_error(y_val, Model.predict(X_val)))}')
            last_rmse = np.sqrt(mean_squared_error(y_val, Model.predict(X_val)))
            print('',end='\n\n')
            print(f'Improvement of : {first_rmse - last_rmse}')
            print('-' * 100)
            preds_list_final_iteration.append(Model.predict(X_test))

Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Base model is 0.7107550230541116


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 1 model is 0.7107522072192628


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 2 model is 0.7107514377729396


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 3 model is 0.7107199011671975


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 4 model is 0.7107171056410114


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 5 model is 0.7107137446433405


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 6 model is 0.7107133524542298


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 7 model is 0.7107127516435672


Improvement of : 4.227141054446992e-05
----------------------------------------------------------------------------------------------------


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Base model is 0.7178790959367608


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 1 model is 0.7178746656789096


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 2 model is 0.7178657681348584


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 3 model is 0.7178620958746957


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 4 model is 0.7178530106830172


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 5 model is 0.7178431908705689


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 6 model is 0.7178364527548072


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 7 model is 0.7178358906203893


Improvement of : 4.320531637147518e-05
----------------------------------------------------------------------------------------------------


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Base model is 0.713169592205707


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 1 model is 0.7131635739168113


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 2 model is 0.713162593578703


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 3 model is 0.713149644795821


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 4 model is 0.7131295115977128


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 5 model is 0.7131227115807596


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 6 model is 0.7131224547579654


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 7 model is 0.7131223563571909


Improvement of : 4.7235848516069545e-05
----------------------------------------------------------------------------------------------------


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Base model is 0.7165225980108092


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 1 model is 0.7165173393729258


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 2 model is 0.7164585737270974


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 3 model is 0.7163135818065217


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 4 model is 0.7163124805502543


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 5 model is 0.7163116978926112


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 6 model is 0.7163040225307279


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 7 model is 0.7162966450509988


Improvement of : 0.00022595295981042707
----------------------------------------------------------------------------------------------------


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Base model is 0.7185812951809598


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 1 model is 0.7185410960991686


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 2 model is 0.7185273938526354


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 3 model is 0.718476482793457


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 4 model is 0.7184499432885094


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 5 model is 0.7184006024802664


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 6 model is 0.7183970650736516


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 7 model is 0.7183790412934661


Improvement of : 0.00020225388749373074
----------------------------------------------------------------------------------------------------


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Base model is 0.7217255664091201


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 1 model is 0.7217220610019895


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 2 model is 0.7216926205174095


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 3 model is 0.7216901185817074


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 4 model is 0.721645845780683


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 5 model is 0.7216222522182223


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 6 model is 0.721598187943286


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 7 model is 0.7215981966412467


Improvement of : 0.0001273697678734198
----------------------------------------------------------------------------------------------------


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Base model is 0.720839533253733


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 1 model is 0.7208392972771374


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 2 model is 0.7208392696507492


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 3 model is 0.7208033475415004


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 4 model is 0.7207845527714749


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 5 model is 0.7207568817358531


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 6 model is 0.7207449832811916


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 7 model is 0.720731238380962


Improvement of : 0.00010829487277097538
----------------------------------------------------------------------------------------------------


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Base model is 0.7177175842475385


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 1 model is 0.7177159317340958


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 2 model is 0.717714879509399


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 3 model is 0.7176963856767526


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 4 model is 0.7176754065137368


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 5 model is 0.7176716738435617


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 6 model is 0.7176665282555491


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 7 model is 0.7176626993858923


Improvement of : 5.488486164617612e-05
----------------------------------------------------------------------------------------------------


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Base model is 0.7148641442621122


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 1 model is 0.7148368177139752


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 2 model is 0.7148307618715389


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 3 model is 0.7148309109865238


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 4 model is 0.7148284618683469


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 5 model is 0.7148270237862858


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 6 model is 0.7148234872125555


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 7 model is 0.7148053967944848


Improvement of : 5.874746762735139e-05
----------------------------------------------------------------------------------------------------


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Base model is 0.7183935491204166


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 1 model is 0.7183858674146388


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 2 model is 0.7183080455031281


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 3 model is 0.7183080525792207


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 4 model is 0.7183029175465351


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 5 model is 0.7182904643089428


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 6 model is 0.718283670130113


Using categorical_feature in Dataset.
categorical_feature in Dataset is overridden.
New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Overriding the parameters from Reference Dataset.
categorical_column in param dict is overridden.


RMSE for Incremental trial 7 model is 0.718276637520159


Improvement of : 0.00011691160025761871
----------------------------------------------------------------------------------------------------
Wall time: 2h 58min 4s


# Step 5: Submit to the competition

In [54]:
y_preds_base = np.array(preds_list_base).mean(axis=0)
y_preds_base

array([8.05990673, 8.37679118, 8.38446779, ..., 8.48578847, 8.14250875,
       7.95256521])

In [55]:
y_preds_all = np.array(preds_list_all).mean(axis=0)
y_preds_all

array([8.06085681, 8.37750632, 8.38505738, ..., 8.48934653, 8.14526674,
       7.94932529])

In [56]:
y_preds_final_iteration = np.array(preds_list_final_iteration).mean(axis=0)
y_preds_final_iteration

array([8.06052724, 8.37609817, 8.38610211, ..., 8.49178426, 8.15044823,
       7.94514232])

In [57]:
# Use the model to generate predictions
#predictions = model.predict(test)

# Save the predictions to a CSV file
output = pd.DataFrame({'Id': test.index,
                       'target': y_preds_final_iteration})
output.to_csv('submission.csv', index=False)