### Setup

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import seaborn
%matplotlib inline

# project paths
project_root_dir = os.path.normpath(os.getcwd() + os.sep + os.pardir)

data_path = os.path.join(project_root_dir, "data")
os.makedirs(data_path, exist_ok=True)

# function for reading data
def read_data(filename, date_cols=None, file_path=data_path):
    csv_path = os.path.join(file_path, filename)
    return pd.read_csv(csv_path, parse_dates=date_cols)

# function for saving data as csv file
def save_dataframe(df, filename, file_path=data_path):
    path = os.path.join(file_path, filename)
    df.to_csv(path, index=False)

### Read And Prepare Data

In [2]:
train = read_data("TRAIN.CSV", date_cols=["Date"])
test = read_data("TEST_FINAL.csv", date_cols=["Date"])
submission = read_data("SAMPLE.csv")

In [3]:
from prepare import prepare_data

In [4]:
X_train, y_train, X_test, full_pipe = prepare_data(train, test)

## Lightgbm Models

In [6]:
from lightgbm import LGBMRegressor
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

In [7]:
lgbm = make_pipeline(full_pipe, LGBMRegressor(random_state=42, n_jobs=-1))
scores = cross_val_score(lgbm, X_train, y_train, cv=5, scoring="neg_mean_squared_log_error")
print("Scores:", -scores)
print("Average score:", np.mean(-scores))

Scores: [0.09295144 0.07219783 0.10141561 0.20279369 0.11582319]
Average score: 0.11703635212917407


In [8]:
lgbm.fit(X_train, y_train)
submission['Sales'] = lgbm.predict(X_test)
save_dataframe(submission,"lgbm_default.csv")

In [9]:
from sklearn.model_selection import GridSearchCV

params = {"lgbmregressor__max_depth":list(range(5,40)),
         "lgbmregressor__num_leaves":list(range(25,50))}

lgb_model = make_pipeline(full_pipe, LGBMRegressor(random_state=42, n_jobs=-1))

lgb_grid = GridSearchCV(estimator=lgb_model,param_grid=params, 
                        cv=3, scoring="neg_mean_squared_log_error",
                        verbose=2)
lgb_grid.fit(X_train, y_train)

Fitting 3 folds for each of 875 candidates, totalling 2625 fits
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=26; total time=   0.5s
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=26; total time=   0.5s
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=26; total time=   0.6s
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=27; total time=   0.6s
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=27; total time=   0.5s
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=27; total time=   0.5s
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=28; total time=   0.5s
[CV] END lgbmregressor__max_depth=5, lgbmregressor__num_leaves=28; total tim

[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=31; total time=   0.5s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=32; total time=   0.6s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=32; total time=   0.6s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=32; total time=   0.6s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=33; total time=   0.6s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=33; total time=   0.6s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=33; total time=   0.7s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=34; total time=   0.6s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=34; total time=   0.6s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=34; total time=   0.6s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__num_leaves=35; total time=   0.6s
[CV] END lgbmregressor__max_depth=6, lgbmregressor__nu

[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=38; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=39; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=39; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=39; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=40; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=40; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=40; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__num_leaves=42; total time=   0.6s
[CV] END lgbmregressor__max_depth=7, lgbmregressor__nu

[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=45; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=46; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=46; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=46; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__num_leaves=49; total time=   0.6s
[CV] END lgbmregressor__max_depth=8, lgbmregressor__nu

[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=27; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=28; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=28; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=28; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=29; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=29; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=29; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=30; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=30; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=30; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmregressor__num_leaves=31; total time=   0.5s
[CV] END lgbmregressor__max_depth=10, lgbmr

[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=34; total time=   0.5s
[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=34; total time=   0.5s
[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=35; total time=   0.5s
[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=35; total time=   0.6s
[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=35; total time=   0.6s
[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=36; total time=   0.5s
[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=36; total time=   0.5s
[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=36; total time=   0.5s
[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=37; total time=   0.5s
[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=37; total time=   0.5s
[CV] END lgbmregressor__max_depth=11, lgbmregressor__num_leaves=37; total time=   0.5s
[CV] END lgbmregressor__max_depth=11, lgbmr

[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=42; total time=   0.6s
[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=42; total time=   0.6s
[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=42; total time=   0.6s
[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=43; total time=   0.5s
[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=43; total time=   0.5s
[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=43; total time=   0.6s
[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=44; total time=   0.6s
[CV] END lgbmregressor__max_depth=12, lgbmregressor__num_leaves=44; total time=   0.6s
[CV] END lgbmregressor__max_depth=12, lgbmr

[CV] END lgbmregressor__max_depth=13, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=13, lgbmregressor__num_leaves=48; total time=   0.5s
[CV] END lgbmregressor__max_depth=13, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=13, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=13, lgbmregressor__num_leaves=49; total time=   0.5s
[CV] END lgbmregressor__max_depth=13, lgbmregressor__num_leaves=49; total time=   0.6s
[CV] END lgbmregressor__max_depth=13, lgbmregressor__num_leaves=49; total time=   0.5s
[CV] END lgbmregressor__max_depth=14, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=14, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=14, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=14, lgbmregressor__num_leaves=26; total time=   0.5s
[CV] END lgbmregressor__max_depth=14, lgbmr

[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=29; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=29; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=30; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=30; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=30; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=31; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=31; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=31; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=32; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=32; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=32; total time=   0.6s
[CV] END lgbmregressor__max_depth=15, lgbmr

Traceback (most recent call last):
  File "C:\Users\BHOLA\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\BHOLA\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py", line 199, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true,
  File "C:\Users\BHOLA\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py", line 242, in _score
    return self._sign * self._score_func(y_true, y_pred,
  File "C:\Users\BHOLA\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "C:\Users\BHOLA\anaconda3\lib\site-packages\sklearn\metrics\_regression.py", line 413, in mean_squared_log_error
    raise ValueError("Mean Squared Logarithmic Error cannot be used when "
ValueError: Mean Squared Logarithmic Error cannot be used when targets contain negative values.



[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=43; total time=   0.6s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=43; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=44; total time=   0.6s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=44; total time=   0.6s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=44; total time=   0.6s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=45; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=45; total time=   0.6s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=45; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=46; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=46; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmregressor__num_leaves=46; total time=   0.5s
[CV] END lgbmregressor__max_depth=15, lgbmr

[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=26; total time=   0.5s
[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=26; total time=   0.6s
[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=26; total time=   0.5s
[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=27; total time=   0.5s
[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=27; total time=   0.6s
[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=27; total time=   0.6s
[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=28; total time=   0.5s
[CV] END lgbmregressor__max_depth=17, lgbmregressor__num_leaves=28; total time=   0.5s
[CV] END lgbmregressor__max_depth=17, lgbmr

[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=31; total time=   0.6s
[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=32; total time=   0.5s
[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=32; total time=   0.6s
[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=32; total time=   0.6s
[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=33; total time=   0.6s
[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=33; total time=   0.6s
[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=33; total time=   0.6s
[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=34; total time=   0.6s
[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=34; total time=   0.6s
[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=34; total time=   0.6s
[CV] END lgbmregressor__max_depth=18, lgbmregressor__num_leaves=35; total time=   0.7s
[CV] END lgbmregressor__max_depth=18, lgbmr

[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=38; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=38; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=39; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=39; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=39; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=40; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=40; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=40; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=19, lgbmr

[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=45; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=45; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=45; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=46; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=46; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=46; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=20, lgbmr

[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=26; total time=   0.6s
[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=27; total time=   0.6s
[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=27; total time=   0.6s
[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=27; total time=   0.6s
[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=28; total time=   0.6s
[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=28; total time=   0.6s
[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=28; total time=   0.5s
[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=29; total time=   0.6s
[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=29; total time=   0.6s
[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=29; total time=   0.6s
[CV] END lgbmregressor__max_depth=22, lgbmregressor__num_leaves=30; total time=   0.6s
[CV] END lgbmregressor__max_depth=22, lgbmr

[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=33; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=33; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=34; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=34; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=34; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=35; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=35; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=35; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=36; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=36; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmregressor__num_leaves=36; total time=   0.6s
[CV] END lgbmregressor__max_depth=23, lgbmr

[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=40; total time=   0.6s
[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=40; total time=   0.6s
[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=40; total time=   0.6s
[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=41; total time=   0.7s
[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=42; total time=   0.6s
[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=42; total time=   0.6s
[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=42; total time=   0.6s
[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=43; total time=   0.6s
[CV] END lgbmregressor__max_depth=24, lgbmregressor__num_leaves=43; total time=   0.6s
[CV] END lgbmregressor__max_depth=24, lgbmr

[CV] END lgbmregressor__max_depth=25, lgbmregressor__num_leaves=46; total time=   0.6s
[CV] END lgbmregressor__max_depth=25, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=25, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=25, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=25, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=25, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=25, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=25, lgbmregressor__num_leaves=49; total time=   0.6s
[CV] END lgbmregressor__max_depth=25, lgbmregressor__num_leaves=49; total time=   0.6s
[CV] END lgbmregressor__max_depth=25, lgbmregressor__num_leaves=49; total time=   0.6s
[CV] END lgbmregressor__max_depth=26, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=26, lgbmr

[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=28; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=28; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=29; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=29; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=29; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=30; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=30; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=30; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=31; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=31; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmregressor__num_leaves=31; total time=   0.6s
[CV] END lgbmregressor__max_depth=27, lgbmr

[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=35; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=35; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=35; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=36; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=36; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=36; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=37; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=37; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=37; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=38; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmregressor__num_leaves=38; total time=   0.6s
[CV] END lgbmregressor__max_depth=28, lgbmr

[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=41; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=42; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=42; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=42; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=43; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=43; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=43; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=44; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=44; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=44; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmregressor__num_leaves=45; total time=   0.6s
[CV] END lgbmregressor__max_depth=29, lgbmr

[CV] END lgbmregressor__max_depth=30, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=30, lgbmregressor__num_leaves=48; total time=   0.6s
[CV] END lgbmregressor__max_depth=30, lgbmregressor__num_leaves=49; total time=   0.6s
[CV] END lgbmregressor__max_depth=30, lgbmregressor__num_leaves=49; total time=   0.6s
[CV] END lgbmregressor__max_depth=30, lgbmregressor__num_leaves=49; total time=   0.6s
[CV] END lgbmregressor__max_depth=31, lgbmregressor__num_leaves=25; total time=   0.6s
[CV] END lgbmregressor__max_depth=31, lgbmregressor__num_leaves=25; total time=   0.6s
[CV] END lgbmregressor__max_depth=31, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=31, lgbmregressor__num_leaves=26; total time=   0.6s
[CV] END lgbmregressor__max_depth=31, lgbmregressor__num_leaves=26; total time=   0.6s
[CV] END lgbmregressor__max_depth=31, lgbmregressor__num_leaves=26; total time=   0.6s
[CV] END lgbmregressor__max_depth=31, lgbmr

[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=30; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=30; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=30; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=31; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=31; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=31; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=32; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=32; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=32; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=33; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmregressor__num_leaves=33; total time=   0.5s
[CV] END lgbmregressor__max_depth=32, lgbmr

[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=36; total time=   0.5s
[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=37; total time=   0.5s
[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=37; total time=   0.5s
[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=37; total time=   0.5s
[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=38; total time=   0.5s
[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=38; total time=   0.6s
[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=38; total time=   0.5s
[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=39; total time=   0.5s
[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=39; total time=   0.5s
[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=39; total time=   0.5s
[CV] END lgbmregressor__max_depth=33, lgbmregressor__num_leaves=40; total time=   0.5s
[CV] END lgbmregressor__max_depth=33, lgbmr

[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=43; total time=   0.5s
[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=43; total time=   0.5s
[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=44; total time=   0.6s
[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=44; total time=   0.6s
[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=44; total time=   0.5s
[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=45; total time=   0.6s
[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=45; total time=   0.6s
[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=45; total time=   0.6s
[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=46; total time=   0.6s
[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=46; total time=   0.5s
[CV] END lgbmregressor__max_depth=34, lgbmregressor__num_leaves=46; total time=   0.5s
[CV] END lgbmregressor__max_depth=34, lgbmr

[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=25; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=26; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=26; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=26; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=27; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=27; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=27; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=28; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmregressor__num_leaves=28; total time=   0.5s
[CV] END lgbmregressor__max_depth=36, lgbmr

[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=31; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=32; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=32; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=32; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=33; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=33; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=33; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=34; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=34; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=34; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmregressor__num_leaves=35; total time=   0.5s
[CV] END lgbmregressor__max_depth=37, lgbmr

[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=38; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=38; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=39; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=39; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=39; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=40; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=40; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=40; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=41; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=41; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmregressor__num_leaves=41; total time=   0.5s
[CV] END lgbmregressor__max_depth=38, lgbmr

[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=45; total time=   0.5s
[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=45; total time=   0.6s
[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=45; total time=   0.5s
[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=46; total time=   0.5s
[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=46; total time=   0.6s
[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=46; total time=   0.5s
[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=47; total time=   0.5s
[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=47; total time=   0.6s
[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=48; total time=   0.5s
[CV] END lgbmregressor__max_depth=39, lgbmregressor__num_leaves=48; total time=   0.5s
[CV] END lgbmregressor__max_depth=39, lgbmr

 -0.11887463 -0.1182892  -0.1182892  -0.1182892  -0.1182892  -0.1182892
 -0.1182892  -0.1182892  -0.1182892  -0.1182892  -0.1182892  -0.1182892
 -0.1182892  -0.1182892  -0.1182892  -0.1182892  -0.1182892  -0.1182892
 -0.1182892  -0.12103868 -0.12048351 -0.11973906 -0.11923068 -0.11938858
 -0.12014714 -0.11857168 -0.1189194  -0.11923931 -0.11976868 -0.11939209
 -0.1191314  -0.11952086 -0.1183795  -0.11839345 -0.11892134 -0.11892692
 -0.11799272 -0.11805943 -0.11884813 -0.11889704 -0.11922128 -0.11885205
 -0.11884116 -0.11913376 -0.1200796  -0.12071923 -0.12057685 -0.12043408
 -0.12056633 -0.1207446  -0.11987874 -0.12042812 -0.12010285 -0.12069703
 -0.12019368 -0.12001896 -0.11977561 -0.11969364 -0.11959888 -0.11941799
 -0.11987001 -0.11961656 -0.11923287 -0.11947768 -0.11854445 -0.11893966
 -0.11896577 -0.11903461 -0.11840516 -0.1208911  -0.12110362 -0.12048722
 -0.12043    -0.12061803 -0.12111054 -0.12071552 -0.12172394 -0.12013971
 -0.12095972 -0.12093196 -0.12144139 -0.12056651 -0.12

GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('columntransformer',
                                        ColumnTransformer(transformers=[('num',
                                                                         Pipeline(steps=[('simpleimputer',
                                                                                          SimpleImputer())]),
                                                                         ['Store_id']),
                                                                        ('cat',
                                                                         Pipeline(steps=[('simpleimputer',
                                                                                          SimpleImputer(fill_value='NA',
                                                                                                        strategy='constant')),
                                                                                         ('onehotenc

In [10]:
lgb_grid.best_score_

-0.11799272253994697

In [11]:
lgb_grid.best_params_

{'lgbmregressor__max_depth': 6, 'lgbmregressor__num_leaves': 42}

In [18]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal

In [26]:
params = {"lgbmregressor__learning_rate":reciprocal(0.1,0.2)}

lgb_model = make_pipeline(full_pipe, LGBMRegressor(random_state=42, n_jobs=-1))

lgb_rnd_search = RandomizedSearchCV(lgb_model,
                                    param_distributions=params,
                                    scoring="neg_mean_squared_log_error",
                                   cv=3,n_iter=30,verbose=2)

lgb_rnd_search.fit(X_train, y_train)

Fitting 3 folds for each of 30 candidates, totalling 90 fits
[CV] END ....lgbmregressor__learning_rate=0.1333279286448707; total time=   0.6s
[CV] END ....lgbmregressor__learning_rate=0.1333279286448707; total time=   0.6s
[CV] END ....lgbmregressor__learning_rate=0.1333279286448707; total time=   0.6s
[CV] END .....lgbmregressor__learning_rate=0.149564291548245; total time=   0.5s
[CV] END .....lgbmregressor__learning_rate=0.149564291548245; total time=   0.6s
[CV] END .....lgbmregressor__learning_rate=0.149564291548245; total time=   0.6s
[CV] END ....lgbmregressor__learning_rate=0.1518875467129308; total time=   0.6s
[CV] END ....lgbmregressor__learning_rate=0.1518875467129308; total time=   0.6s
[CV] END ....lgbmregressor__learning_rate=0.1518875467129308; total time=   0.6s
[CV] END ...lgbmregressor__learning_rate=0.11548403409263382; total time=   0.6s
[CV] END ...lgbmregressor__learning_rate=0.11548403409263382; total time=   0.6s
[CV] END ...lgbmregressor__learning_rate=0.11548

RandomizedSearchCV(cv=3,
                   estimator=Pipeline(steps=[('columntransformer',
                                              ColumnTransformer(transformers=[('num',
                                                                               Pipeline(steps=[('simpleimputer',
                                                                                                SimpleImputer())]),
                                                                               ['Store_id']),
                                                                              ('cat',
                                                                               Pipeline(steps=[('simpleimputer',
                                                                                                SimpleImputer(fill_value='NA',
                                                                                                              strategy='constant')),
                                        

In [28]:
lgb_rnd_search.best_score_

-0.11984681422640898

In [29]:
lgb_rnd_search.best_params_

{'lgbmregressor__learning_rate': 0.14127961459711447}

In [30]:
lgbm = make_pipeline(full_pipe, LGBMRegressor(random_state=42,max_depth=6,num_leaves=42, n_jobs=-1))
scores = cross_val_score(lgbm, X_train, y_train, cv=5, scoring="neg_mean_squared_log_error")
print("Scores:", -scores)
print("Average score:", np.mean(-scores))

Scores: [0.09396422 0.07373503 0.09141141 0.20329892 0.11596022]
Average score: 0.11567396008982248


In [38]:
from scipy.stats import randint

params = {"lgbmregressor__n_estimators":randint(low=100,high=1000)}

lgb_model = make_pipeline(full_pipe, LGBMRegressor(random_state=42,max_depth=6,num_leaves=42, n_jobs=-1))

lgb_rnd_search = RandomizedSearchCV(lgb_model,
                                    param_distributions=params,
                                    scoring="neg_mean_squared_log_error",
                                   cv=3,n_iter=30,verbose=2)

lgb_rnd_search.fit(X_train, y_train)

Fitting 3 folds for each of 30 candidates, totalling 90 fits
[CV] END ....................lgbmregressor__n_estimators=461; total time=   1.2s
[CV] END ....................lgbmregressor__n_estimators=461; total time=   1.2s
[CV] END ....................lgbmregressor__n_estimators=461; total time=   1.4s
[CV] END ....................lgbmregressor__n_estimators=653; total time=   1.5s
[CV] END ....................lgbmregressor__n_estimators=653; total time=   1.5s
[CV] END ....................lgbmregressor__n_estimators=653; total time=   1.5s
[CV] END ....................lgbmregressor__n_estimators=492; total time=   1.3s
[CV] END ....................lgbmregressor__n_estimators=492; total time=   1.3s
[CV] END ....................lgbmregressor__n_estimators=492; total time=   1.3s
[CV] END ....................lgbmregressor__n_estimators=910; total time=   2.1s
[CV] END ....................lgbmregressor__n_estimators=910; total time=   2.0s
[CV] END ....................lgbmregressor__n_es

RandomizedSearchCV(cv=3,
                   estimator=Pipeline(steps=[('columntransformer',
                                              ColumnTransformer(transformers=[('num',
                                                                               Pipeline(steps=[('simpleimputer',
                                                                                                SimpleImputer())]),
                                                                               ['Store_id']),
                                                                              ('cat',
                                                                               Pipeline(steps=[('simpleimputer',
                                                                                                SimpleImputer(fill_value='NA',
                                                                                                              strategy='constant')),
                                        

In [39]:
lgb_rnd_search.best_score_

-0.11763934885412547

In [40]:
lgb_rnd_search.best_params_

{'lgbmregressor__n_estimators': 132}

In [41]:
lgb_model = make_pipeline(full_pipe, LGBMRegressor(random_state=42,
                                                   n_estimators=132,
                                                   max_depth=6,
                                                   num_leaves=42,
                                                   n_jobs=-1))

lgb_model.fit(X_train, y_train)
submission['Sales'] = lgb_model.predict(X_test)
save_dataframe(submission,"lgbm_hyper1.csv")