### Import packages

In [34]:
!pip install lightgbm



In [35]:
from datetime import date, timedelta, datetime
import gc
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
from tqdm import tqdm

### Import training data

In [36]:
tic = datetime.now()

#Import data from 2017-01-01 and beyond

df_2017 = pd.read_csv(
    '../input/train.csv', usecols=[1, 2, 3, 4, 5],
    dtype={'onpromotion': bool},
    converters={'unit_sales': lambda u: np.log1p(float(u)) if float(u) > 0 else 0},
    parse_dates=["date"],
    skiprows=range(1, 101688780))

toc = datetime.now()
print(toc - tic)

0:02:10.721559


In [37]:
df_2017.head()

Unnamed: 0,date,store_nbr,item_nbr,unit_sales,onpromotion
0,2017-01-01,25,99197,0.693147,False
1,2017-01-01,25,103665,2.079442,False
2,2017-01-01,25,105574,0.693147,False
3,2017-01-01,25,105857,1.609438,False
4,2017-01-01,25,106716,1.098612,False


In [38]:
df_2017.shape

(23808261, 5)

### Import items and stores data

In [39]:
items = pd.read_csv("../input/items.csv").set_index("item_nbr")
stores = pd.read_csv("../input/stores.csv").set_index("store_nbr")

### Import and modify promotion data

In [40]:
promo_2017 = df_2017.set_index(["store_nbr", "item_nbr", "date"])[["onpromotion"]].unstack(level=-1).fillna(False)

In [41]:
promo_2017.columns = promo_2017.columns.get_level_values(1)

In [42]:
promo_2017.head()

Unnamed: 0_level_0,date,2017-01-01 00:00:00,2017-01-02 00:00:00,2017-01-03 00:00:00,2017-01-04 00:00:00,2017-01-05 00:00:00,2017-01-06 00:00:00,2017-01-07 00:00:00,2017-01-08 00:00:00,2017-01-09 00:00:00,2017-01-10 00:00:00,...,2017-08-06 00:00:00,2017-08-07 00:00:00,2017-08-08 00:00:00,2017-08-09 00:00:00,2017-08-10 00:00:00,2017-08-11 00:00:00,2017-08-12 00:00:00,2017-08-13 00:00:00,2017-08-14 00:00:00,2017-08-15 00:00:00
store_nbr,item_nbr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,96995,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,99197,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,103520,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,103665,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,105574,False,False,True,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


### Transform sales training data

In [43]:
df_2017 = df_2017.set_index(
    ["store_nbr", "item_nbr", "date"])[["unit_sales"]].unstack(
        level=-1).fillna(0)
df_2017.columns = df_2017.columns.get_level_values(1)

In [44]:
df_2017.head()

Unnamed: 0_level_0,date,2017-01-01 00:00:00,2017-01-02 00:00:00,2017-01-03 00:00:00,2017-01-04 00:00:00,2017-01-05 00:00:00,2017-01-06 00:00:00,2017-01-07 00:00:00,2017-01-08 00:00:00,2017-01-09 00:00:00,2017-01-10 00:00:00,...,2017-08-06 00:00:00,2017-08-07 00:00:00,2017-08-08 00:00:00,2017-08-09 00:00:00,2017-08-10 00:00:00,2017-08-11 00:00:00,2017-08-12 00:00:00,2017-08-13 00:00:00,2017-08-14 00:00:00,2017-08-15 00:00:00
store_nbr,item_nbr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,96995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.098612,1.098612,0.0,0.0,0.693147,0.0,0.0,0.0,0.0,0.0
1,99197,0.0,0.0,1.386294,0.693147,0.693147,0.693147,1.098612,0.0,0.0,0.693147,...,0.0,1.098612,0.0,1.098612,0.0,0.0,0.0,0.0,0.0,0.0
1,103520,0.0,0.693147,1.098612,0.0,1.098612,1.386294,0.693147,0.0,0.693147,0.693147,...,0.0,0.0,1.386294,0.0,1.386294,0.693147,0.693147,0.693147,0.0,0.0
1,103665,0.0,0.0,0.0,1.386294,1.098612,1.098612,0.693147,1.098612,0.0,2.079442,...,0.693147,1.098612,0.0,2.079442,2.302585,1.098612,0.0,0.0,0.693147,0.693147
1,105574,0.0,0.0,1.791759,2.564949,2.302585,1.94591,1.609438,1.098612,1.386294,2.302585,...,0.0,1.791759,2.079442,1.94591,2.397895,1.791759,1.791759,0.0,1.386294,1.609438


In [45]:
# save index for future use
df_2017_index = df_2017.index

### Transform items data

In [46]:
items['class'] = items['class'].astype('category')
items = pd.get_dummies(items)

In [47]:
items.head()

Unnamed: 0_level_0,perishable,family_AUTOMOTIVE,family_BABY CARE,family_BEAUTY,family_BEVERAGES,family_BOOKS,family_BREAD/BAKERY,family_CELEBRATION,family_CLEANING,family_DAIRY,...,class_6920,class_6922,class_6924,class_6936,class_6954,class_6960,class_7002,class_7016,class_7034,class_7780
item_nbr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
96995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
99197,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
103501,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
103520,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
103665,1,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
items = items.reindex(df_2017.index.get_level_values(1))
items.shape

(167515, 371)

### Transform Stores data

In [49]:
stores.head()

Unnamed: 0_level_0,city,state,type,cluster
store_nbr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Quito,Pichincha,D,13
2,Quito,Pichincha,D,13
3,Quito,Pichincha,D,8
4,Quito,Pichincha,D,9
5,Santo Domingo,Santo Domingo de los Tsachilas,D,4


In [50]:
stores['cluster'] = stores.cluster.astype('category')
stores = pd.get_dummies(stores)

In [51]:
stores = stores.reindex(df_2017.index.get_level_values(0))
stores.shape

(167515, 60)

In [52]:
stores.head()

Unnamed: 0_level_0,city_Ambato,city_Babahoyo,city_Cayambe,city_Cuenca,city_Daule,city_El Carmen,city_Esmeraldas,city_Guaranda,city_Guayaquil,city_Ibarra,...,cluster_8,cluster_9,cluster_10,cluster_11,cluster_12,cluster_13,cluster_14,cluster_15,cluster_16,cluster_17
store_nbr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


### Functions for Feature Engineering

In [53]:
def get_timespan(df, dt, minus, periods, freq='D'):
    return df[pd.date_range(dt - timedelta(days=minus), periods=periods, freq=freq)]

In [54]:
def prepare_dataset(df, promo_df, t2017, is_train=True, name_prefix=None):
    X = {
        
        # Number of promotion in the last x days 
        "promo_7": get_timespan(promo_df, t2017, 7, 7).sum(axis=1).values,
        "promo_14": get_timespan(promo_df, t2017, 14, 14).sum(axis=1).values,
        "promo_30": get_timespan(promo_df, t2017, 30, 30).sum(axis=1).values,
        
        # Number of promotion in the next x days of reference date
        "promo_3_aft": get_timespan(promo_df, t2017 + timedelta(days=15), 14, 3).sum(axis=1).values,
        "promo_7_aft": get_timespan(promo_df, t2017 + timedelta(days=15), 14, 7).sum(axis=1).values,
        "promo_14_aft": get_timespan(promo_df, t2017 + timedelta(days=15), 14, 14).sum(axis=1).values,
    }
    
    # Mean sales in the next x days with or without promotions
    for i in [3, 7, 14, 30]:
        tmp1 = get_timespan(df, t2017, i, i)
        tmp2 = (get_timespan(promo_df, t2017, i, i) > 0) * 1

    # with promotion
        X[f'has_promo_mean_{i}'] = (tmp1 * tmp2.replace(0, np.nan)).mean(axis=1).values
    # without promotion
        X[f'no_promo_mean_{i}'] = (tmp1 * (1 - tmp2).replace(0, np.nan)).mean(axis=1).values
                

    for i in [3, 7, 14, 30]:
        tmp = get_timespan(df, t2017, i, i)
        # mean daily difference in sales in the last x days
        X[f'diff_{i}_mean'] = tmp.diff(axis=1).mean(axis=1).values
        # mean sales in the last x days
        X[f'mean_{i}'] = tmp.mean(axis=1).values
        # median sales in the last x days
        X[f'median_{i}'] = tmp.median(axis=1).values
        # min sales in the last x days
        X[f'min_{i}'] = tmp.min(axis=1).values
        # max sales in the last x days
        X[f'max_{i}'] = tmp.max(axis=1).values
        # std dev sales in the last x days
        X[f'std_{i}'] = tmp.std(axis=1).values

    for i in [7, 14, 30]:
        tmp = get_timespan(df, t2017, i, i)
        # number of days with sales in the last x days
        X[f'has_sales_days_in_last_{i}'] = (tmp > 0).sum(axis=1).values
        # last day with sales in the last x days
        X[f'last_has_sales_day_in_last_{i}'] = i - ((tmp > 0) * np.arange(i)).max(axis=1).values
        # first of days with sales in the last x days
        X[f'first_has_sales_day_in_last_{i}'] = ((tmp > 0) * np.arange(i, 0, -1)).max(axis=1).values

        tmp = get_timespan(promo_df, t2017, i, i)
        # number of days with promotions in the last x days
        X[f'has_promo_days_in_last_{i}'] = (tmp > 0).sum(axis=1).values
        # last day has promotion in the last x days
        X[f'last_has_promo_day_in_last_{i}'] = i - ((tmp > 0) * np.arange(i)).max(axis=1).values
        # first day has promotion in the last x days
        X[f'first_has_promo_day_in_last_{i}'] = ((tmp > 0) * np.arange(i, 0, -1)).max(axis=1).values

    tmp = get_timespan(promo_df, t2017 + timedelta(days=15), 14, 14)
    # last day that has promotion in the next 14 days (8/2 to 8/15)
    # Count backwards: if last day is 8/15, then value = 1
    # if last day is 8/2, then value = 14
    X['last_has_promo_day_in_after_14_days'] = 14 - ((tmp > 0) * np.arange(14)).max(axis=1).values
    # first day that has promotion in the next 14 days (8/2 to 8/15)
    X['first_has_promo_day_in_after_14_days'] = ((tmp > 0) * np.arange(14, 0, -1)).max(axis=1).values

    # sale on day x days from reference date 
    for i in range(1, 15):
        X[f'day_{i}'] = get_timespan(df, t2017, i, 1).values.ravel()
    
    # average sales on day of the week for the last 4 or 20 weeks
    for i in range(7):
        X[f'mean_4_dow{i}'] = get_timespan(df, t2017, 28-i, 4, freq='7D').mean(axis=1).values
        X[f'mean_20_dow{i}'] = get_timespan(df, t2017, 140-i, 20, freq='7D').mean(axis=1).values        
    
    # promotion status of each day 14 days before and 14 days after the reference date
    
    for i in range(-14, 15):
        X[f'promo_{i}'] = promo_df[t2017 + timedelta(days=i)].values.astype(np.uint8)

    X = pd.DataFrame(X)

    if name_prefix is not None:
        X.columns = [f'{name_prefix}_{c}' for c in X.columns]
    
    if is_train:
        y = df[pd.date_range(t2017, periods=15)].values
        return X, y
    
    return X

### Prepare training data

In [55]:
print("Preparing dataset...")
t2017 = date(2017, 5, 30)
num_days = 4
X_l, y_l = [], []
for i in tqdm(range(num_days)):
    delta = timedelta(days=7 * i)
    X_tmp, y_tmp = prepare_dataset(df_2017, promo_2017, t2017 + delta)
    X_tmp = pd.concat([X_tmp, items.reset_index(drop=True), stores.reset_index(drop=True)], axis=1)
    X_l.append(X_tmp)
    y_l.append(y_tmp)

X_train = pd.concat(X_l, axis=0)
y_train = np.concatenate(y_l, axis=0)

del X_l, y_l
gc.collect()

  0%|          | 0/4 [00:00<?, ?it/s]

Preparing dataset...


100%|██████████| 4/4 [01:07<00:00, 16.83s/it]


0

### Prepare validation and test data

In [56]:
X_val, y_val = prepare_dataset(df_2017, promo_2017, date(2017, 7, 11))
X_val = pd.concat([X_val, items.reset_index(drop=True), stores.reset_index(drop=True)], axis=1)

X_test = prepare_dataset(df_2017, promo_2017, date(2017, 8, 1), is_train=False)
X_test = pd.concat([X_test, items.reset_index(drop=True), stores.reset_index(drop=True)], axis=1)

In [57]:
#Check shapes to ensure consistency
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape) 
print(X_test.shape)

(670060, 544) (670060, 15)
(167515, 544) (167515, 15)
(167515, 544)


In [58]:
del df_2017, promo_2017
gc.collect()

0

In [59]:
import itertools

In [60]:
#create prarmeter grid
feature_fraction_grid = [0.8,0.9,1]
bagging_fraction_grid = [0.6,0.8,1]
num_leaves_grid = [31, 200] #31 is the default

param_grid = list(itertools.product(num_leaves_grid, feature_fraction_grid, bagging_fraction_grid))

In [61]:
param_grid

[(31, 0.8, 0.6),
 (31, 0.8, 0.8),
 (31, 0.8, 1),
 (31, 0.9, 0.6),
 (31, 0.9, 0.8),
 (31, 0.9, 1),
 (31, 1, 0.6),
 (31, 1, 0.8),
 (31, 1, 1),
 (200, 0.8, 0.6),
 (200, 0.8, 0.8),
 (200, 0.8, 1),
 (200, 0.9, 0.6),
 (200, 0.9, 0.8),
 (200, 0.9, 1),
 (200, 1, 0.6),
 (200, 1, 0.8),
 (200, 1, 1)]

In [62]:
folder_path = '../model_results/2020-01-08/'

In [63]:
# Change column names, don't know why but got error msg without change
# LightGBMError: Do not support special JSON characters in feature name.

X_train.columns = ["".join(c if c.isalnum() else "_" for c in str(x)) for x in X_train.columns]
X_val.columns = X_train.columns
X_test.columns = X_train.columns

### Run #1 (Run in seperate cells to prevent total loss in crash)

In [64]:
val_results_run_1 = []


start_value = 0
end_value = 6

for num, grid_item in enumerate(param_grid[start_value:end_value],start_value): 
              
    params = {
        'num_leaves': grid_item[0],
        'objective': 'regression',
        'learning_rate': 0.02,
        'feature_fraction': grid_item[1],
        'bagging_fraction': grid_item[2],
        'bagging_freq': 1,
        'metric': 'l2',
        'num_threads': 0
    }

    print("*" * 50)
    print(f'Model {num}')
    print(f'num_leaves: {params["num_leaves"]}')
    print(f'feature_fraction: {params["feature_fraction"]}')
    print(f'bagging_fraction: {params["bagging_fraction"]}')
    print("*" * 50)
    
    MAX_ROUNDS = 5000
    val_pred = []
    test_pred = []
    feature_importance = []
    
    for i in range(15):
        tic = datetime.now()
        print("-" * 50)
        print(f'Step {i+1}')
        print("-" * 50)
        dtrain = lgb.Dataset(
            X_train, label=y_train[:, i],
            weight=pd.concat([items["perishable"]] * num_days) * 0.25 + 1
        )
        dval = lgb.Dataset(
            X_val, label=y_val[:, i], reference=dtrain,
            weight=items["perishable"] * 0.25 + 1
        )
        bst = lgb.train(
            params, dtrain, num_boost_round=MAX_ROUNDS,
            valid_sets=[dtrain, dval], early_stopping_rounds=125, verbose_eval=50
        )
        
        feature_importance.append(bst.feature_importance("gain"))
              
        #predict with validation data
        val_pred.append(bst.predict(
            X_val, num_iteration=bst.best_iteration or MAX_ROUNDS))

        #predict with test data
        test_pred.append(bst.predict(
            X_test, num_iteration=bst.best_iteration or MAX_ROUNDS))
        toc = datetime.now()
        print(f'model runtime: {toc-tic}')

    #calculate MSE
    print("Validation mse:", mean_squared_error(
        y_val, np.array(val_pred).transpose()))
    
    #calculate validation error
    weight = items["perishable"] * 0.25 + 1
    err = (y_val - np.array(val_pred).transpose())**2
    err = err.sum(axis=1) * weight
    err = np.sqrt(err.sum() / weight.sum() / 15)
    print(f'nwrmsle = {err}')
    val_results_run_1.append(err)

    #save data
    df_preds_test = pd.DataFrame(
        np.array(test_pred).transpose(), index=df_2017_index,
        columns=pd.date_range("2017-08-01", periods=15)
    )
    df_preds_test.index.set_names(["store_nbr", "item_nbr"], inplace=True)
    df_preds_test.to_csv(folder_path + 'lgbm_test_pred_model_' + str(num) +'.csv')
    
    #save featue_importance
    df_feature_importance = pd.DataFrame(np.array(feature_importance).transpose(),
                                         index = X_train.columns,
                                        columns = pd.date_range('2017-08-01', periods=15))
    df_feature_importance.to_csv(folder_path + 'lgbm_feature_importance_model_' + str(num) +'.csv')

print(val_results_run_1)
          


**************************************************
Model 0
num_leaves: 31
feature_fraction: 0.8
bagging_fraction: 0.6
**************************************************
--------------------------------------------------
Step 1
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.434532	valid_1's l2: 0.417308
[100]	training's l2: 0.335156	valid_1's l2: 0.323898
[150]	training's l2: 0.316806	valid_1's l2: 0.308512
[200]	training's l2: 0.311411	valid_1's l2: 0.304738
[250]	training's l2: 0.308682	valid_1's l2: 0.303199
[300]	training's l2: 0.306661	valid_1's l2: 0.302151
[350]	training's l2: 0.305284	valid_1's l2: 0.301639
[400]	training's l2: 0.304166	valid_1's l2: 0.301325
[450]	training's l2: 0.30322	valid_1's l2: 0.301071
[500]	training's l2: 0.302336	valid_1's l2: 0.300865
[550]	training's l2: 0.301536	valid_1's l2: 0.300749
[600]	training's l2: 0.300762	valid_1's l2: 0.300643
[650]	training's l2: 0.300

[1200]	training's l2: 0.316831	valid_1's l2: 0.325813
[1250]	training's l2: 0.316224	valid_1's l2: 0.325758
[1300]	training's l2: 0.315626	valid_1's l2: 0.325713
[1350]	training's l2: 0.315055	valid_1's l2: 0.325704
[1400]	training's l2: 0.314484	valid_1's l2: 0.325725
Early stopping, best iteration is:
[1303]	training's l2: 0.315582	valid_1's l2: 0.325697
model runtime: 0:01:27.279771
--------------------------------------------------
Step 5
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.50223	valid_1's l2: 0.473824
[100]	training's l2: 0.386732	valid_1's l2: 0.37055
[150]	training's l2: 0.365683	valid_1's l2: 0.354916
[200]	training's l2: 0.359066	valid_1's l2: 0.351291
[250]	training's l2: 0.355514	valid_1's l2: 0.349516
[300]	training's l2: 0.35278	valid_1's l2: 0.348065
[350]	training's l2: 0.350823	valid_1's l2: 0.347385
[400]	training's l2: 0.34921	valid_1's l2: 0.346838
[450]	training's l2: 

Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.459955	valid_1's l2: 0.458909
[100]	training's l2: 0.361004	valid_1's l2: 0.361723
[150]	training's l2: 0.341333	valid_1's l2: 0.342851
[200]	training's l2: 0.334831	valid_1's l2: 0.337031
[250]	training's l2: 0.331118	valid_1's l2: 0.334169
[300]	training's l2: 0.328644	valid_1's l2: 0.332517
[350]	training's l2: 0.326946	valid_1's l2: 0.331761
[400]	training's l2: 0.325445	valid_1's l2: 0.331153
[450]	training's l2: 0.324298	valid_1's l2: 0.330868
[500]	training's l2: 0.323204	valid_1's l2: 0.330549
[550]	training's l2: 0.322216	valid_1's l2: 0.330383
[600]	training's l2: 0.321308	valid_1's l2: 0.330201
[650]	training's l2: 0.320483	valid_1's l2: 0.330085
[700]	training's l2: 0.319718	valid_1's l2: 0.329986
[750]	training's l2: 0.318959	valid_1's l2: 0.329955
[800]	training's l2: 0.318235	valid_1's l2: 0.329929
[850]	training's l2: 0.317503	valid_1's l2: 0.329887
[900]	training's l2: 0.316829	valid_1'

Early stopping, best iteration is:
[1459]	training's l2: 0.350092	valid_1's l2: 0.376789
model runtime: 0:01:33.632830
--------------------------------------------------
Step 13
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.541575	valid_1's l2: 0.516647
[100]	training's l2: 0.421345	valid_1's l2: 0.409425
[150]	training's l2: 0.397841	valid_1's l2: 0.392188
[200]	training's l2: 0.389734	valid_1's l2: 0.388215
[250]	training's l2: 0.384717	valid_1's l2: 0.386263
[300]	training's l2: 0.381317	valid_1's l2: 0.385264
[350]	training's l2: 0.378965	valid_1's l2: 0.384935
[400]	training's l2: 0.376987	valid_1's l2: 0.3846
[450]	training's l2: 0.375278	valid_1's l2: 0.384593
[500]	training's l2: 0.373786	valid_1's l2: 0.384596
[550]	training's l2: 0.372441	valid_1's l2: 0.384511
[600]	training's l2: 0.371194	valid_1's l2: 0.384478
[650]	training's l2: 0.370014	valid_1's l2: 0.384589
[700]	training's l2: 0.

model runtime: 0:01:41.238512
--------------------------------------------------
Step 3
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.442946	valid_1's l2: 0.425045
[100]	training's l2: 0.358521	valid_1's l2: 0.348039
[150]	training's l2: 0.342952	valid_1's l2: 0.335919
[200]	training's l2: 0.337913	valid_1's l2: 0.332739
[250]	training's l2: 0.335166	valid_1's l2: 0.33121
[300]	training's l2: 0.333053	valid_1's l2: 0.330105
[350]	training's l2: 0.331581	valid_1's l2: 0.329523
[400]	training's l2: 0.330376	valid_1's l2: 0.329185
[450]	training's l2: 0.329379	valid_1's l2: 0.329019
[500]	training's l2: 0.328467	valid_1's l2: 0.328906
[550]	training's l2: 0.327634	valid_1's l2: 0.328777
[600]	training's l2: 0.326822	valid_1's l2: 0.328706
[650]	training's l2: 0.326049	valid_1's l2: 0.328625
[700]	training's l2: 0.325314	valid_1's l2: 0.328614
[750]	training's l2: 0.324594	valid_1's l2: 0.32858
[800]	t

[50]	training's l2: 0.525205	valid_1's l2: 0.508191
[100]	training's l2: 0.401464	valid_1's l2: 0.390535
[150]	training's l2: 0.378023	valid_1's l2: 0.369724
[200]	training's l2: 0.370488	valid_1's l2: 0.363934
[250]	training's l2: 0.366097	valid_1's l2: 0.360751
[300]	training's l2: 0.362715	valid_1's l2: 0.358612
[350]	training's l2: 0.360236	valid_1's l2: 0.357548
[400]	training's l2: 0.358294	valid_1's l2: 0.356864
[450]	training's l2: 0.356759	valid_1's l2: 0.356417
[500]	training's l2: 0.355413	valid_1's l2: 0.356084
[550]	training's l2: 0.354106	valid_1's l2: 0.355792
[600]	training's l2: 0.352962	valid_1's l2: 0.355596
[650]	training's l2: 0.351851	valid_1's l2: 0.355393
[700]	training's l2: 0.350863	valid_1's l2: 0.355221
[750]	training's l2: 0.3499	valid_1's l2: 0.355082
[800]	training's l2: 0.348964	valid_1's l2: 0.35499
[850]	training's l2: 0.348116	valid_1's l2: 0.354896
[900]	training's l2: 0.347275	valid_1's l2: 0.354824
[950]	training's l2: 0.346502	valid_1's l2: 0.3547

[1600]	training's l2: 0.309058	valid_1's l2: 0.329563
[1650]	training's l2: 0.308559	valid_1's l2: 0.329542
[1700]	training's l2: 0.308103	valid_1's l2: 0.329576
[1750]	training's l2: 0.307617	valid_1's l2: 0.329556
Early stopping, best iteration is:
[1648]	training's l2: 0.308582	valid_1's l2: 0.329533
model runtime: 0:01:51.675591
--------------------------------------------------
Step 10
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.446569	valid_1's l2: 0.465562
[100]	training's l2: 0.364942	valid_1's l2: 0.376098
[150]	training's l2: 0.348907	valid_1's l2: 0.358554
[200]	training's l2: 0.343306	valid_1's l2: 0.353341
[250]	training's l2: 0.340226	valid_1's l2: 0.351094
[300]	training's l2: 0.337777	valid_1's l2: 0.349876
[350]	training's l2: 0.336087	valid_1's l2: 0.349164
[400]	training's l2: 0.334752	valid_1's l2: 0.348754
[450]	training's l2: 0.333592	valid_1's l2: 0.348496
[500]	training's 

[100]	training's l2: 0.400937	valid_1's l2: 0.409468
[150]	training's l2: 0.382523	valid_1's l2: 0.393958
[200]	training's l2: 0.3759	valid_1's l2: 0.390281
[250]	training's l2: 0.371938	valid_1's l2: 0.38851
[300]	training's l2: 0.369307	valid_1's l2: 0.387526
[350]	training's l2: 0.367316	valid_1's l2: 0.387162
[400]	training's l2: 0.365754	valid_1's l2: 0.387014
[450]	training's l2: 0.36435	valid_1's l2: 0.38695
[500]	training's l2: 0.36312	valid_1's l2: 0.387124
[550]	training's l2: 0.362013	valid_1's l2: 0.387085
Early stopping, best iteration is:
[444]	training's l2: 0.364516	valid_1's l2: 0.386818
model runtime: 0:00:54.753931
--------------------------------------------------
Step 15
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.480912	valid_1's l2: 0.470387
[100]	training's l2: 0.389883	valid_1's l2: 0.388637
[150]	training's l2: 0.372032	valid_1's l2: 0.375907
[200]	training's l2: 0.36581

[300]	training's l2: 0.333056	valid_1's l2: 0.330091
[350]	training's l2: 0.331588	valid_1's l2: 0.3295
[400]	training's l2: 0.330457	valid_1's l2: 0.329251
[450]	training's l2: 0.329467	valid_1's l2: 0.328999
[500]	training's l2: 0.328654	valid_1's l2: 0.328962
[550]	training's l2: 0.327864	valid_1's l2: 0.328897
[600]	training's l2: 0.327125	valid_1's l2: 0.328868
[650]	training's l2: 0.326462	valid_1's l2: 0.328804
[700]	training's l2: 0.325836	valid_1's l2: 0.328814
[750]	training's l2: 0.325197	valid_1's l2: 0.328793
[800]	training's l2: 0.324589	valid_1's l2: 0.328777
[850]	training's l2: 0.323992	valid_1's l2: 0.328783
[900]	training's l2: 0.323457	valid_1's l2: 0.328767
[950]	training's l2: 0.322904	valid_1's l2: 0.328758
[1000]	training's l2: 0.32239	valid_1's l2: 0.32877
[1050]	training's l2: 0.321863	valid_1's l2: 0.328769
Early stopping, best iteration is:
[951]	training's l2: 0.322894	valid_1's l2: 0.328758
model runtime: 0:01:19.280300
------------------------------------

[250]	training's l2: 0.355236	valid_1's l2: 0.354871
[300]	training's l2: 0.352852	valid_1's l2: 0.353633
[350]	training's l2: 0.351042	valid_1's l2: 0.352953
[400]	training's l2: 0.349621	valid_1's l2: 0.352475
[450]	training's l2: 0.348487	valid_1's l2: 0.352286
[500]	training's l2: 0.347501	valid_1's l2: 0.352068
[550]	training's l2: 0.346564	valid_1's l2: 0.351943
[600]	training's l2: 0.345742	valid_1's l2: 0.351787
[650]	training's l2: 0.34496	valid_1's l2: 0.351698
[700]	training's l2: 0.344191	valid_1's l2: 0.351627
[750]	training's l2: 0.343433	valid_1's l2: 0.351517
[800]	training's l2: 0.342709	valid_1's l2: 0.351489
[850]	training's l2: 0.342079	valid_1's l2: 0.351467
[900]	training's l2: 0.341444	valid_1's l2: 0.351436
[950]	training's l2: 0.340842	valid_1's l2: 0.351395
[1000]	training's l2: 0.340301	valid_1's l2: 0.351369
[1050]	training's l2: 0.339675	valid_1's l2: 0.3513
[1100]	training's l2: 0.339094	valid_1's l2: 0.351278
[1150]	training's l2: 0.338538	valid_1's l2: 0

[1750]	training's l2: 0.320253	valid_1's l2: 0.344081
[1800]	training's l2: 0.319831	valid_1's l2: 0.344048
[1850]	training's l2: 0.319421	valid_1's l2: 0.34405
[1900]	training's l2: 0.318984	valid_1's l2: 0.344059
Early stopping, best iteration is:
[1812]	training's l2: 0.319718	valid_1's l2: 0.344031
model runtime: 0:01:56.822070
--------------------------------------------------
Step 12
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.523273	valid_1's l2: 0.492954
[100]	training's l2: 0.409936	valid_1's l2: 0.398737
[150]	training's l2: 0.388334	valid_1's l2: 0.385438
[200]	training's l2: 0.380873	valid_1's l2: 0.382329
[250]	training's l2: 0.376614	valid_1's l2: 0.380605
[300]	training's l2: 0.373403	valid_1's l2: 0.37939
[350]	training's l2: 0.37114	valid_1's l2: 0.378915
[400]	training's l2: 0.369328	valid_1's l2: 0.378412
[450]	training's l2: 0.367775	valid_1's l2: 0.378102
[500]	training's l2:

Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.442927	valid_1's l2: 0.417773
[100]	training's l2: 0.339197	valid_1's l2: 0.322079
[150]	training's l2: 0.319532	valid_1's l2: 0.306201
[200]	training's l2: 0.313419	valid_1's l2: 0.30197
[250]	training's l2: 0.310314	valid_1's l2: 0.300138
[300]	training's l2: 0.308269	valid_1's l2: 0.299085
[350]	training's l2: 0.306872	valid_1's l2: 0.298568
[400]	training's l2: 0.305724	valid_1's l2: 0.298263
[450]	training's l2: 0.304732	valid_1's l2: 0.297997
[500]	training's l2: 0.303885	valid_1's l2: 0.297835
[550]	training's l2: 0.303055	valid_1's l2: 0.297687
[600]	training's l2: 0.30233	valid_1's l2: 0.297547
[650]	training's l2: 0.30163	valid_1's l2: 0.297476
[700]	training's l2: 0.300952	valid_1's l2: 0.297416
[750]	training's l2: 0.300276	valid_1's l2: 0.297313
[800]	training's l2: 0.299634	valid_1's l2: 0.297272
[850]	training's l2: 0.299019	valid_1's l2: 0.297203
[900]	training's l2: 0.298415	valid_1's l

[1450]	training's l2: 0.332379	valid_1's l2: 0.345287
[1500]	training's l2: 0.331779	valid_1's l2: 0.345191
[1550]	training's l2: 0.331227	valid_1's l2: 0.345189
[1600]	training's l2: 0.330669	valid_1's l2: 0.345166
[1650]	training's l2: 0.330085	valid_1's l2: 0.345136
[1700]	training's l2: 0.329531	valid_1's l2: 0.345147
[1750]	training's l2: 0.328996	valid_1's l2: 0.345175
Early stopping, best iteration is:
[1638]	training's l2: 0.330221	valid_1's l2: 0.345118
model runtime: 0:01:45.743871
--------------------------------------------------
Step 6
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.524537	valid_1's l2: 0.507562
[100]	training's l2: 0.401224	valid_1's l2: 0.390117
[150]	training's l2: 0.377907	valid_1's l2: 0.36954
[200]	training's l2: 0.370407	valid_1's l2: 0.363671
[250]	training's l2: 0.366016	valid_1's l2: 0.360646
[300]	training's l2: 0.362522	valid_1's l2: 0.358568
[350]	training's

[1600]	training's l2: 0.308721	valid_1's l2: 0.329678
[1650]	training's l2: 0.308215	valid_1's l2: 0.329646
[1700]	training's l2: 0.30772	valid_1's l2: 0.329631
[1750]	training's l2: 0.307243	valid_1's l2: 0.329598
[1800]	training's l2: 0.306752	valid_1's l2: 0.329576
[1850]	training's l2: 0.306289	valid_1's l2: 0.329561
[1900]	training's l2: 0.305811	valid_1's l2: 0.32957
[1950]	training's l2: 0.30534	valid_1's l2: 0.329574
[2000]	training's l2: 0.304879	valid_1's l2: 0.329565
Early stopping, best iteration is:
[1877]	training's l2: 0.306031	valid_1's l2: 0.329553
model runtime: 0:01:55.713286
--------------------------------------------------
Step 10
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.445884	valid_1's l2: 0.464491
[100]	training's l2: 0.364827	valid_1's l2: 0.375793
[150]	training's l2: 0.34882	valid_1's l2: 0.358536
[200]	training's l2: 0.343368	valid_1's l2: 0.353278
[250]	training's

Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.494999	valid_1's l2: 0.500055
[100]	training's l2: 0.400699	valid_1's l2: 0.409096
[150]	training's l2: 0.382234	valid_1's l2: 0.393443
[200]	training's l2: 0.37578	valid_1's l2: 0.389757
[250]	training's l2: 0.372041	valid_1's l2: 0.388223
[300]	training's l2: 0.369249	valid_1's l2: 0.387539
[350]	training's l2: 0.367257	valid_1's l2: 0.386896
[400]	training's l2: 0.365684	valid_1's l2: 0.386653
[450]	training's l2: 0.364326	valid_1's l2: 0.386621
[500]	training's l2: 0.363078	valid_1's l2: 0.386694
[550]	training's l2: 0.362014	valid_1's l2: 0.386506
[600]	training's l2: 0.360976	valid_1's l2: 0.386642
[650]	training's l2: 0.360007	valid_1's l2: 0.386624
Early stopping, best iteration is:
[550]	training's l2: 0.362014	valid_1's l2: 0.386506
model runtime: 0:00:57.464538
--------------------------------------------------
Step 15
--------------------------------------------------
Training until validati

[600]	training's l2: 0.326723	valid_1's l2: 0.32853
[650]	training's l2: 0.325936	valid_1's l2: 0.328473
[700]	training's l2: 0.325213	valid_1's l2: 0.328494
[750]	training's l2: 0.324527	valid_1's l2: 0.328469
[800]	training's l2: 0.323862	valid_1's l2: 0.328451
[850]	training's l2: 0.323195	valid_1's l2: 0.328399
[900]	training's l2: 0.322575	valid_1's l2: 0.328385
[950]	training's l2: 0.321949	valid_1's l2: 0.328362
[1000]	training's l2: 0.321297	valid_1's l2: 0.328349
[1050]	training's l2: 0.320687	valid_1's l2: 0.328357
[1100]	training's l2: 0.320125	valid_1's l2: 0.328337
[1150]	training's l2: 0.319566	valid_1's l2: 0.328323
[1200]	training's l2: 0.319019	valid_1's l2: 0.328288
[1250]	training's l2: 0.318417	valid_1's l2: 0.328292
[1300]	training's l2: 0.317862	valid_1's l2: 0.328278
[1350]	training's l2: 0.317311	valid_1's l2: 0.328284
[1400]	training's l2: 0.316772	valid_1's l2: 0.328297
Early stopping, best iteration is:
[1276]	training's l2: 0.318117	valid_1's l2: 0.328267
mo

[1800]	training's l2: 0.335294	valid_1's l2: 0.354239
[1850]	training's l2: 0.33474	valid_1's l2: 0.354261
[1900]	training's l2: 0.334192	valid_1's l2: 0.354271
Early stopping, best iteration is:
[1788]	training's l2: 0.335425	valid_1's l2: 0.354215
model runtime: 0:02:13.008105
--------------------------------------------------
Step 7
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.4786	valid_1's l2: 0.47978
[100]	training's l2: 0.383311	valid_1's l2: 0.381593
[150]	training's l2: 0.364918	valid_1's l2: 0.362815
[200]	training's l2: 0.358928	valid_1's l2: 0.357545
[250]	training's l2: 0.355398	valid_1's l2: 0.355171
[300]	training's l2: 0.352858	valid_1's l2: 0.353698
[350]	training's l2: 0.351034	valid_1's l2: 0.352914
[400]	training's l2: 0.349593	valid_1's l2: 0.352541
[450]	training's l2: 0.348395	valid_1's l2: 0.352305
[500]	training's l2: 0.347235	valid_1's l2: 0.352088
[550]	training's l2: 0.

Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.474561	valid_1's l2: 0.47106
[100]	training's l2: 0.37568	valid_1's l2: 0.375722
[150]	training's l2: 0.355534	valid_1's l2: 0.357854
[200]	training's l2: 0.348242	valid_1's l2: 0.352046
[250]	training's l2: 0.344183	valid_1's l2: 0.349258
[300]	training's l2: 0.341217	valid_1's l2: 0.347545
[350]	training's l2: 0.339073	valid_1's l2: 0.346631
[400]	training's l2: 0.337365	valid_1's l2: 0.345993
[450]	training's l2: 0.336001	valid_1's l2: 0.345676
[500]	training's l2: 0.334707	valid_1's l2: 0.345392
[550]	training's l2: 0.333627	valid_1's l2: 0.345226
[600]	training's l2: 0.332632	valid_1's l2: 0.345089
[650]	training's l2: 0.331777	valid_1's l2: 0.345031
[700]	training's l2: 0.330875	valid_1's l2: 0.34484
[750]	training's l2: 0.330033	valid_1's l2: 0.344716
[800]	training's l2: 0.329231	valid_1's l2: 0.34465
[850]	training's l2: 0.328487	valid_1's l2: 0.344573
[900]	training's l2: 0.327735	valid_1's l2

[1150]	training's l2: 0.295594	valid_1's l2: 0.30034
[1200]	training's l2: 0.295194	valid_1's l2: 0.300325
[1250]	training's l2: 0.294805	valid_1's l2: 0.300304
[1300]	training's l2: 0.294406	valid_1's l2: 0.300304
[1350]	training's l2: 0.293975	valid_1's l2: 0.300266
[1400]	training's l2: 0.293536	valid_1's l2: 0.30025
[1450]	training's l2: 0.293182	valid_1's l2: 0.300238
[1500]	training's l2: 0.292768	valid_1's l2: 0.300217
[1550]	training's l2: 0.292363	valid_1's l2: 0.300184
[1600]	training's l2: 0.291965	valid_1's l2: 0.300181
[1650]	training's l2: 0.291596	valid_1's l2: 0.300172
[1700]	training's l2: 0.291193	valid_1's l2: 0.300148
[1750]	training's l2: 0.290821	valid_1's l2: 0.300131
[1800]	training's l2: 0.290442	valid_1's l2: 0.300124
[1850]	training's l2: 0.290055	valid_1's l2: 0.300108
[1900]	training's l2: 0.289669	valid_1's l2: 0.300087
[1950]	training's l2: 0.28931	valid_1's l2: 0.300076
[2000]	training's l2: 0.288946	valid_1's l2: 0.300075
[2050]	training's l2: 0.288576	

[1400]	training's l2: 0.335096	valid_1's l2: 0.346169
[1450]	training's l2: 0.334602	valid_1's l2: 0.346178
[1500]	training's l2: 0.334113	valid_1's l2: 0.346171
Early stopping, best iteration is:
[1385]	training's l2: 0.335238	valid_1's l2: 0.346152
model runtime: 0:01:40.069665
--------------------------------------------------
Step 6
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.524674	valid_1's l2: 0.507606
[100]	training's l2: 0.401349	valid_1's l2: 0.390507
[150]	training's l2: 0.378053	valid_1's l2: 0.369705
[200]	training's l2: 0.370565	valid_1's l2: 0.363871
[250]	training's l2: 0.366103	valid_1's l2: 0.360449
[300]	training's l2: 0.362637	valid_1's l2: 0.358392
[350]	training's l2: 0.360337	valid_1's l2: 0.357323
[400]	training's l2: 0.358417	valid_1's l2: 0.356659
[450]	training's l2: 0.356827	valid_1's l2: 0.356242
[500]	training's l2: 0.355452	valid_1's l2: 0.355882
[550]	training's l2

[50]	training's l2: 0.474516	valid_1's l2: 0.47124
[100]	training's l2: 0.375854	valid_1's l2: 0.376168
[150]	training's l2: 0.355848	valid_1's l2: 0.358309
[200]	training's l2: 0.34854	valid_1's l2: 0.352415
[250]	training's l2: 0.344285	valid_1's l2: 0.349413
[300]	training's l2: 0.341437	valid_1's l2: 0.347639
[350]	training's l2: 0.339175	valid_1's l2: 0.346461
[400]	training's l2: 0.337484	valid_1's l2: 0.345946
[450]	training's l2: 0.336112	valid_1's l2: 0.345673
[500]	training's l2: 0.334911	valid_1's l2: 0.345366
[550]	training's l2: 0.333838	valid_1's l2: 0.345188
[600]	training's l2: 0.332937	valid_1's l2: 0.345039
[650]	training's l2: 0.332131	valid_1's l2: 0.34497
[700]	training's l2: 0.331361	valid_1's l2: 0.344875
[750]	training's l2: 0.330652	valid_1's l2: 0.344814
[800]	training's l2: 0.329964	valid_1's l2: 0.344776
[850]	training's l2: 0.329321	valid_1's l2: 0.344767
[900]	training's l2: 0.328704	valid_1's l2: 0.344731
[950]	training's l2: 0.328117	valid_1's l2: 0.3447

NameError: name 'out_path' is not defined

In [66]:
with open(folder_path + 'val_results_run_1.txt', 'w') as filehandle:
    for val_result in val_results_run_1:
        filehandle.write(f'{val_result}\n')

### Run #2

In [68]:
val_results_run_2 = []

start_value = 6
end_value = 12

for num, grid_item in enumerate(param_grid[start_value:end_value],start_value): 
              
    params = {
        'num_leaves': grid_item[0],
        'objective': 'regression',
        'learning_rate': 0.02,
        'feature_fraction': grid_item[1],
        'bagging_fraction': grid_item[2],
        'bagging_freq': 1,
        'metric': 'l2',
        'num_threads': 0
    }

    print("*" * 50)
    print(f'Model {num}')
    print(f'num_leaves: {params["num_leaves"]}')
    print(f'feature_fraction: {params["feature_fraction"]}')
    print(f'bagging_fraction: {params["bagging_fraction"]}')
    print("*" * 50)
    
    MAX_ROUNDS = 5000
    val_pred = []
    test_pred = []
    feature_importance = []
    
    for i in range(15):
        tic = datetime.now()
        print("-" * 50)
        print(f'Step {i+1}')
        print("-" * 50)
        dtrain = lgb.Dataset(
            X_train, label=y_train[:, i],
            weight=pd.concat([items["perishable"]] * num_days) * 0.25 + 1
        )
        dval = lgb.Dataset(
            X_val, label=y_val[:, i], reference=dtrain,
            weight=items["perishable"] * 0.25 + 1
        )
        bst = lgb.train(
            params, dtrain, num_boost_round=MAX_ROUNDS,
            valid_sets=[dtrain, dval], early_stopping_rounds=125, verbose_eval=50
        )
        
        feature_importance.append(bst.feature_importance("gain"))
              
        #predict with validation data
        val_pred.append(bst.predict(
            X_val, num_iteration=bst.best_iteration or MAX_ROUNDS))

        #predict with test data
        test_pred.append(bst.predict(
            X_test, num_iteration=bst.best_iteration or MAX_ROUNDS))
        toc = datetime.now()
        print(f'model runtime: {toc-tic}')

    #calculate MSE
    print("Validation mse:", mean_squared_error(
        y_val, np.array(val_pred).transpose()))
        
    #calculate validation error
    weight = items["perishable"] * 0.25 + 1
    err = (y_val - np.array(val_pred).transpose())**2
    err = err.sum(axis=1) * weight
    err = np.sqrt(err.sum() / weight.sum() / 15)
    print(f'nwrmsle = {err}')
    val_results_run_2.append(err)

    #save data
    df_preds_test = pd.DataFrame(
        np.array(test_pred).transpose(), index=df_2017_index,
        columns=pd.date_range("2017-08-01", periods=15)
    )
    df_preds_test.index.set_names(["store_nbr", "item_nbr"], inplace=True)
    df_preds_test.to_csv(folder_path + 'lgbm_test_pred_model_' + str(num) +'.csv')
    
    #save featue_importance
    df_feature_importance = pd.DataFrame(np.array(feature_importance).transpose(),
                                         index = X_train.columns,
                                        columns = pd.date_range('2017-08-01', periods=15))
    df_feature_importance.to_csv(folder_path + 'lgbm_feature_importance_model_' + str(num) +'.csv')

print(val_results_run_2)
          
with open(folder_path + 'val_results_run_2.txt', 'w') as filehandle:
    for val_result in val_results_run_2:
        filehandle.write(f'{val_result}\n')

**************************************************
Model 6
num_leaves: 31
feature_fraction: 1
bagging_fraction: 0.6
**************************************************
--------------------------------------------------
Step 1
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.433914	valid_1's l2: 0.416631
[100]	training's l2: 0.334907	valid_1's l2: 0.323782
[150]	training's l2: 0.316627	valid_1's l2: 0.308567
[200]	training's l2: 0.311237	valid_1's l2: 0.304801
[250]	training's l2: 0.308533	valid_1's l2: 0.303305
[300]	training's l2: 0.306606	valid_1's l2: 0.302378
[350]	training's l2: 0.305225	valid_1's l2: 0.301878
[400]	training's l2: 0.304099	valid_1's l2: 0.301615
[450]	training's l2: 0.303159	valid_1's l2: 0.301395
[500]	training's l2: 0.302258	valid_1's l2: 0.301211
[550]	training's l2: 0.301435	valid_1's l2: 0.301067
[600]	training's l2: 0.300646	valid_1's l2: 0.300945
[650]	training's l2: 0.2999

[1100]	training's l2: 0.317787	valid_1's l2: 0.325906
[1150]	training's l2: 0.317145	valid_1's l2: 0.32583
[1200]	training's l2: 0.316529	valid_1's l2: 0.325799
[1250]	training's l2: 0.315921	valid_1's l2: 0.325775
[1300]	training's l2: 0.315339	valid_1's l2: 0.325754
[1350]	training's l2: 0.314763	valid_1's l2: 0.325724
[1400]	training's l2: 0.314198	valid_1's l2: 0.325718
[1450]	training's l2: 0.313641	valid_1's l2: 0.325698
[1500]	training's l2: 0.313109	valid_1's l2: 0.32569
[1550]	training's l2: 0.312546	valid_1's l2: 0.325676
[1600]	training's l2: 0.31199	valid_1's l2: 0.325642
[1650]	training's l2: 0.311471	valid_1's l2: 0.325611
[1700]	training's l2: 0.310936	valid_1's l2: 0.325603
[1750]	training's l2: 0.310414	valid_1's l2: 0.325594
[1800]	training's l2: 0.309883	valid_1's l2: 0.325576
[1850]	training's l2: 0.309369	valid_1's l2: 0.325527
[1900]	training's l2: 0.308884	valid_1's l2: 0.325517
[1950]	training's l2: 0.308417	valid_1's l2: 0.325526
[2000]	training's l2: 0.307931	

model runtime: 0:01:54.751719
--------------------------------------------------
Step 8
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.46492	valid_1's l2: 0.456822
[100]	training's l2: 0.372225	valid_1's l2: 0.367264
[150]	training's l2: 0.353603	valid_1's l2: 0.351022
[200]	training's l2: 0.347123	valid_1's l2: 0.346321
[250]	training's l2: 0.343579	valid_1's l2: 0.344337
[300]	training's l2: 0.341074	valid_1's l2: 0.343109
[350]	training's l2: 0.339296	valid_1's l2: 0.342499
[400]	training's l2: 0.337924	valid_1's l2: 0.342208
[450]	training's l2: 0.33676	valid_1's l2: 0.342051
[500]	training's l2: 0.335706	valid_1's l2: 0.341847
[550]	training's l2: 0.334744	valid_1's l2: 0.341765
[600]	training's l2: 0.333809	valid_1's l2: 0.34166
[650]	training's l2: 0.332955	valid_1's l2: 0.341647
[700]	training's l2: 0.332112	valid_1's l2: 0.341613
[750]	training's l2: 0.331345	valid_1's l2: 0.341511
[800]	tr

[600]	training's l2: 0.363565	valid_1's l2: 0.377561
[650]	training's l2: 0.362525	valid_1's l2: 0.377443
[700]	training's l2: 0.361444	valid_1's l2: 0.377275
[750]	training's l2: 0.360421	valid_1's l2: 0.377147
[800]	training's l2: 0.359518	valid_1's l2: 0.377045
[850]	training's l2: 0.358656	valid_1's l2: 0.376922
[900]	training's l2: 0.357812	valid_1's l2: 0.376938
[950]	training's l2: 0.356979	valid_1's l2: 0.376941
[1000]	training's l2: 0.356116	valid_1's l2: 0.376772
[1050]	training's l2: 0.355306	valid_1's l2: 0.376754
[1100]	training's l2: 0.354558	valid_1's l2: 0.376716
[1150]	training's l2: 0.353847	valid_1's l2: 0.376683
[1200]	training's l2: 0.353053	valid_1's l2: 0.376601
[1250]	training's l2: 0.352406	valid_1's l2: 0.376621
[1300]	training's l2: 0.351719	valid_1's l2: 0.376583
[1350]	training's l2: 0.35101	valid_1's l2: 0.376574
[1400]	training's l2: 0.35033	valid_1's l2: 0.376586
Early stopping, best iteration is:
[1319]	training's l2: 0.351462	valid_1's l2: 0.376546
mod

[750]	training's l2: 0.30019	valid_1's l2: 0.297365
[800]	training's l2: 0.299553	valid_1's l2: 0.297293
[850]	training's l2: 0.298945	valid_1's l2: 0.297233
[900]	training's l2: 0.298393	valid_1's l2: 0.297197
[950]	training's l2: 0.297822	valid_1's l2: 0.297161
[1000]	training's l2: 0.297255	valid_1's l2: 0.297091
[1050]	training's l2: 0.296725	valid_1's l2: 0.29707
[1100]	training's l2: 0.296172	valid_1's l2: 0.297007
[1150]	training's l2: 0.295647	valid_1's l2: 0.296963
[1200]	training's l2: 0.295108	valid_1's l2: 0.296955
[1250]	training's l2: 0.294585	valid_1's l2: 0.296929
[1300]	training's l2: 0.294106	valid_1's l2: 0.296899
[1350]	training's l2: 0.293623	valid_1's l2: 0.296861
[1400]	training's l2: 0.293127	valid_1's l2: 0.296811
[1450]	training's l2: 0.292685	valid_1's l2: 0.2968
[1500]	training's l2: 0.29221	valid_1's l2: 0.296803
[1550]	training's l2: 0.29174	valid_1's l2: 0.296763
[1600]	training's l2: 0.291291	valid_1's l2: 0.296754
[1650]	training's l2: 0.290839	valid_1'

[1850]	training's l2: 0.328217	valid_1's l2: 0.345226
[1900]	training's l2: 0.327748	valid_1's l2: 0.345225
[1950]	training's l2: 0.32726	valid_1's l2: 0.345223
Early stopping, best iteration is:
[1829]	training's l2: 0.328424	valid_1's l2: 0.345204
model runtime: 0:02:07.181059
--------------------------------------------------
Step 6
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.524612	valid_1's l2: 0.507754
[100]	training's l2: 0.401332	valid_1's l2: 0.390675
[150]	training's l2: 0.37801	valid_1's l2: 0.36981
[200]	training's l2: 0.370403	valid_1's l2: 0.363926
[250]	training's l2: 0.365933	valid_1's l2: 0.360757
[300]	training's l2: 0.362465	valid_1's l2: 0.358611
[350]	training's l2: 0.360149	valid_1's l2: 0.357557
[400]	training's l2: 0.358199	valid_1's l2: 0.356834
[450]	training's l2: 0.356606	valid_1's l2: 0.356442
[500]	training's l2: 0.355206	valid_1's l2: 0.356095
[550]	training's l2: 0

[1500]	training's l2: 0.309827	valid_1's l2: 0.329863
[1550]	training's l2: 0.309297	valid_1's l2: 0.32986
[1600]	training's l2: 0.308765	valid_1's l2: 0.32982
[1650]	training's l2: 0.308285	valid_1's l2: 0.329823
[1700]	training's l2: 0.307808	valid_1's l2: 0.32983
[1750]	training's l2: 0.307307	valid_1's l2: 0.32983
Early stopping, best iteration is:
[1643]	training's l2: 0.308354	valid_1's l2: 0.32981
model runtime: 0:01:58.221516
--------------------------------------------------
Step 10
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.445515	valid_1's l2: 0.464102
[100]	training's l2: 0.364816	valid_1's l2: 0.375965
[150]	training's l2: 0.349048	valid_1's l2: 0.358751
[200]	training's l2: 0.343497	valid_1's l2: 0.353532
[250]	training's l2: 0.340269	valid_1's l2: 0.351162
[300]	training's l2: 0.337916	valid_1's l2: 0.349911
[350]	training's l2: 0.336142	valid_1's l2: 0.349224
[400]	training's l2:

[150]	training's l2: 0.38245	valid_1's l2: 0.394247
[200]	training's l2: 0.376117	valid_1's l2: 0.390733
[250]	training's l2: 0.37223	valid_1's l2: 0.389331
[300]	training's l2: 0.36957	valid_1's l2: 0.388513
[350]	training's l2: 0.367452	valid_1's l2: 0.387857
[400]	training's l2: 0.365871	valid_1's l2: 0.387706
[450]	training's l2: 0.36445	valid_1's l2: 0.387801
[500]	training's l2: 0.363175	valid_1's l2: 0.387786
Early stopping, best iteration is:
[396]	training's l2: 0.365975	valid_1's l2: 0.387659
model runtime: 0:00:50.950536
--------------------------------------------------
Step 15
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.480888	valid_1's l2: 0.470373
[100]	training's l2: 0.389702	valid_1's l2: 0.388768
[150]	training's l2: 0.371852	valid_1's l2: 0.376637
[200]	training's l2: 0.365661	valid_1's l2: 0.374751
[250]	training's l2: 0.362102	valid_1's l2: 0.374417
[300]	training's l2: 0.359

[800]	training's l2: 0.3245	valid_1's l2: 0.328743
[850]	training's l2: 0.323895	valid_1's l2: 0.328707
[900]	training's l2: 0.323332	valid_1's l2: 0.328727
[950]	training's l2: 0.322763	valid_1's l2: 0.328759
Early stopping, best iteration is:
[851]	training's l2: 0.323884	valid_1's l2: 0.328704
model runtime: 0:01:16.265417
--------------------------------------------------
Step 4
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.468533	valid_1's l2: 0.446603
[100]	training's l2: 0.366463	valid_1's l2: 0.354264
[150]	training's l2: 0.346538	valid_1's l2: 0.338459
[200]	training's l2: 0.339586	valid_1's l2: 0.333557
[250]	training's l2: 0.335666	valid_1's l2: 0.330948
[300]	training's l2: 0.33302	valid_1's l2: 0.329347
[350]	training's l2: 0.331122	valid_1's l2: 0.328336
[400]	training's l2: 0.329623	valid_1's l2: 0.327731
[450]	training's l2: 0.328388	valid_1's l2: 0.327409
[500]	training's l2: 0.327

[50]	training's l2: 0.465077	valid_1's l2: 0.456871
[100]	training's l2: 0.372371	valid_1's l2: 0.367545
[150]	training's l2: 0.353706	valid_1's l2: 0.351233
[200]	training's l2: 0.347155	valid_1's l2: 0.346687
[250]	training's l2: 0.343467	valid_1's l2: 0.344405
[300]	training's l2: 0.34103	valid_1's l2: 0.343268
[350]	training's l2: 0.339319	valid_1's l2: 0.342811
[400]	training's l2: 0.337976	valid_1's l2: 0.342522
[450]	training's l2: 0.336816	valid_1's l2: 0.342228
[500]	training's l2: 0.33586	valid_1's l2: 0.342099
[550]	training's l2: 0.334987	valid_1's l2: 0.341996
[600]	training's l2: 0.334217	valid_1's l2: 0.341961
[650]	training's l2: 0.333485	valid_1's l2: 0.341915
[700]	training's l2: 0.332726	valid_1's l2: 0.341838
[750]	training's l2: 0.33207	valid_1's l2: 0.341801
[800]	training's l2: 0.33143	valid_1's l2: 0.341762
[850]	training's l2: 0.330775	valid_1's l2: 0.341724
[900]	training's l2: 0.330187	valid_1's l2: 0.341733
[950]	training's l2: 0.329616	valid_1's l2: 0.34171

[250]	training's l2: 0.372177	valid_1's l2: 0.389721
[300]	training's l2: 0.369461	valid_1's l2: 0.388801
[350]	training's l2: 0.367564	valid_1's l2: 0.388312
[400]	training's l2: 0.365963	valid_1's l2: 0.38791
[450]	training's l2: 0.36464	valid_1's l2: 0.387875
[500]	training's l2: 0.363464	valid_1's l2: 0.387857
[550]	training's l2: 0.362444	valid_1's l2: 0.38781
[600]	training's l2: 0.361517	valid_1's l2: 0.387763
[650]	training's l2: 0.360641	valid_1's l2: 0.387743
[700]	training's l2: 0.359827	valid_1's l2: 0.387794
[750]	training's l2: 0.359082	valid_1's l2: 0.387939
Early stopping, best iteration is:
[650]	training's l2: 0.360641	valid_1's l2: 0.387743
model runtime: 0:01:07.069666
--------------------------------------------------
Step 15
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.480903	valid_1's l2: 0.470282
[100]	training's l2: 0.389739	valid_1's l2: 0.388773
[150]	training's l2: 0.37

model runtime: 0:01:18.585553
--------------------------------------------------
Step 8
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.448441	valid_1's l2: 0.446411
[100]	training's l2: 0.353591	valid_1's l2: 0.358575
[150]	training's l2: 0.334409	valid_1's l2: 0.34511
[200]	training's l2: 0.326786	valid_1's l2: 0.342357
[250]	training's l2: 0.321437	valid_1's l2: 0.341394
[300]	training's l2: 0.31702	valid_1's l2: 0.34083
[350]	training's l2: 0.313328	valid_1's l2: 0.3407
[400]	training's l2: 0.310055	valid_1's l2: 0.340622
[450]	training's l2: 0.307111	valid_1's l2: 0.340708
[500]	training's l2: 0.304365	valid_1's l2: 0.340779
Early stopping, best iteration is:
[401]	training's l2: 0.309992	valid_1's l2: 0.340621
model runtime: 0:01:17.235014
--------------------------------------------------
Step 9
--------------------------------------------------
Training until validation scores don't improve f

[200]	training's l2: 0.295914	valid_1's l2: 0.29729
[250]	training's l2: 0.291265	valid_1's l2: 0.296413
[300]	training's l2: 0.287466	valid_1's l2: 0.296084
[350]	training's l2: 0.284166	valid_1's l2: 0.295925
[400]	training's l2: 0.281397	valid_1's l2: 0.295928
[450]	training's l2: 0.27882	valid_1's l2: 0.295907
[500]	training's l2: 0.276432	valid_1's l2: 0.295905
[550]	training's l2: 0.274096	valid_1's l2: 0.29589
[600]	training's l2: 0.271832	valid_1's l2: 0.295904
[650]	training's l2: 0.269758	valid_1's l2: 0.295876
[700]	training's l2: 0.267697	valid_1's l2: 0.295906
[750]	training's l2: 0.265643	valid_1's l2: 0.295948
Early stopping, best iteration is:
[648]	training's l2: 0.269835	valid_1's l2: 0.29587
model runtime: 0:01:47.891945
--------------------------------------------------
Step 3
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.429324	valid_1's l2: 0.415569
[100]	training's l2: 0.3436

[50]	training's l2: 0.455534	valid_1's l2: 0.45772
[100]	training's l2: 0.354803	valid_1's l2: 0.364028
[150]	training's l2: 0.333761	valid_1's l2: 0.348591
[200]	training's l2: 0.32516	valid_1's l2: 0.344929
[250]	training's l2: 0.319367	valid_1's l2: 0.343421
[300]	training's l2: 0.314635	valid_1's l2: 0.342502
[350]	training's l2: 0.310755	valid_1's l2: 0.342238
[400]	training's l2: 0.30745	valid_1's l2: 0.342062
[450]	training's l2: 0.304468	valid_1's l2: 0.342088
[500]	training's l2: 0.301664	valid_1's l2: 0.342058
Early stopping, best iteration is:
[407]	training's l2: 0.306999	valid_1's l2: 0.342027
model runtime: 0:01:31.924315
--------------------------------------------------
Step 12
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.503387	valid_1's l2: 0.47945
[100]	training's l2: 0.388231	valid_1's l2: 0.388116
[150]	training's l2: 0.364852	valid_1's l2: 0.377345
[200]	training's l2: 0.3554

Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.504052	valid_1's l2: 0.49186
[100]	training's l2: 0.380013	valid_1's l2: 0.378138
[150]	training's l2: 0.355498	valid_1's l2: 0.360574
[200]	training's l2: 0.346057	valid_1's l2: 0.356705
[250]	training's l2: 0.339532	valid_1's l2: 0.355339
[300]	training's l2: 0.334222	valid_1's l2: 0.354496
[350]	training's l2: 0.329594	valid_1's l2: 0.353981
[400]	training's l2: 0.325728	valid_1's l2: 0.353723
[450]	training's l2: 0.322404	valid_1's l2: 0.353628
[500]	training's l2: 0.319506	valid_1's l2: 0.353569
[550]	training's l2: 0.316913	valid_1's l2: 0.353549
[600]	training's l2: 0.314409	valid_1's l2: 0.353577
Early stopping, best iteration is:
[523]	training's l2: 0.318306	valid_1's l2: 0.353531
model runtime: 0:01:47.328021
--------------------------------------------------
Step 7
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's

### Run #3

In [69]:
val_results_run_3 = []

start_value = 12
end_value = 18

for num, grid_item in enumerate(param_grid[start_value:end_value],start_value): 
              
    params = {
        'num_leaves': grid_item[0],
        'objective': 'regression',
        'learning_rate': 0.02,
        'feature_fraction': grid_item[1],
        'bagging_fraction': grid_item[2],
        'bagging_freq': 1,
        'metric': 'l2',
        'num_threads': 0
    }

    print("*" * 50)
    print(f'Model {num}')
    print(f'num_leaves: {params["num_leaves"]}')
    print(f'feature_fraction: {params["feature_fraction"]}')
    print(f'bagging_fraction: {params["bagging_fraction"]}')
    print("*" * 50)
    
    MAX_ROUNDS = 5000
    val_pred = []
    test_pred = []
    feature_importance = []
    
    for i in range(15):
        tic = datetime.now()
        print("-" * 50)
        print(f'Step {i+1}')
        print("-" * 50)
        dtrain = lgb.Dataset(
            X_train, label=y_train[:, i],
            weight=pd.concat([items["perishable"]] * num_days) * 0.25 + 1
        )
        dval = lgb.Dataset(
            X_val, label=y_val[:, i], reference=dtrain,
            weight=items["perishable"] * 0.25 + 1
        )
        bst = lgb.train(
            params, dtrain, num_boost_round=MAX_ROUNDS,
            valid_sets=[dtrain, dval], early_stopping_rounds=125, verbose_eval=50
        )
        
        feature_importance.append(bst.feature_importance("gain"))
              
        #predict with validation data
        val_pred.append(bst.predict(
            X_val, num_iteration=bst.best_iteration or MAX_ROUNDS))

        #predict with test data
        test_pred.append(bst.predict(
            X_test, num_iteration=bst.best_iteration or MAX_ROUNDS))
        toc = datetime.now()
        print(f'model runtime: {toc-tic}')

    #calculate MSE
    print("Validation mse:", mean_squared_error(
        y_val, np.array(val_pred).transpose()))
    
    #calculate validation error
    weight = items["perishable"] * 0.25 + 1
    err = (y_val - np.array(val_pred).transpose())**2
    err = err.sum(axis=1) * weight
    err = np.sqrt(err.sum() / weight.sum() / 15)
    print(f'nwrmsle = {err}')
    val_results_run_3.append(err)

    #save data
    df_preds_test = pd.DataFrame(
        np.array(test_pred).transpose(), index=df_2017_index,
        columns=pd.date_range("2017-08-01", periods=15)
    )
    df_preds_test.index.set_names(["store_nbr", "item_nbr"], inplace=True)
    df_preds_test.to_csv(folder_path + 'lgbm_test_pred_model_' + str(num) +'.csv')
    
    #save featue_importance
    df_feature_importance = pd.DataFrame(np.array(feature_importance).transpose(),
                                         index = X_train.columns,
                                        columns = pd.date_range('2017-08-01', periods=15))
    df_feature_importance.to_csv(folder_path + 'lgbm_feature_importance_model_' + str(num) +'.csv')

print(val_results_run_3)
          
with open(folder_path + 'val_results_run_3.txt', 'w') as filehandle:
    for val_result in val_results_run_3:
        filehandle.write(f'{val_result}\n')

**************************************************
Model 12
num_leaves: 200
feature_fraction: 0.9
bagging_fraction: 0.6
**************************************************
--------------------------------------------------
Step 1
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.417028	valid_1's l2: 0.404258
[100]	training's l2: 0.319388	valid_1's l2: 0.315791
[150]	training's l2: 0.300764	valid_1's l2: 0.303109
[200]	training's l2: 0.29385	valid_1's l2: 0.300669
[250]	training's l2: 0.289235	valid_1's l2: 0.299851
[300]	training's l2: 0.285437	valid_1's l2: 0.299371
[350]	training's l2: 0.282213	valid_1's l2: 0.299126
[400]	training's l2: 0.279413	valid_1's l2: 0.299054
[450]	training's l2: 0.276825	valid_1's l2: 0.298981
[500]	training's l2: 0.274332	valid_1's l2: 0.298966
[550]	training's l2: 0.271953	valid_1's l2: 0.298936
[600]	training's l2: 0.269694	valid_1's l2: 0.298875
[650]	training's l2: 0.2

[350]	training's l2: 0.301374	valid_1's l2: 0.3289
[400]	training's l2: 0.298252	valid_1's l2: 0.328802
[450]	training's l2: 0.295419	valid_1's l2: 0.328732
[500]	training's l2: 0.292756	valid_1's l2: 0.328791
[550]	training's l2: 0.290198	valid_1's l2: 0.328858
Early stopping, best iteration is:
[455]	training's l2: 0.295125	valid_1's l2: 0.328717
model runtime: 0:01:30.864551
--------------------------------------------------
Step 10
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.431126	valid_1's l2: 0.454633
[100]	training's l2: 0.34801	valid_1's l2: 0.368235
[150]	training's l2: 0.330493	valid_1's l2: 0.352819
[200]	training's l2: 0.323174	valid_1's l2: 0.349081
[250]	training's l2: 0.317967	valid_1's l2: 0.347671
[300]	training's l2: 0.313679	valid_1's l2: 0.346936
[350]	training's l2: 0.31009	valid_1's l2: 0.346682
[400]	training's l2: 0.306866	valid_1's l2: 0.346562
[450]	training's l2: 0.303

[250]	training's l2: 0.312605	valid_1's l2: 0.326016
[300]	training's l2: 0.308036	valid_1's l2: 0.325217
[350]	training's l2: 0.304232	valid_1's l2: 0.324865
[400]	training's l2: 0.300885	valid_1's l2: 0.324684
[450]	training's l2: 0.29791	valid_1's l2: 0.324552
[500]	training's l2: 0.29515	valid_1's l2: 0.324561
[550]	training's l2: 0.292634	valid_1's l2: 0.324552
[600]	training's l2: 0.290222	valid_1's l2: 0.324518
[650]	training's l2: 0.287815	valid_1's l2: 0.324512
[700]	training's l2: 0.285542	valid_1's l2: 0.32454
Early stopping, best iteration is:
[603]	training's l2: 0.290065	valid_1's l2: 0.324509
model runtime: 0:01:54.600631
--------------------------------------------------
Step 5
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.48322	valid_1's l2: 0.459379
[100]	training's l2: 0.367652	valid_1's l2: 0.360602
[150]	training's l2: 0.345433	valid_1's l2: 0.347895
[200]	training's l2: 0.3368

[200]	training's l2: 0.362336	valid_1's l2: 0.382962
[250]	training's l2: 0.355373	valid_1's l2: 0.382724
[300]	training's l2: 0.349843	valid_1's l2: 0.382639
[350]	training's l2: 0.34492	valid_1's l2: 0.382641
[400]	training's l2: 0.340895	valid_1's l2: 0.38281
Early stopping, best iteration is:
[301]	training's l2: 0.349706	valid_1's l2: 0.382612
model runtime: 0:01:24.926196
--------------------------------------------------
Step 14
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.477482	valid_1's l2: 0.488546
[100]	training's l2: 0.380887	valid_1's l2: 0.40148
[150]	training's l2: 0.360747	valid_1's l2: 0.388818
[200]	training's l2: 0.352211	valid_1's l2: 0.386683
[250]	training's l2: 0.346103	valid_1's l2: 0.385951
[300]	training's l2: 0.340913	valid_1's l2: 0.385447
[350]	training's l2: 0.336603	valid_1's l2: 0.385282
[400]	training's l2: 0.333001	valid_1's l2: 0.385334
[450]	training's l2: 0.32

[100]	training's l2: 0.353242	valid_1's l2: 0.358521
[150]	training's l2: 0.334048	valid_1's l2: 0.345033
[200]	training's l2: 0.326412	valid_1's l2: 0.342414
[250]	training's l2: 0.320997	valid_1's l2: 0.341334
[300]	training's l2: 0.316471	valid_1's l2: 0.340787
[350]	training's l2: 0.312619	valid_1's l2: 0.340564
[400]	training's l2: 0.3096	valid_1's l2: 0.340582
[450]	training's l2: 0.306987	valid_1's l2: 0.340597
Early stopping, best iteration is:
[372]	training's l2: 0.311248	valid_1's l2: 0.340528
model runtime: 0:01:31.996318
--------------------------------------------------
Step 9
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.440998	valid_1's l2: 0.444344
[100]	training's l2: 0.341286	valid_1's l2: 0.349735
[150]	training's l2: 0.321687	valid_1's l2: 0.33467
[200]	training's l2: 0.314162	valid_1's l2: 0.331322
[250]	training's l2: 0.308988	valid_1's l2: 0.330056
[300]	training's l2: 0.304

[200]	training's l2: 0.319425	valid_1's l2: 0.329159
[250]	training's l2: 0.31436	valid_1's l2: 0.328563
[300]	training's l2: 0.310066	valid_1's l2: 0.328175
[350]	training's l2: 0.306421	valid_1's l2: 0.328057
[400]	training's l2: 0.303287	valid_1's l2: 0.328095
[450]	training's l2: 0.300312	valid_1's l2: 0.328161
Early stopping, best iteration is:
[336]	training's l2: 0.307345	valid_1's l2: 0.32802
model runtime: 0:01:20.999634
--------------------------------------------------
Step 4
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.44921	valid_1's l2: 0.432207
[100]	training's l2: 0.346586	valid_1's l2: 0.34305
[150]	training's l2: 0.326315	valid_1's l2: 0.330239
[200]	training's l2: 0.318266	valid_1's l2: 0.32753
[250]	training's l2: 0.312706	valid_1's l2: 0.326407
[300]	training's l2: 0.308136	valid_1's l2: 0.325714
[350]	training's l2: 0.304361	valid_1's l2: 0.325402
[400]	training's l2: 0.30107

model runtime: 0:01:32.111429
--------------------------------------------------
Step 13
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.517894	valid_1's l2: 0.498829
[100]	training's l2: 0.397366	valid_1's l2: 0.397075
[150]	training's l2: 0.372649	valid_1's l2: 0.384362
[200]	training's l2: 0.362583	valid_1's l2: 0.383179
[250]	training's l2: 0.355546	valid_1's l2: 0.38301
[300]	training's l2: 0.349995	valid_1's l2: 0.383083
[350]	training's l2: 0.345328	valid_1's l2: 0.383229
Early stopping, best iteration is:
[245]	training's l2: 0.356184	valid_1's l2: 0.382964
model runtime: 0:01:13.543281
--------------------------------------------------
Step 14
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.477057	valid_1's l2: 0.488191
[100]	training's l2: 0.380782	valid_1's l2: 0.400995
[150]	training's l2: 0.360781	vali

model runtime: 0:01:56.816751
--------------------------------------------------
Step 7
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.461471	valid_1's l2: 0.46759
[100]	training's l2: 0.364583	valid_1's l2: 0.372286
[150]	training's l2: 0.34501	valid_1's l2: 0.356641
[200]	training's l2: 0.336797	valid_1's l2: 0.352813
[250]	training's l2: 0.33118	valid_1's l2: 0.351515
[300]	training's l2: 0.326158	valid_1's l2: 0.35079
[350]	training's l2: 0.322121	valid_1's l2: 0.350575
[400]	training's l2: 0.318678	valid_1's l2: 0.35045
[450]	training's l2: 0.315527	valid_1's l2: 0.350368
[500]	training's l2: 0.312518	valid_1's l2: 0.350475
[550]	training's l2: 0.309789	valid_1's l2: 0.350489
Early stopping, best iteration is:
[447]	training's l2: 0.315696	valid_1's l2: 0.350356
model runtime: 0:01:41.175977
--------------------------------------------------
Step 8
----------------------------------------------

[450]	training's l2: 0.276831	valid_1's l2: 0.298866
[500]	training's l2: 0.274767	valid_1's l2: 0.298848
[550]	training's l2: 0.272798	valid_1's l2: 0.29885
[600]	training's l2: 0.270915	valid_1's l2: 0.298856
[650]	training's l2: 0.269062	valid_1's l2: 0.29886
Early stopping, best iteration is:
[569]	training's l2: 0.272045	valid_1's l2: 0.29883
model runtime: 0:01:52.416549
--------------------------------------------------
Step 2
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.42396	valid_1's l2: 0.404211
[100]	training's l2: 0.321901	valid_1's l2: 0.312802
[150]	training's l2: 0.302841	valid_1's l2: 0.300034
[200]	training's l2: 0.295779	valid_1's l2: 0.297531
[250]	training's l2: 0.291056	valid_1's l2: 0.296707
[300]	training's l2: 0.287097	valid_1's l2: 0.296273
[350]	training's l2: 0.283804	valid_1's l2: 0.296164
[400]	training's l2: 0.281119	valid_1's l2: 0.296155
[450]	training's l2: 0.2788

[250]	training's l2: 0.319065	valid_1's l2: 0.343997
[300]	training's l2: 0.314421	valid_1's l2: 0.34319
[350]	training's l2: 0.310393	valid_1's l2: 0.342818
[400]	training's l2: 0.307177	valid_1's l2: 0.3428
[450]	training's l2: 0.304481	valid_1's l2: 0.342817
Early stopping, best iteration is:
[360]	training's l2: 0.309673	valid_1's l2: 0.342776
model runtime: 0:01:38.492400
--------------------------------------------------
Step 12
--------------------------------------------------
Training until validation scores don't improve for 125 rounds
[50]	training's l2: 0.502795	valid_1's l2: 0.479263
[100]	training's l2: 0.388085	valid_1's l2: 0.388555
[150]	training's l2: 0.36482	valid_1's l2: 0.378088
[200]	training's l2: 0.35534	valid_1's l2: 0.376573
[250]	training's l2: 0.348938	valid_1's l2: 0.376321
[300]	training's l2: 0.343479	valid_1's l2: 0.375833
[350]	training's l2: 0.338995	valid_1's l2: 0.375627
[400]	training's l2: 0.335312	valid_1's l2: 0.37554
[450]	training's l2: 0.33234

In [None]:
#Manually compiling the result due to computer crash
#val_result_run_1 = [0.5872718050674727, 0.5873143138903639, 0.5876759499194352, 0.5874139858966546, 0.5874023160917615, 0.5878217115658666]
#val_result_run_2 = [0.5875244205187689, 0.5874967800573019, 0.5878745768757881, 0.5863037771886317, 0.5861946713091547, 0.5863312204831337]
#val_result_run_3 = [0.5864005897634857, 0.5863076932051245, 0.5865439595121598, 0.5865449715362678, 0.5864578642166351, 0.5867543150265673]

In [72]:
val_results_all  = val_results_run_1 + val_results_run_2 + val_results_run_3
print(val_results_all)

[0.5877418750255925, 0.5876746516156044, 0.5881172916574132, 0.5878097976764276, 0.5878256686637173, 0.5882729044227182, 0.5878852507920574, 0.5879840469762064, 0.5884032028033929, 0.5867102587602677, 0.5866896155206477, 0.5868599336087861, 0.5868187773976103, 0.586774476729484, 0.5869875794626516, 0.5869545868239956, 0.5869062287139597, 0.5871591986955973]


In [76]:
lowest_index = np.argmin(val_results_all)
print(f'model_{lowest_index} has the lowest validation error')
print(f'model setting is {param_grid[lowest_index]}')
print(f'lowest validation nwrmsle is {val_results_all[lowest_index]}')

model_10 has the lowest validation error
model setting is (200, 0.8, 0.8)
lowest validation nwrmsle is 0.5866896155206477


In [77]:
X_train.columns

Index(['promo_7', 'promo_14', 'promo_30', 'promo_3_aft', 'promo_7_aft',
       'promo_14_aft', 'has_promo_mean_3', 'no_promo_mean_3',
       'has_promo_mean_7', 'no_promo_mean_7',
       ...
       'cluster_8', 'cluster_9', 'cluster_10', 'cluster_11', 'cluster_12',
       'cluster_13', 'cluster_14', 'cluster_15', 'cluster_16', 'cluster_17'],
      dtype='object', length=544)