**IMPORT PACKAGES | BUILD CUSTOM FUNCTIONS | SET PARAMETERS**

In [None]:
import numpy as np
import pandas as pd
import logging
import datetime
import lightgbm as lgb
import random
import os
import psutil
import argparse
import time
import warnings
import gc
import pickle
import math
import shutil
import math, decimal

from math import ceil
from sklearn.metrics import mean_squared_error

warnings.filterwarnings('ignore')

In [None]:
# Pin every global source of randomness so repeated runs are comparable.
# NOTE: PYTHONHASHSEED is read at interpreter start-up, so exporting it here
# only influences child processes, not the hashing of this running kernel.
os.environ['PYTHONHASHSEED'] = '42'
np.random.seed(42)
random.seed(42)

In [None]:
class Util(object):
    """Stateless helpers: seeding, memory reporting, dtype downcasting and merging."""

    @staticmethod
    def set_seed(seed):
        """Seed Python's ``random``, NumPy's global RNG and export PYTHONHASHSEED."""
        random.seed(seed)
        np.random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        return

    @staticmethod
    def get_memory_usage():
        """Return the first field of psutil's ``memory_info()`` for this process, in GiB (2 decimals).

        Per psutil's documentation that first field is the resident set size.
        """
        return np.round(psutil.Process(os.getpid()).memory_info()[0] / 2. ** 30, 2)

    @staticmethod
    def reduce_mem_usage(df, verbose=False):
        """Downcast numeric columns of ``df`` in place to the smallest dtype that fits.

        Args:
            df: DataFrame to shrink; its columns are reassigned in place.
            verbose: when True, print the resulting size and the percentage saved.

        Returns:
            The same ``df`` object, for call chaining.

        Notes:
            * Integer bounds are inclusive, so a column whose max is exactly 127
              is stored as int8 instead of being widened to int16.
            * Floats are narrowed on *range* only; float16 keeps roughly three
              significant digits, so values are rounded (e.g. 2.98 -> 2.980469).
            * Columns whose min/max are NaN (empty or all-NaN) are left as-is for
              integers and stored as float64 for floats.
        """
        numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        int_candidates = (np.int8, np.int16, np.int32, np.int64)
        float_candidates = (np.float16, np.float32)

        start_mem = df.memory_usage().sum() / 1024 ** 2
        for col in df.columns:
            col_type = df[col].dtypes
            if col_type not in numerics:
                continue
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                # Smallest integer type whose closed range contains the data.
                for candidate in int_candidates:
                    limits = np.iinfo(candidate)
                    if c_min >= limits.min and c_max <= limits.max:
                        df[col] = df[col].astype(candidate)
                        break
            else:
                for candidate in float_candidates:
                    limits = np.finfo(candidate)
                    if c_min > limits.min and c_max < limits.max:
                        df[col] = df[col].astype(candidate)
                        break
                else:
                    # Out of float32 range, or min/max is NaN.
                    df[col] = df[col].astype(np.float64)
        end_mem = df.memory_usage().sum() / 1024 ** 2
        if verbose:
            # Guard the ratio so a zero-sized frame reports 0% instead of NaN.
            reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
            print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
        return df

    @staticmethod
    def merge_by_concat(df1, df2, merge_on):
        """Left-join the non-key columns of ``df2`` onto ``df1`` without changing ``df1``'s dtypes.

        Only the key columns of ``df1`` take part in the merge; the new columns
        are then glued onto ``df1`` column-wise.

        Args:
            df1: left frame; row order and index are preserved.
            df2: lookup frame; must hold at most one row per key combination.
            merge_on: list of key column names present in both frames.

        Returns:
            A new DataFrame with ``df1``'s columns followed by ``df2``'s non-key columns.

        Raises:
            pandas.errors.MergeError: if ``df2`` has duplicate keys, which would
                otherwise add rows and misalign the concatenation.
        """
        merged_gf = df1[merge_on].merge(df2, on=merge_on, how='left', validate='many_to_one')
        # merge() returns a fresh 0..n-1 index; realign to df1 so the column-wise
        # concat pairs rows correctly even when df1 has a non-default index.
        merged_gf.index = df1.index
        new_columns = [col for col in list(merged_gf) if col not in merge_on]
        return pd.concat([df1, merged_gf[new_columns]], axis=1)

In [None]:
# Key columns identifying one row of the long-format grid: series id and day.
main_index_list = ['id', 'd']
# Name given to the value column when the sales table is melted to long format.
target = 'demand'
# Number of future days appended to the grid.
# NOTE: the lag-feature loop and the training loop below rebind this name.
prediction_horizon = 28
# First and last day numbers (inclusive) of the training window.
start_train_day_x = 1
end_train_day_x = 1941

# Columns excluded from the model's feature list inside load_df.
remove_features = ['id', 'state_id', 'store_id', 'wm_yr_wk', 'd', target]

**OPTION TO DOWNLOAD DATA**

In [None]:
# %%bash
# pip install dask_xgboost
# pip install kaggle
# export KAGGLE_USERNAME=<your-kaggle-username>
# export KAGGLE_KEY=<your-kaggle-api-key>   # never commit a real key; revoke any key that was published here

# kaggle competitions download -c m5-forecasting-accuracy

# unzip -n m5-forecasting-accuracy -d m5-forecasting-accuracy
# rm -rf sample_data

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting dask_xgboost
  Downloading dask_xgboost-0.2.0-py2.py3-none-any.whl (14 kB)
Installing collected packages: dask-xgboost
Successfully installed dask-xgboost-0.2.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Downloading m5-forecasting-accuracy.zip to /content

Archive:  m5-forecasting-accuracy.zip
  inflating: m5-forecasting-accuracy/calendar.csv  
  inflating: m5-forecasting-accuracy/sales_train_evaluation.csv  
  inflating: m5-forecasting-accuracy/sales_train_validation.csv  
  inflating: m5-forecasting-accuracy/sample_submission.csv  
  inflating: m5-forecasting-accuracy/sell_prices.csv  


  0%|          | 0.00/45.8M [00:00<?, ?B/s] 11%|█         | 5.00M/45.8M [00:00<00:02, 16.1MB/s] 20%|█▉        | 9.00M/45.8M [00:00<00:01, 19.4MB/s] 72%|███████▏  | 33.0M/45.8M [00:00<00:00, 66.2MB/s] 90%|████████▉ | 41.0M/45.8M [00:01<00:00, 41.6MB/s]100%|██████████| 45.8M/45.8M [00:01<00:00, 43.9MB/s]


**OPTION TO MOUNT DRIVE WITH DATA**

In [None]:
# Colab-only step: mount Google Drive so the /content/drive/... paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**PATHS FOR DATA**

In [None]:
# Locations of the raw M5 competition CSVs on the mounted Drive.
_m5_dir = '/content/drive/MyDrive/Capstone/Models/m5-forecasting-accuracy'
trainpath = _m5_dir + '/sales_train_evaluation.csv'
pricepath = _m5_dir + '/sell_prices.csv'
calpath = _m5_dir + '/calendar.csv'
submissionpath = _m5_dir + '/sample_submission.csv'

In [None]:
# Locations of the cached feature-engineering pickles on the mounted Drive.
# lagfeats_path is a prefix: the prediction horizon is appended when loading.
_feats_dir = '/content/drive/MyDrive/Capstone/Models/Top4_Original'
grid_base_path = _feats_dir + '/grid_base'
calfeats_path = _feats_dir + '/calfeats'
pricefeats_path = _feats_dir + '/pricefeats'
encoding_path = _feats_dir + '/encodingfeats'
lagfeats_path = _feats_dir + '/lagfeats_'

In [None]:
# Quick look at the cached price-feature table; requires the pickle at pricefeats_path to exist already.
pd.read_pickle(pricefeats_path)

Unnamed: 0,id,d,sell_price,price_max,price_min,price_std,price_mean,price_norm,price_nunique,item_nunique,price_momentum,price_momentum_m,price_momentum_y,sell_price_cent,price_max_cent,price_min_cent
0,HOBBIES_1_001_CA_1_evaluation,1,,,,,,,,,,,,,,
1,HOBBIES_1_002_CA_1_evaluation,1,,,,,,,,,,,,,,
2,HOBBIES_1_003_CA_1_evaluation,1,,,,,,,,,,,,,,
3,HOBBIES_1_004_CA_1_evaluation,1,,,,,,,,,,,,,,
4,HOBBIES_1_005_CA_1_evaluation,1,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60034805,FOODS_3_823_WI_3_evaluation,1969,2.980469,2.980469,2.480469,0.171631,2.800781,1.000000,5.0,206.0,1.0,1.032227,1.022461,0.979980,0.979980,0.479980
60034806,FOODS_3_824_WI_3_evaluation,1969,2.480469,2.679688,2.000000,0.253174,2.507812,0.925293,4.0,135.0,1.0,0.985840,1.112305,0.479980,0.680176,0.000000
60034807,FOODS_3_825_WI_3_evaluation,1969,3.980469,4.378906,3.980469,0.188599,4.117188,0.908691,3.0,150.0,1.0,0.957520,1.000000,0.979980,0.379883,0.979980
60034808,FOODS_3_826_WI_3_evaluation,1969,1.280273,1.280273,1.280273,0.000000,1.280273,1.000000,1.0,44.0,1.0,1.000000,1.000000,0.280029,0.280029,0.280029


**LOAD BASE COMPETITION DATA**

In [None]:
def load_data():
    """Read the four competition CSVs named by the module-level path constants.

    Returns:
        tuple: ``(train_df, prices_df, calendar_df, submission_df)`` read from
        ``trainpath``, ``pricepath``, ``calpath`` and ``submissionpath``.
    """
    csv_paths = (trainpath, pricepath, calpath, submissionpath)
    sales, prices, calendar, submission = (pd.read_csv(path) for path in csv_paths)
    return sales, prices, calendar, submission

train_df, prices_df, calendar_df, submission_df = load_data()

**BASE FEATURE ENGINEERING (ONLY NEEDED FIRST TIME)**



*   BUILD GRID_BASE





In [None]:
# Reshape the wide sales table (one column per day) into long format:
# one row per (series, day) with the day label in 'd' and the value in `target`.
index_columns = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
grid_df = train_df.melt(id_vars=index_columns, var_name='d', value_name=target)

In [None]:
# Keep the original 'd_<n>' label, then turn 'd' into the bare day number
# by stripping the two-character prefix.
grid_df['d_org'] = grid_df['d']
grid_df['d'] = grid_df['d'].str[2:].astype(np.int16)

In [None]:
# Keep only days inside the training window, then restore the string day
# label and discard the temporary backup column.
grid_df = (
    grid_df.loc[grid_df['d'] <= end_train_day_x]
    .assign(d=lambda frame: frame['d_org'])
    .drop(columns='d_org')
)

In [None]:
# Build placeholder rows (NaN target) for the `prediction_horizon` days that
# follow the training window.
# The id table does not depend on the day, so it is computed once, and all
# days are stacked with a single concat instead of growing a frame in a loop.
temp_df = train_df[index_columns].drop_duplicates()
future_days = [
    temp_df.assign(**{'d': 'd_' + str(end_train_day_x + i + 1), target: np.nan})
    for i in range(prediction_horizon)
]
# pd.concat rejects an empty list; fall back to an empty frame for a zero horizon.
add_grid = pd.concat(future_days) if future_days else pd.DataFrame()
del future_days

In [None]:
# Stack history and future placeholders and renumber the rows 0..n-1.
grid_df = pd.concat([grid_df, add_grid], ignore_index=True)

del temp_df, add_grid

In [None]:
# Store the identifier columns as categoricals, one column at a time so no
# full copy of the grid is made.
for id_col in index_columns:
    grid_df[id_col] = grid_df[id_col].astype(pd.CategoricalDtype())

In [None]:
# First week (`wm_yr_wk`) in which each store/item pair has a price record.
release_df = (
    prices_df.groupby(['store_id', 'item_id'], as_index=False)['wm_yr_wk']
    .min()
    .rename(columns={'wm_yr_wk': 'release'})
)

In [None]:
# Attach each store/item pair's first priced week ('release') to the grid.
grid_df = Util.merge_by_concat(grid_df, release_df, ['store_id', 'item_id'])
del release_df
# Look up the week id (`wm_yr_wk`) of every day label from the calendar.
grid_df = Util.merge_by_concat(grid_df, calendar_df[['wm_yr_wk', 'd']], ['d'])
grid_df = grid_df.reset_index(drop=True)

In [None]:
# Express 'release' as an offset from the earliest release value so it fits in int16.
grid_df['release'] = (grid_df['release'] - grid_df['release'].min()).astype(np.int16)

In [None]:
# Save where every later section reads from (grid_base_path). Writing to a bare
# 'grid_base' in the working directory left the loaders pointing at a missing
# or stale file.
os.makedirs(os.path.dirname(grid_base_path), exist_ok=True)
grid_df.to_pickle(grid_base_path)

In [None]:
# Drop the in-memory grid; the following sections re-read it via grid_base_path.
del grid_df

*   BUILD CAL FEATURES



In [None]:
# Reload the base grid and keep only its key columns (id, d) as the scaffold
# onto which the calendar features are merged.
grid_df = pd.read_pickle(grid_base_path)
calfeats_df = grid_df[main_index_list]
dec = decimal.Decimal

def get_moon_phase(d):
    """Return the moon-phase index (0..7) for a ``'YYYY-MM-DD'`` date string.

    0 is new moon and 4 is full moon. The phase is derived from the number of
    lunations elapsed since 2001-01-01, using Decimal arithmetic.
    """
    elapsed = datetime.datetime.strptime(d, '%Y-%m-%d') - datetime.datetime(2001, 1, 1)
    elapsed_days = dec(elapsed.days) + dec(elapsed.seconds) / dec(86400)
    lunations = dec("0.20439731") + elapsed_days * dec("0.03386319269")
    # Fraction of the current cycle, scaled to eighths and rounded to nearest.
    cycle_fraction = lunations % dec(1)
    phase_index = math.floor(cycle_fraction * dec(8) + dec('0.5'))
    # Rounding can yield 8, which wraps back to 0 (new moon).
    return int(phase_index) & 7
        
# Moon-phase index for every calendar date.
calendar_df['moon'] = calendar_df['date'].map(get_moon_phase)

In [None]:
# Calendar columns to bring onto the (id, d) scaffold, joined on the day label.
icols = [
    'date', 'd',
    'event_name_1', 'event_type_1',
    'event_name_2', 'event_type_2',
    'snap_CA', 'snap_TX', 'snap_WI',
    'moon',
]

calfeats_df = pd.merge(calfeats_df, calendar_df[icols], how='left', on='d')

In [None]:
# Event and SNAP columns are stored as categoricals.
icols = [
    'event_name_1', 'event_type_1',
    'event_name_2', 'event_type_2',
    'snap_CA', 'snap_TX', 'snap_WI',
]
for col in icols:
    calfeats_df[col] = calfeats_df[col].astype(pd.CategoricalDtype())

In [None]:
# Derive compact integer date parts from the calendar date.
calfeats_df['date'] = pd.to_datetime(calfeats_df['date'])

calfeats_df['tm_d'] = calfeats_df['date'].dt.day.astype(np.int8)
# Series.dt.week was deprecated in pandas 1.1 and removed in 2.0;
# isocalendar().week yields the same ISO week number.
calfeats_df['tm_w'] = calfeats_df['date'].dt.isocalendar().week.astype(np.int8)
calfeats_df['tm_m'] = calfeats_df['date'].dt.month.astype(np.int8)
# Year as an offset from the earliest year so it fits in int8.
calfeats_df['tm_y'] = calfeats_df['date'].dt.year
calfeats_df['tm_y'] = (calfeats_df['tm_y'] - calfeats_df['tm_y'].min()).astype(np.int8)
# Week of the month (1..5), computed vectorised instead of a per-row apply.
calfeats_df['tm_wm'] = np.ceil(calfeats_df['tm_d'] / 7).astype(np.int8)

# dayofweek: Monday=0 ... Sunday=6, so >= 5 marks Saturday/Sunday.
calfeats_df['tm_dw'] = calfeats_df['date'].dt.dayofweek.astype(np.int8)
calfeats_df['tm_w_end'] = (calfeats_df['tm_dw'] >= 5).astype(np.int8)

del calfeats_df['date']

In [None]:
# Save to calfeats_path, the location load_df reads from; the bare 'calfeats'
# filename wrote into the working directory instead.
os.makedirs(os.path.dirname(calfeats_path), exist_ok=True)
calfeats_df.to_pickle(calfeats_path)
del calfeats_df
del grid_df

*   BUILD PRICING FEATURES



In [None]:
# Give every weekly price row the calendar month/year of its week
# (one calendar row per wm_yr_wk).
prices_df = prices_df.merge(
    calendar_df[['wm_yr_wk', 'month', 'year']].drop_duplicates(subset=['wm_yr_wk']),
    on=['wm_yr_wk'], how='left')

grid_df = pd.read_pickle(grid_base_path)

# Ignore price weeks beyond the last week present in the grid.
prices_df = prices_df[prices_df['wm_yr_wk']<=grid_df['wm_yr_wk'].max()]

# Per store/item summary statistics of the price history.
item_keys = ['store_id', 'item_id']
for stat in ('max', 'min', 'std', 'mean'):
    prices_df['price_' + stat] = prices_df.groupby(item_keys)['sell_price'].transform(stat)
prices_df['price_norm'] = prices_df['sell_price'] / prices_df['price_max']
prices_df['price_nunique'] = prices_df.groupby(item_keys)['sell_price'].transform('nunique')
prices_df['item_nunique'] = prices_df.groupby(['store_id', 'sell_price'])['item_id'].transform('nunique')

# Price relative to the previous row of the same store/item
# (assumes rows are in week order within each pair - TODO confirm).
prices_df['price_momentum'] = prices_df['sell_price'] / prices_df.groupby(item_keys)['sell_price'].shift(1)
# Price relative to the item's mean price in the same month / same year.
for suffix, period in (('m', 'month'), ('y', 'year')):
    prices_df['price_momentum_' + suffix] = prices_df['sell_price'] / prices_df.groupby(
        item_keys + [period])['sell_price'].transform('mean')

# Fractional (cents) part of the current, maximum and minimum price.
for source_col in ('sell_price', 'price_max', 'price_min'):
    prices_df[source_col + '_cent'] = [math.modf(p)[0] for p in prices_df[source_col]]

prices_df = prices_df.drop(columns=['month', 'year'])

In [None]:
# Display the engineered weekly price table for a visual sanity check.
prices_df

Unnamed: 0,store_id,item_id,wm_yr_wk,sell_price,price_max,price_min,price_std,price_mean,price_norm,price_nunique,item_nunique,price_momentum,price_momentum_m,price_momentum_y,sell_price_cent,price_max_cent,price_min_cent
0,CA_1,HOBBIES_1_001,11325,9.58,9.58,8.26,0.152139,8.285714,1.000000,3,3,,1.127059,1.145166,0.58,0.58,0.26
1,CA_1,HOBBIES_1_001,11326,9.58,9.58,8.26,0.152139,8.285714,1.000000,3,3,1.000000,1.127059,1.145166,0.58,0.58,0.26
2,CA_1,HOBBIES_1_001,11327,8.26,9.58,8.26,0.152139,8.285714,0.862213,3,5,0.862213,0.971765,0.987377,0.26,0.58,0.26
3,CA_1,HOBBIES_1_001,11328,8.26,9.58,8.26,0.152139,8.285714,0.862213,3,5,1.000000,1.000000,0.987377,0.26,0.58,0.26
4,CA_1,HOBBIES_1_001,11329,8.26,9.58,8.26,0.152139,8.285714,0.862213,3,5,1.000000,1.000000,0.987377,0.26,0.58,0.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6841116,WI_3,FOODS_3_827,11617,1.00,1.00,1.00,0.000000,1.000000,1.000000,1,142,1.000000,1.000000,1.000000,0.00,0.00,0.00
6841117,WI_3,FOODS_3_827,11618,1.00,1.00,1.00,0.000000,1.000000,1.000000,1,142,1.000000,1.000000,1.000000,0.00,0.00,0.00
6841118,WI_3,FOODS_3_827,11619,1.00,1.00,1.00,0.000000,1.000000,1.000000,1,142,1.000000,1.000000,1.000000,0.00,0.00,0.00
6841119,WI_3,FOODS_3_827,11620,1.00,1.00,1.00,0.000000,1.000000,1.000000,1,142,1.000000,1.000000,1.000000,0.00,0.00,0.00


In [None]:
# Project the weekly price features onto the daily grid and keep only the
# key columns plus the columns the merge added.
grid_df = pd.read_pickle(grid_base_path)
original_columns = list(grid_df)
pricefeats_df = pd.merge(grid_df, prices_df, how='left', on=['store_id', 'item_id', 'wm_yr_wk'])
keep_columns = [name for name in pricefeats_df.columns if name not in original_columns]
# Downcast numeric columns to shrink the frame before it is saved.
pricefeats_df = Util.reduce_mem_usage(pricefeats_df[main_index_list + keep_columns])
del prices_df

In [None]:
# Save to pricefeats_path, the location load_df reads from. The previous
# filename 'pricefeats_df' matched neither that path's directory nor its
# basename ('pricefeats').
os.makedirs(os.path.dirname(pricefeats_path), exist_ok=True)
pricefeats_df.to_pickle(pricefeats_path)

In [None]:
# Release the large frames before the next feature section reloads the base grid.
del pricefeats_df
del grid_df

*   CREATE ENCODING FEATURES



In [None]:
# Mean/std target encodings per category, department and item.
encoding_df = pd.read_pickle(grid_base_path)
encoding_df['d'] = encoding_df['d'].apply(lambda x: x[2:]).astype(np.int16)
# Blank the target beyond the training window so it cannot leak into the
# encodings. The previous chained form df[mask][target] = ... assigned into a
# temporary copy and changed nothing; .loc writes into the frame itself.
encoding_df.loc[encoding_df['d'] > end_train_day_x, target] = np.nan

base_cols = list(encoding_df)

icols = [['cat_id'],
    ['dept_id'],
    ['item_id']]

for col in icols:
    col_name = '_' + '_'.join(col) + '_'
    grouped_target = encoding_df.groupby(col)[target]
    # Compute both statistics before touching the frame.
    enc_mean = grouped_target.transform('mean').astype(np.float16)
    enc_std = grouped_target.transform('std').astype(np.float16)
    encoding_df['enc' + col_name + 'mean'] = enc_mean
    encoding_df['enc' + col_name + 'std'] = enc_std

# Keep the key columns plus the freshly created encoding columns.
keep_cols = [col for col in list(encoding_df) if col not in base_cols]
encoding_df = encoding_df[['id', 'd'] + keep_cols]

# Save to encoding_path, the location load_df reads from.
os.makedirs(os.path.dirname(encoding_path), exist_ok=True)
encoding_df.to_pickle(encoding_path)

In [None]:
# Drop the encoding frame; the lag-feature loop below rebuilds its own frame from the base grid.
del encoding_df

**CREATE LAG FEATURES**

In [None]:
# Build one lag/rolling feature file per forecast horizon. For horizon h only
# values at least h days old are used, so the features exist for every day that
# has to be predicted.
# NOTE: this loop rebinds the module-level `prediction_horizon` (it ends at 28).
for prediction_horizon in [7,14,21,28]:
    num_lag_day = 15
    # Lags h, h+1, ..., h+14.
    num_lag_day_list = list(range(prediction_horizon, prediction_horizon + num_lag_day))
    num_rolling_day_list = [7, 14, 30, 60, 180]

    lagfeats_df = pd.read_pickle(grid_base_path)
    lagfeats_df['d'] = lagfeats_df['d'].apply(lambda x: x[2:]).astype(np.int16)
    # Explicit copy so the masked assignment below writes into this frame.
    lagfeats_df = lagfeats_df[['id', 'd', target]].copy()
    # The previous chained form df[mask][target] = ... was a no-op on a
    # temporary copy; .loc actually blanks the post-training target.
    lagfeats_df.loc[lagfeats_df['d'] > end_train_day_x, target] = np.nan

    # Per-series lagged target, stored as float16.
    target_by_id = lagfeats_df.groupby(['id'])[target]
    lagfeats_df = lagfeats_df.assign(**{
        '{}_lag_{}'.format(target, lag): target_by_id.shift(lag).astype(np.float16)
        for lag in num_lag_day_list
    })

    # Rolling mean/std over windows that end `prediction_horizon` days back.
    for num_rolling_day in num_rolling_day_list:
        lagfeats_df['rolling_mean_' + str(num_rolling_day)] = lagfeats_df.groupby(['id'])[target].transform(
            lambda x: x.shift(prediction_horizon).rolling(num_rolling_day).mean()).astype(np.float16)
        lagfeats_df['rolling_std_' + str(num_rolling_day)] = lagfeats_df.groupby(['id'])[target].transform(
            lambda x: x.shift(prediction_horizon).rolling(num_rolling_day).std()).astype(np.float16)

    # Save under the prefix load_df reads from (lagfeats_path + horizon).
    os.makedirs(os.path.dirname(lagfeats_path), exist_ok=True)
    lagfeats_df.to_pickle(lagfeats_path + str(prediction_horizon))

# Free the last horizon's frame so it does not stay resident during training.
del lagfeats_df, target_by_id
gc.collect()

**CREATE FULL DATASET BY STORE**

In [None]:
def load_df(store_id,end_train_day_x,prediction_horizon):
    """Assemble the modelling frame for one store and one forecast horizon.

    The cached pickles (base grid, price, calendar and encoding features) are
    glued together column-wise, so they are expected to share the same row
    index. Lag features are read from the file for ``prediction_horizon``.

    Args:
        store_id: store to keep, or ``'all'`` to keep every store.
        end_train_day_x: last training day; rows after
            ``end_train_day_x + prediction_horizon`` are dropped.
        prediction_horizon: forecast horizon; also selects the lag-feature file.

    Returns:
        tuple: ``(full_df, enable_features)`` - the frame with columns
        ``['id', 'd', target] + enable_features`` and the list of feature names
        (every column not listed in ``remove_features``).
    """
    base = pd.read_pickle(grid_base_path)
    base['d'] = base['d'].str[2:].astype(np.int16)

    # Drop the two leading key columns (id, d) of every feature table.
    feature_blocks = [
        pd.read_pickle(path).iloc[:, 2:]
        for path in (pricefeats_path, calfeats_path, encoding_path)
    ]
    full_df = pd.concat([base] + feature_blocks, axis=1)
    del base, feature_blocks

    if store_id != 'all':
        full_df = full_df[full_df['store_id'] == store_id]

    last_day = end_train_day_x + prediction_horizon
    full_df = full_df[full_df['d'] <= last_day]

    # Lag tables carry id, d and the target first; keep only the feature columns
    # for the rows that survived the filters above.
    lagfeats = pd.read_pickle(lagfeats_path + str(prediction_horizon)).iloc[:, 3:]
    lagfeats = lagfeats.loc[lagfeats.index.isin(full_df.index)]
    full_df = pd.concat([full_df, lagfeats], axis=1)
    del lagfeats

    enable_features = [name for name in full_df.columns if name not in remove_features]
    full_df = full_df[['id', 'd', target] + enable_features]

    # start_train_day_x is read from module scope, not passed in.
    full_df = full_df[full_df['d'] >= start_train_day_x].reset_index(drop=True)

    return full_df, enable_features

**RUN MODEL**

In [None]:
# LightGBM settings shared by every store/horizon model.
lgb_params = {
    'boosting_type': 'gbdt',
    'objective': 'tweedie',
    'tweedie_variance_power': 1.1,
    'metric': 'rmse',
    'subsample': 0.5,
    'subsample_freq': 1,
    'learning_rate': 0.03,
    'num_leaves': 2 ** 11 - 1,
    'min_data_in_leaf': 2 ** 12 - 1,
    'feature_fraction': 0.5,
    'max_bin': 100,
    'n_estimators': 1400,
    'boost_from_average': False,
}

store_id_set_list = list(train_df['store_id'].unique())

# NOTE(review): the [9:] slice skips the first nine stores, so only the remaining
# one(s) are trained - looks like a leftover from resuming a run; confirm intent.
# NOTE: the inner loop rebinds the module-level `prediction_horizon`.
for store_index, store_id in enumerate(store_id_set_list[9:]):
    for prediction_horizon in [7,14,21,28]:
        save_name = str(store_id)+'-'+str(prediction_horizon)+'-'+'.csv'

        grid_df,enable_features = load_df(store_id,end_train_day_x,prediction_horizon)

        x_train = grid_df[(grid_df['d'] >= start_train_day_x) & (grid_df['d'] <= end_train_day_x)]
        y_train = x_train[target]
        # NOTE: the validation window is the last `prediction_horizon` days of the
        # training range, so the reported RMSE is in-sample, not a hold-out score.
        x_val = grid_df[(grid_df['d'] > (end_train_day_x - prediction_horizon)) & (grid_df['d'] <= end_train_day_x)]
        y_val = x_val[target]

        # Copy so predictions can be written into it without touching grid_df.
        test = grid_df[grid_df['d'] > end_train_day_x].copy()

        train_data = lgb.Dataset(x_train[enable_features],
                                 label=y_train)

        val_data = lgb.Dataset(x_val[enable_features],
                               label=y_val)

        del grid_df, x_train, y_train
        gc.collect()

        # `verbose_eval` was removed from lgb.train in LightGBM 4.0; the
        # log_evaluation callback prints the same message every 100 rounds.
        estimator = lgb.train(lgb_params, train_data, valid_sets=[val_data],
                              callbacks=[lgb.log_evaluation(period=100)])

        val_pred = estimator.predict(x_val[enable_features])
        # scikit-learn's signature is (y_true, y_pred).
        val_score = np.sqrt(mean_squared_error(y_val, val_pred))
        print(f'Our val rmse score is {val_score}')

        y_pred = estimator.predict(test[enable_features])
        test[target] = y_pred

        # One row per series id, one column per forecast day.
        predictions = test[['id', 'd', target]]
        predictions = pd.pivot(predictions, index = 'id', columns = 'd', values = target).reset_index()

        predictions.to_csv(save_name,index=False)

[100]	valid_0's rmse: 1.8316
[200]	valid_0's rmse: 1.7652
[300]	valid_0's rmse: 1.74867
[400]	valid_0's rmse: 1.74213
[500]	valid_0's rmse: 1.73716
[600]	valid_0's rmse: 1.73215
[700]	valid_0's rmse: 1.72696
[800]	valid_0's rmse: 1.72394
[900]	valid_0's rmse: 1.7181
[1000]	valid_0's rmse: 1.71552
[1100]	valid_0's rmse: 1.71019
[1200]	valid_0's rmse: 1.70635
[1300]	valid_0's rmse: 1.70099
[1400]	valid_0's rmse: 1.69775
Our val rmse score is 1.6977493624696287
[100]	valid_0's rmse: 1.99575
[200]	valid_0's rmse: 1.91378
[300]	valid_0's rmse: 1.89482
[400]	valid_0's rmse: 1.88123
[500]	valid_0's rmse: 1.87511
[600]	valid_0's rmse: 1.86312
[700]	valid_0's rmse: 1.84908
[800]	valid_0's rmse: 1.84142
[900]	valid_0's rmse: 1.83239
[1000]	valid_0's rmse: 1.82505
[1100]	valid_0's rmse: 1.81674
[1200]	valid_0's rmse: 1.81051
[1300]	valid_0's rmse: 1.80471
[1400]	valid_0's rmse: 1.79718
Our val rmse score is 1.7971811175924952
[100]	valid_0's rmse: 2.15378
[200]	valid_0's rmse: 2.05087
[300]	valid