In [1]:
#importing necessary libraries
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import make_scorer
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
import lightgbm as lgb
import xgboost as xgb
import math
import pickle
import joblib
import warnings
warnings.filterwarnings("ignore")
pd.options.mode.chained_assignment = None
pd.set_option('display.float_format', '{:.2f}'.format)

In [2]:
#Reading the three raw input tables (calendar, daily unit sales, weekly sell prices) from CSV files
#located in the current working directory
calendar_df = pd.read_csv('calendar.csv')
sales_eval_df = pd.read_csv('sales_train_evaluation.csv')
prices_df = pd.read_csv('sell_prices.csv')

In [3]:
#quick look at the calendar table: its shape and its first two rows
print('shape of calendar_df is ',calendar_df.shape)
print('top 2 rows of calendar_df')
calendar_df.head(2)

shape of calendar_df is  (1969, 14)
top 2 rows of calendar_df


Unnamed: 0,date,wm_yr_wk,weekday,wday,month,year,d,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI
0,2011-01-29,11101,Saturday,1,1,2011,d_1,,,,,0,0,0
1,2011-01-30,11101,Sunday,2,1,2011,d_2,,,,,0,0,0


In [4]:
#quick look at the wide sales table (one row per id, one column per day): shape and first two rows
print('shape of sales_eval_df is ',sales_eval_df.shape)
print('top 2 rows of sales_eval_df')
sales_eval_df.head(2)

shape of sales_eval_df is  (30490, 1947)
top 2 rows of sales_eval_df


Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
0,HOBBIES_1_001_CA_1_evaluation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,4,0,0,0,0,3,3,0,1
1,HOBBIES_1_002_CA_1_evaluation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,1,2,1,1,0,0,0,0,0


In [5]:
#quick look at the sell-price table: shape and first two rows
print('shape of prices_df is ',prices_df.shape)
print('top 2 rows of prices_df')
prices_df.head(2)

shape of prices_df is  (6841121, 4)
top 2 rows of prices_df


Unnamed: 0,store_id,item_id,wm_yr_wk,sell_price
0,CA_1,HOBBIES_1_001,11325,9.58
1,CA_1,HOBBIES_1_001,11326,9.58


<h3> Feature Engineering </h3>

In [6]:
#reference: https://www.kaggle.com/kyakovlev/m5-simple-fe
def reduce_mem_usage(df, verbose=True):
    """Downcast the numeric columns of ``df`` to the smallest dtype that covers their value range.

    The frame is modified in place and also returned, so both
    ``reduce_mem_usage(df)`` and ``df = reduce_mem_usage(df)`` work.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose int16/int32/int64/float16/float32/float64 columns are downcast.
        Columns of any other dtype are left untouched.
    verbose : bool, default True
        When true, print the resulting memory footprint and the percentage saved.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with downcast columns.

    Notes
    -----
    * Range checks are inclusive, so a column spanning exactly -128..127 is stored as int8.
    * Float columns are picked by range only; float16/float32 keep fewer significant
      digits than float64, so values may be rounded.
    * A float column whose min/max is NaN (e.g. an all-NaN column) fails every range
      check and therefore ends up as float64.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            #try the integer widths from narrowest to widest and stop at the first that fits
            for candidate in (np.int8, np.int16, np.int32, np.int64):
                limits = np.iinfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            if c_min >= np.finfo(np.float16).min and c_max <= np.finfo(np.float16).max:
                df[col] = df[col].astype(np.float16)
            elif c_min >= np.finfo(np.float32).min and c_max <= np.finfo(np.float32).max:
                df[col] = df[col].astype(np.float32)
            else:
                df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    #guard the percentage against a zero-byte starting footprint
    reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

In [7]:
#append one all-NaN float16 column per forecast day (d_1942 .. d_1969) to the wide sales table;
#these are the days whose sales have to be forecast
for i in range(1942,1970):
    sales_eval_df['d_'+str(i)] = np.nan
    sales_eval_df['d_'+str(i)] = sales_eval_df['d_'+str(i)].astype(np.float16)

#downcast the numeric columns of all three tables to reduce memory usage
#(reduce_mem_usage prints the resulting footprint for each table)
calendar_df = reduce_mem_usage(calendar_df)
prices_df = reduce_mem_usage(prices_df)
sales_eval_df = reduce_mem_usage(sales_eval_df)

#wide -> long: one row per (id, day); the day label goes to column 'd' and the value to column 'sales'
sales_eval_melt_df = pd.melt(sales_eval_df, id_vars=['id','item_id','dept_id','cat_id','store_id','state_id'],
                       var_name='d',value_name='sales')

#the first six columns of the melted frame are the id_vars passed above;
#store them as category instead of object to reduce the size of the dataframe
for col in sales_eval_melt_df.columns[:6]:
    sales_eval_melt_df[col] = sales_eval_melt_df[col].astype('category')

Mem. usage decreased to  0.12 Mb (41.9% reduction)
Mem. usage decreased to 130.48 Mb (37.5% reduction)
Mem. usage decreased to 102.64 Mb (77.4% reduction)


In [8]:
#creating a single dataframe: attach the calendar columns by day label 'd', then the
#sell price by (store_id, item_id, wm_yr_wk); left joins keep every sales row even without a match
sales_eval_melt_df = sales_eval_melt_df.merge(calendar_df,  on='d', how='left')
sales_eval_melt_df = sales_eval_melt_df.merge(prices_df, on=['store_id','item_id','wm_yr_wk'], how='left')

In [9]:
#pre-processing missing prices: replace each missing sell_price with the mean price of the same id.
#The result is assigned back to the column instead of calling fillna(inplace=True) on the column
#selection, because in-place mutation of a selected column does not update the frame under
#pandas copy-on-write.
mean_price_per_id = sales_eval_melt_df.groupby('id')['sell_price'].transform('mean')
sales_eval_melt_df['sell_price'] = sales_eval_melt_df['sell_price'].fillna(mean_price_per_id)

<h3> Lag features </h3>

In [10]:
#creating lag features: for every id, the sales observed 28, 35, ..., 84 days before the current day
#(i.e. up to roughly 3 months back), stored as float16 columns lag_28 .. lag_84
shifting = 28 #smallest lag; equals the 28-day forecast horizon
for i in tqdm(range(9)): #9 lags in weekly steps: 28 + 7*i for i = 0..8
    sales_eval_melt_df['lag_'+str(shifting+(7*i))] = sales_eval_melt_df.groupby('id')['sales'].shift(shifting+(7*i)).astype(np.float16)

100%|██████████| 9/9 [00:26<00:00,  2.99s/it]


<h3> Rolling features </h3>

In [11]:
#creating constant-shift rolling aggregate features: rolling mean / median of lag_28 per id.
#groupby(...).rolling(...) uses pandas' built-in windowed aggregations instead of calling a Python
#lambda once per id through transform. Its result is indexed by (id, original row label), so the
#id level is dropped and the assignment aligns on the original row labels.
lag_28_by_id = sales_eval_melt_df.groupby('id')['lag_28']
for i in tqdm([7,14,28,35,60]):
    window = lag_28_by_id.rolling(i)
    sales_eval_melt_df['rolling_mean_'+str(i)] = window.mean().reset_index(level=0, drop=True)
    sales_eval_melt_df['rolling_median_'+str(i)] = window.median().reset_index(level=0, drop=True)

100%|██████████| 5/5 [6:21:35<00:00, 4579.14s/it]  


In [12]:
#checkpoint: the frame with lag and rolling features is written to the file "sales_eval_melt_df"
sales_eval_melt_df.to_pickle("sales_eval_melt_df") #store the dataframe into disk

In [None]:
#restore the checkpoint written by the previous cell
sales_eval_melt_df = pd.read_pickle("sales_eval_melt_df") #load the dataframe from disk

<h3>Calendar features</h3>

In [13]:
#changing dtype of the calendar event and SNAP columns to category
cal_cols = ['event_name_1','event_type_1','event_name_2','event_type_2','snap_CA','snap_TX','snap_WI']
for col in cal_cols:
    sales_eval_melt_df[col] = sales_eval_melt_df[col].astype('category')

In [14]:
sales_eval_melt_df['date'] = pd.to_datetime(sales_eval_melt_df['date'])
#each day of the month
sales_eval_melt_df['day_of_month'] = sales_eval_melt_df['date'].dt.day.astype(np.int8)
#changing year value as 0 for 2011 and 1 for 2012 .... 5 for 2016
sales_eval_melt_df['year'] = (sales_eval_melt_df['year'] - sales_eval_melt_df['year'].min()).astype(np.int8)
#week number of a day in a month ex: 29th in January corresponds to 5th week of January;
#(day + 6) // 7 equals ceil(day / 7) for positive integers and is computed column-wise
#instead of calling math.ceil once per row
sales_eval_melt_df['week_no_inmonth'] = ((sales_eval_melt_df['day_of_month'] + 6) // 7).astype(np.int8)
#checking if the day is weekend or not (the calendar table shows wday 1 = Saturday, 2 = Sunday)
sales_eval_melt_df['is_weekend'] = (sales_eval_melt_df['wday']<=2).astype(np.int8)

In [15]:
#checkpoint: written to the file "sales_eval_melt_calfadd_df"
sales_eval_melt_df.to_pickle("sales_eval_melt_calfadd_df") #store the final feature engineered dataframe to disk

In [None]:
#load the feature engineered dataframe back from disk; the file name must match the one used
#by the to_pickle call that wrote it ("sales_eval_melt_calfadd_df")
sales_eval_melt_df = pd.read_pickle("sales_eval_melt_calfadd_df")

In [17]:
#first two rows of the feature engineered frame
sales_eval_melt_df.head(2)

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d,sales,date,wm_yr_wk,...,rolling_median_14,rolling_mean_28,rolling_median_28,rolling_mean_35,rolling_median_35,rolling_mean_60,rolling_median_60,day_of_month,week_no_inmonth,is_weekend
0,HOBBIES_1_001_CA_1_evaluation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0.0,2011-01-29,11101,...,,,,,,,,29,5,1
1,HOBBIES_1_002_CA_1_evaluation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0.0,2011-01-29,11101,...,,,,,,,,29,5,1


<h3>Pre-processing</h3>

In [18]:
#columns that are label encoded in the next step; they are first cast to category dtype here
cat_cols = ['id','item_id', 'dept_id', 'cat_id', 'store_id', 'state_id', 'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2','snap_CA','snap_TX','snap_WI']
for col in cat_cols:
    sales_eval_melt_df[col] = sales_eval_melt_df[col].astype('category')

In [19]:
#method which return the label encoded columns
def label_encoding(df,cols):
    """Replace each column in ``cols`` of ``df`` with integer codes from a fresh LabelEncoder.

    Values are converted to strings before encoding. ``df`` is modified in place and
    the same object is returned.
    """
    for name in cols:
        as_text = df[name].astype(str)
        df[name] = LabelEncoder().fit_transform(as_text)
    return df

In [20]:
#label_encoding modifies the frame it is given and returns it, so df_enc and sales_eval_melt_df
#refer to the same object after this call
df_enc = label_encoding(sales_eval_melt_df,cat_cols) #transforming the categorical columns to label encoded columns
#keep only the numeric part of the day label, e.g. 'd_15' -> 15, stored as int16
df_enc['d'] = df_enc['d'].apply(lambda x: x.split('_')[1]).astype(np.int16) #splitting the values of 'd' column to take only the day number

In [21]:
#final dataframe after pre-processing and feature engineering: keep the rows dated 2014-01-02 or
#later as the data used to train and evaluate the ML model. The datetime column is compared
#directly; converting it to python date objects and back to datetime yields the same mask but
#materialises one python object per row.
df_final = df_enc.loc[df_enc['date'] >= pd.Timestamp('2014-01-02')]

In [22]:
#write the modelling frame to df_final.csv without the index column
df_final.to_csv("df_final.csv", index=False)

In [23]:
#feature matrix: every column except the target and the raw date / weekday / week-id columns;
#both X and y get a fresh 0..n-1 index so they stay positionally aligned
X = df_final.drop(columns=['sales','date','weekday','wm_yr_wk']).reset_index(drop=True)
#target vector
y = df_final['sales'].reset_index(drop=True)

<h3>Evaluation metric - WRMSSE</h3>

In [6]:
#https://www.kaggle.com/dhananjay3/wrmsse-evaluator-with-extra-features
#here we are transforming the 30490 timeseries into 42840 time-series by grouping based on the 12 level hirearchy
def convert_to_42840(df, cols, groupbys):
    """Aggregate the bottom-level series in ``df`` to every hierarchy level in ``groupbys``.

    For each grouping key (a column name or a list of column names) the columns ``cols``
    are summed per group. Every resulting row is stored under the name produced by
    ``gen_series_name`` for its group key.

    Returns a DataFrame with one row per aggregated series and one column per entry of ``cols``.
    """
    series_gen = {}
    for grp in groupbys:
        #aggregate sales of each day for the current grouping level
        daily_totals = df.groupby(grp)[cols].sum()
        group_keys = daily_totals.index
        for pos, row in enumerate(daily_totals.values):
            series_gen[gen_series_name(group_keys[pos])] = row
    #dict of name -> daily totals becomes columns, so transpose to get one row per series
    return pd.DataFrame(series_gen).T

In [7]:
#this method return the name of each group
def gen_series_name(name):
    """Return the string name of an aggregated series from its group key.

    A scalar key (string, python int, NumPy integer, ...) is returned as ``str(name)``.
    A composite key (e.g. the tuple produced by grouping on several columns) is joined
    with ``"__"``; every part is converted with ``str`` first, so keys that contain
    integers (such as label-encoded ids) are supported as well.
    """
    #strings are iterable but must be treated as a single key
    if isinstance(name, (str, bytes)) or not hasattr(name, '__iter__'):
        return str(name)
    return "__".join(str(part) for part in name)

In [8]:
#here we are computing weights using the last 28 day sales of train data and their prices
def compute_weights(train_df,valid_df,weight_cols,groupbys,fix_cols):
    """Compute the weight of every aggregated series from dollar sales over ``weight_cols``.

    Parameters
    ----------
    train_df : wide sales frame; must contain 'item_id', 'store_id', the columns in
        ``weight_cols`` and the columns in ``fix_cols``.
    valid_df : not used by this function.
    weight_cols : day columns whose dollar sales define the weights.
    groupbys : grouping keys (column name or list of column names), one per hierarchy level.
    fix_cols : non-day columns of ``train_df`` that the grouping keys refer to.

    Returns
    -------
    DataFrame with one row per series name (see ``gen_series_name``) and a single column.
    Within each level the weights are normalised to sum to 1 and then divided by
    ``len(groupbys)``.

    Reads the module-level frames ``calendar_df`` and ``prices_df``.
    """
    weights_map = {}
    weight_df = train_df[["item_id", "store_id"] + weight_cols]
    #wide -> long so that each (item, store, day) row can be joined to its price
    weight_df = pd.melt(weight_df,id_vars=["item_id", "store_id"],var_name='d',value_name='sales')
    #the calendar supplies the week id (wm_yr_wk) used as part of the price join key
    weight_df = weight_df.merge(calendar_df[['wm_yr_wk','d']], on='d', how='left')
    weight_df = weight_df.merge(prices_df, how="left", on=["item_id", "store_id", "wm_yr_wk"])
    #computing dollar sales = units sold * sell price
    weight_df["dollar_sales"] = weight_df["sales"] * weight_df["sell_price"]
    #long -> wide again: one row per (item_id, store_id), one dollar-sales column per day
    weight_df = weight_df.set_index(["item_id", "store_id", "d"]).unstack(level=2)["dollar_sales"]
    #reorder the rows to follow train_df's row order, then drop the (item_id, store_id) index
    weight_df = weight_df.loc[zip(train_df.item_id, train_df.store_id), :].reset_index(drop=True)
    weight_df = pd.concat([train_df[fix_cols], weight_df],
                          axis=1, sort=False)
    #computing the weights for each group keys
    for i,grp in enumerate(groupbys):
        #total dollar sales of each group over all weight_cols days
        ser_weight = weight_df.groupby(grp)[weight_cols].sum().sum(axis=1)
        #share of each group within its own level
        ser_weight = ser_weight / ser_weight.sum()
        for j in range(len(ser_weight)):
            weights_map[gen_series_name(ser_weight.index[j])] = np.array([ser_weight.iloc[j]])
    weights = pd.DataFrame(weights_map).T / len(groupbys) #one row per aggregated series; every level contributes equally
    return weights

In [9]:
#here we compute the rmsse using the true values and predicted values along with train data which is being used to scale
#train data is used to scale the squared-error as taking the consecutive difference of each day sales
def compute_rmsse(train_df, valid_df, pred_df):
    """Root mean squared scaled error of every series.

    Parameters
    ----------
    train_df : DataFrame, one row per series, columns are the training days (numeric only).
    valid_df : DataFrame of true values for the forecast days.
    pred_df : DataFrame of predicted values, same shape and labels as ``valid_df``.

    Returns
    -------
    pandas.Series with one RMSSE value per row:
    ``sqrt(mean((pred - valid)**2) / scale)``, where ``scale`` is the mean squared
    day-to-day difference of the training series counted from its first non-zero value.

    Notes
    -----
    The training values are converted to float64 before differencing. Small integer
    dtypes (e.g. int8 / int16 produced by memory downcasting) would otherwise wrap
    around when the differences are squared and give a wrong scale.
    A series without any day-to-day change (or with fewer than two values after
    trimming) has a zero or undefined scale, so its RMSSE is inf or NaN.
    """
    train_values = train_df.to_numpy(dtype=np.float64)
    scale_lst = []
    for val in train_values:
        # to consider the periods following the first non-zero demand observed for the series under evaluation.
        val = val[np.argmax(val != 0):]
        #to scale the squared-error as taking the consecutive difference of each day sales
        scale_lst.append((np.diff(val) ** 2).mean())
    scale_arr = np.array(scale_lst)
    #computing mean squared error
    num = ((pred_df - valid_df)**2).mean(axis=1)
    #scaled error i.e., root mean squared scaled error
    rmsse = (num/scale_arr).map(np.sqrt)
    return rmsse

In [10]:
#here we return the final score value i.e., WRMSSE
def custom_metric(train_df, valid_df, pred_df, weights):
    """Return the WRMSSE: the sum over all series of weight * RMSSE.

    ``compute_rmsse`` supplies the per-series RMSSE; ``weights`` is placed next to it
    (aligned on the series name index) and the two columns are multiplied row-wise.
    """
    per_series_rmsse = compute_rmsse(train_df, valid_df, pred_df)
    weighted = pd.concat([weights, per_series_rmsse], axis=1, sort=False)
    #product across the two columns gives each series' contribution; their sum is the final score
    return np.sum(weighted.prod(axis=1))

<h3>Custom-Ensemble model with hyper-parameter tuning</h3>

In [24]:
#this method returns the dataframes splitted based on specified percentage
def split_data(X,y,per):
    """Split ``X`` and ``y`` by position into a leading part and the remainder.

    Parameters
    ----------
    X : pandas.DataFrame of features.
    y : pandas.Series (or DataFrame) of targets with the same number of rows as ``X``.
    per : fraction of rows (0..1) that goes into the first part; the row count is
        ``int(len(X) * per)``.

    Returns
    -------
    (X1, y1, X2, y2) : the first ``int(len(X) * per)`` rows of X and y, followed by all
    remaining rows, in their original order.

    The split is positional, so it does not depend on the index labels being unique,
    and it avoids building a set difference over the whole index.
    """
    n_rows = int(len(X)*per)
    X1 = X.iloc[:n_rows]
    y1 = y.iloc[:n_rows]
    X2 = X.iloc[n_rows:]
    y2 = y.iloc[n_rows:]
    return X1,y1,X2,y2

In [25]:
#formulating X_train,X_test & D1,D2 along with their target values:
#first 80% of the rows -> train, remaining 20% -> test; the train part is then halved into D1 and D2
X_train,y_train,X_test,y_test = split_data(X,y,0.8)
D1_x,D1_y,D2_x,D2_y = split_data(X_train,y_train,0.5)

In [26]:
#storing the D1,D2,test dataframes as pickle files in the working directory
D1_x.to_pickle('D1_x')
D1_y.to_pickle('D1_y')
D2_x.to_pickle('D2_x')
D2_y.to_pickle('D2_y')
X_test.to_pickle('X_test')
y_test.to_pickle('y_test')

In [11]:
#loading the D1,D2,test dataframes back from the pickle files written by the previous cell
D1_x = pd.read_pickle("D1_x")
D1_y = pd.read_pickle("D1_y")
D2_x = pd.read_pickle("D2_x")
D2_y = pd.read_pickle("D2_y")
X_test = pd.read_pickle("X_test")
y_test = pd.read_pickle("y_test")

In [12]:
#sanity check of the split sizes (rows, columns)
print('shapes of D1,D2,test dataframes',D1_x.shape,D2_x.shape,X_test.shape)

shapes of D1,D2,test dataframes (10976400, 40) (10976400, 40) (5488200, 40)


In [13]:
def custom_ensemble(D1_x,D1_y,D2_x,D2_y,X_test,y_test,N,base_learner,metaM):
    """Train a stacked ensemble and return its predictions for ``X_test``.

    Procedure
    ---------
    1. ``N`` times: draw a bootstrap sample (same size, with replacement) of D1, fit
       ``base_learner`` on it, and predict D2_x and X_test with the fitted model.
    2. The N prediction vectors for D2_x are the features on which ``metaM`` is fitted,
       with ``D2_y`` as target.
    3. ``metaM`` predicts from the N prediction vectors for X_test; that is the result.

    Parameters
    ----------
    D1_x, D1_y : features / target used to fit the base models. They must share the
        same unique index, because the target rows are looked up by the sampled labels.
    D2_x, D2_y : features / target used to fit the meta-model.
    X_test : features to predict.
    y_test : accepted for interface compatibility; not used.
    N : number of base models (>= 1).
    base_learner : estimator with ``fit`` / ``predict``. The same object is refitted in
        every round, so after the call it holds the fit of the last round.
    metaM : estimator with ``fit`` / ``predict`` used as meta-model; fitted in place.

    Returns
    -------
    Array of meta-model predictions, one per row of ``X_test`` and in the row order of
    ``X_test``.

    Notes
    -----
    Each base model predicts D2_x and X_test in their existing row order, so the
    meta-features stay aligned with ``D2_y`` row by row. For frames sorted by day this
    is the same ordering as predicting day by day and stacking the days. Each bootstrap
    sample is released after its round instead of keeping all N copies in memory.
    """
    if N < 1:
        raise ValueError("N (number of base models) must be at least 1")
    d2_features = []
    test_features = []
    #N represents number of base models
    for _ in tqdm(range(N)):
        #sample D1 with replacement; the target is picked by the sampled index labels
        boot_x = D1_x.sample(frac=1,replace=True)
        boot_y = D1_y.loc[boot_x.index]
        #training the base model on this bootstrap sample
        base_learner.fit(boot_x,boot_y)
        #predictions of this base model become one meta-feature column each for D2 and X_test
        d2_features.append(np.asarray(base_learner.predict(D2_x)))
        test_features.append(np.asarray(base_learner.predict(X_test)))
    #one column per base model
    D2_pred = np.column_stack(d2_features)
    X_test_pred = np.column_stack(test_features)
    #training meta-model with D2_pred,D2_y
    meta_model = metaM
    meta_model.fit(D2_pred,D2_y.values)
    #getting the predictions for X_test_pred from trained meta-model
    meta_model_preds = meta_model.predict(X_test_pred)
    return meta_model_preds

<h4>Tuning with different base-learners and meta-model combinations</h4>

In [14]:
#the 12 aggregation levels of the hierarchy; 'for_all' is a constant column added below
groupbys = ('for_all', 'state_id', 'store_id', 'cat_id', 'dept_id',['state_id', 'cat_id'],  
            ['state_id', 'dept_id'], ['store_id', 'cat_id'],['store_id', 'dept_id'], 'item_id', 
            ['item_id', 'state_id'], ['item_id', 'store_id'])

#Day ranges are selected by column name: d_1070..d_1913 is the scaling/weighting history and
#d_1914..d_1941 holds the true sales used for scoring. Selecting by name keeps the split correct
#even when the empty d_1942..d_1969 forecast columns have already been appended to sales_eval_df.
train_df = pd.concat([sales_eval_df.loc[:,:'state_id'],sales_eval_df.loc[:,'d_1070':'d_1913']],axis=1,sort=False)
valid_df = sales_eval_df.loc[:,'d_1914':'d_1941'].copy()
train_d_cols = [col for col in train_df.columns if col.startswith('d_')]
weight_cols = train_d_cols[-28:] #last 28 training days define the weights
train_df['for_all'] = "all" #for level 1 aggregation
fixed_cols = [col for col in train_df.columns if not col.startswith('d_')]
valid_d_cols = [col for col in valid_df.columns if col.startswith('d_')]
if not all([col in valid_df.columns for col in fixed_cols]):
    valid_df = pd.concat([train_df[fixed_cols],valid_df],axis=1,sort=False)
weight_df = compute_weights(train_df,valid_df,weight_cols,groupbys,fixed_cols)
train_42840_df = convert_to_42840(train_df, train_d_cols, groupbys)
valid_42840_df = convert_to_42840(valid_df, valid_d_cols, groupbys)

#getting the X_test predictions for different base-learners and meta-models
model_combs = [(DecisionTreeRegressor(max_depth=10,max_features=10,random_state=42),
                 xgb.XGBRegressor(n_estimators=50,learning_rate=0.05,max_depth=10,n_jobs=-1,
                                  colsample_bytree=0.3,subsample=1,random_state=42)),
                 (DecisionTreeRegressor(max_depth=10,max_features=10,random_state=42),
                  lgb.LGBMRegressor(num_leaves=125,n_estimators=100,learning_rate=0.075,n_jobs=-1)),
                 (xgb.XGBRegressor(n_estimators=50,learning_rate=0.05,max_depth=10,n_jobs=-1,
                                  colsample_bytree=0.3,subsample=1,random_state=42),
                 lgb.LGBMRegressor(num_leaves=125,n_estimators=100,learning_rate=0.075,n_jobs=-1)),
                 (lgb.LGBMRegressor(num_leaves=125,n_estimators=100,learning_rate=0.075,n_jobs=-1),
                 xgb.XGBRegressor(n_estimators=50,learning_rate=0.05,max_depth=10,n_jobs=-1,
                                  colsample_bytree=0.3,subsample=1,random_state=42))]
n_series = len(valid_df) #number of bottom-level series, i.e. rows per day in X_test
first_test_day = int(X_test.iloc[0]['d'])
WRMSSE_custom_model_combs = {}
for i in tqdm(range(len(model_combs))):
    base_learner = model_combs[i][0]
    metaM = model_combs[i][1]
    custom_ensemle_predictions = custom_ensemble(D1_x,D1_y,D2_x,D2_y,X_test,y_test,10,base_learner,metaM)

    #the predictions are ordered day by day with n_series rows per day: reshape them to one
    #row per series and one column per day (raises if the length is not a whole number of days)
    preds_by_day = np.asarray(custom_ensemle_predictions).reshape(-1, n_series)
    custom_ensemle_predictions_df = pd.DataFrame(
        preds_by_day.T,
        columns=['d_'+str(first_test_day+k) for k in range(preds_by_day.shape[0])])

    #keep exactly the days that have true values in valid_df (selected by name, not by position)
    forecast_horizon_pred = custom_ensemle_predictions_df[valid_d_cols]
    forecast_horizon_pred = pd.concat([valid_df[fixed_cols], forecast_horizon_pred],axis=1,sort=False)
    #prediction data transformed from 30490 timeseries to 42840 hierarchical time-series
    pred_42840_df = convert_to_42840(forecast_horizon_pred,valid_d_cols,groupbys)
    #Computed WRMSSE for each base-learner / meta-model combination
    WRMSSE_custom_model_combs[i] = custom_metric(train_42840_df,valid_42840_df,pred_42840_df,weight_df)

  0%|          | 0/4 [00:00<?, ?it/s]
  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [01:19<11:53, 79.32s/it][A
 20%|██        | 2/10 [02:37<10:31, 78.94s/it][A
 30%|███       | 3/10 [03:56<09:11, 78.86s/it][A
 40%|████      | 4/10 [05:14<07:53, 78.86s/it][A
 50%|█████     | 5/10 [06:34<06:35, 79.02s/it][A
 60%|██████    | 6/10 [07:52<05:15, 78.92s/it][A
 70%|███████   | 7/10 [09:12<03:57, 79.03s/it][A
 80%|████████  | 8/10 [10:32<02:38, 79.42s/it][A
 90%|█████████ | 9/10 [11:51<01:19, 79.27s/it][A
100%|██████████| 10/10 [13:11<00:00, 79.19s/it][A
 25%|██▌       | 1/4 [25:12<1:15:37, 1512.61s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [01:25<12:45, 85.08s/it][A
 20%|██        | 2/10 [02:47<11:14, 84.37s/it][A
 30%|███       | 3/10 [04:10<09:46, 83.75s/it][A
 40%|████      | 4/10 [05:33<08:21, 83.58s/it][A
 50%|█████     | 5/10 [06:56<06:56, 83.37s/it][A
 60%|██████    | 6/10 [08:20<05:34, 83.73s/it][A
 70%|███████   | 7/10 [09:4

In [15]:
#scores are keyed by the position of the (base-learner, meta-model) pair in model_combs
print('WRMSSE for different base-learners and meta-model combination is :\n',WRMSSE_custom_model_combs)

WRMSSE for different base-learners and meta-model combination is :
 {0: 0.7727196527957321, 1: 0.7447778931206561, 2: 0.7058861770893176, 3: 0.7641068479229063}


In [16]:
#persist the scores; the context manager closes (and thereby flushes) the file handle,
#which a bare open(...) passed directly to pickle.dump never does explicitly
with open('WRMSSE_custom_model_combs', 'wb') as scores_file:
    pickle.dump(WRMSSE_custom_model_combs, scores_file)

<h4> After trying different base-learner and meta-model combinations, we observe that XGBRegressor as base-learner with LGBMRegressor as meta-model gives the lowest WRMSSE (0.7059) of the four combinations. </h4>
<h5>Fixing base-learner to be XGBRegressor and meta-model to be LGBMRegressor and tuning the number of base-learners 'N' to improve WRMSSE</h5>

In [14]:
#the 12 aggregation levels of the hierarchy; 'for_all' is a constant column added below
groupbys = ('for_all', 'state_id', 'store_id', 'cat_id', 'dept_id',['state_id', 'cat_id'],  
            ['state_id', 'dept_id'], ['store_id', 'cat_id'],['store_id', 'dept_id'], 'item_id', 
            ['item_id', 'state_id'], ['item_id', 'store_id'])

#Day ranges are selected by column name: d_1070..d_1913 is the scaling/weighting history and
#d_1914..d_1941 holds the true sales used for scoring. Selecting by name keeps the split correct
#even when the empty d_1942..d_1969 forecast columns have already been appended to sales_eval_df.
train_df = pd.concat([sales_eval_df.loc[:,:'state_id'],sales_eval_df.loc[:,'d_1070':'d_1913']],axis=1,sort=False)
valid_df = sales_eval_df.loc[:,'d_1914':'d_1941'].copy()
train_d_cols = [col for col in train_df.columns if col.startswith('d_')]
weight_cols = train_d_cols[-28:] #last 28 training days define the weights
train_df['for_all'] = "all" #for level 1 aggregation
fixed_cols = [col for col in train_df.columns if not col.startswith('d_')]
valid_d_cols = [col for col in valid_df.columns if col.startswith('d_')]
if not all([col in valid_df.columns for col in fixed_cols]):
    valid_df = pd.concat([train_df[fixed_cols],valid_df],axis=1,sort=False)
weight_df = compute_weights(train_df,valid_df,weight_cols,groupbys,fixed_cols)
train_42840_df = convert_to_42840(train_df, train_d_cols, groupbys)
valid_42840_df = convert_to_42840(valid_df, valid_d_cols, groupbys)

#fixing the base-learner (XGBRegressor) and meta-model (LGBMRegressor) and tuning the number of base-learners
N_list = [12,15,18]
n_series = len(valid_df) #number of bottom-level series, i.e. rows per day in X_test
first_test_day = int(X_test.iloc[0]['d'])
WRMSSE_custom_model_N1 = {}
for n in tqdm(N_list):
    base_learner = xgb.XGBRegressor(n_estimators=50,learning_rate=0.05,max_depth=10,n_jobs=-1,
                                  colsample_bytree=0.3,subsample=1,random_state=42)
    metaM = lgb.LGBMRegressor(num_leaves=125,n_estimators=100,learning_rate=0.075,n_jobs=-1)
    custom_ensemle_predictions = custom_ensemble(D1_x,D1_y,D2_x,D2_y,X_test,y_test,n,base_learner,metaM)

    #the predictions are ordered day by day with n_series rows per day: reshape them to one
    #row per series and one column per day (raises if the length is not a whole number of days)
    preds_by_day = np.asarray(custom_ensemle_predictions).reshape(-1, n_series)
    custom_ensemle_predictions_df = pd.DataFrame(
        preds_by_day.T,
        columns=['d_'+str(first_test_day+k) for k in range(preds_by_day.shape[0])])

    #keep exactly the days that have true values in valid_df (selected by name, not by position)
    forecast_horizon_pred = custom_ensemle_predictions_df[valid_d_cols]
    forecast_horizon_pred = pd.concat([valid_df[fixed_cols], forecast_horizon_pred],axis=1,sort=False)
    #prediction data transformed from 30490 timeseries to 42840 hierarchical time-series
    pred_42840_df = convert_to_42840(forecast_horizon_pred,valid_d_cols,groupbys)
    #Computed WRMSSE for each number of base-learners
    WRMSSE_custom_model_N1[n] = custom_metric(train_42840_df,valid_42840_df,pred_42840_df,weight_df)
    print('WRMSSE score is: ',WRMSSE_custom_model_N1)

  0%|          | 0/3 [00:00<?, ?it/s]
  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [16:32<3:02:00, 992.79s/it][A
 17%|█▋        | 2/12 [33:11<2:45:45, 994.59s/it][A
 25%|██▌       | 3/12 [49:37<2:28:47, 991.91s/it][A
 33%|███▎      | 4/12 [1:05:47<2:11:23, 985.45s/it][A
 42%|████▏     | 5/12 [1:22:42<1:56:00, 994.34s/it][A
 50%|█████     | 6/12 [1:32:04<1:26:27, 864.55s/it][A
 58%|█████▊    | 7/12 [1:41:41<1:04:51, 778.32s/it][A
 67%|██████▋   | 8/12 [1:54:07<51:14, 768.71s/it]  [A
 75%|███████▌  | 9/12 [2:10:45<41:51, 837.27s/it][A
 83%|████████▎ | 10/12 [2:21:29<25:58, 779.48s/it][A
 92%|█████████▏| 11/12 [2:35:33<13:18, 798.86s/it][A
100%|██████████| 12/12 [2:51:52<00:00, 859.39s/it][A
 33%|███▎      | 1/3 [2:53:26<5:46:52, 10406.33s/it]
  0%|          | 0/15 [00:00<?, ?it/s][A

WRMSSE score is:  {12: 0.7054210382859196}



  7%|▋         | 1/15 [16:38<3:52:56, 998.32s/it][A
 13%|█▎        | 2/15 [33:13<3:36:04, 997.23s/it][A
 20%|██        | 3/15 [48:29<3:14:37, 973.13s/it][A
 27%|██▋       | 4/15 [58:14<2:37:01, 856.48s/it][A
 33%|███▎      | 5/15 [1:08:19<2:10:11, 781.12s/it][A
 40%|████      | 6/15 [1:17:50<1:47:41, 717.94s/it][A
 47%|████▋     | 7/15 [1:27:10<1:29:26, 670.81s/it][A
 53%|█████▎    | 8/15 [1:36:49<1:15:01, 643.01s/it][A
 60%|██████    | 9/15 [1:46:16<1:02:01, 620.31s/it][A
 67%|██████▋   | 10/15 [1:55:49<50:30, 606.11s/it] [A
 73%|███████▎  | 11/15 [2:05:18<39:39, 594.96s/it][A
 80%|████████  | 12/15 [2:14:41<29:16, 585.57s/it][A
 87%|████████▋ | 13/15 [2:24:25<19:29, 584.85s/it][A
 93%|█████████▎| 14/15 [2:33:53<09:39, 579.94s/it][A
100%|██████████| 15/15 [2:42:59<00:00, 651.99s/it][A
 67%|██████▋   | 2/3 [5:37:11<2:50:31, 10231.89s/it]
  0%|          | 0/18 [00:00<?, ?it/s][A

WRMSSE score is:  {12: 0.7054210382859196, 15: 0.7039885143961281}



  6%|▌         | 1/18 [09:59<2:49:46, 599.22s/it][A
 11%|█         | 2/18 [19:46<2:38:51, 595.74s/it][A
 17%|█▋        | 3/18 [29:51<2:29:35, 598.37s/it][A
 22%|██▏       | 4/18 [39:37<2:18:44, 594.61s/it][A
 28%|██▊       | 5/18 [48:46<2:05:52, 580.99s/it][A
 33%|███▎      | 6/18 [58:16<1:55:33, 577.81s/it][A
 39%|███▉      | 7/18 [1:07:49<1:45:38, 576.24s/it][A
 44%|████▍     | 8/18 [1:17:20<1:35:48, 574.85s/it][A
 50%|█████     | 9/18 [1:26:44<1:25:44, 571.60s/it][A
 56%|█████▌    | 10/18 [1:36:02<1:15:38, 567.30s/it][A
 61%|██████    | 11/18 [1:45:26<1:06:04, 566.34s/it][A
 67%|██████▋   | 12/18 [1:55:03<56:57, 569.65s/it]  [A
 72%|███████▏  | 13/18 [2:04:44<47:45, 573.03s/it][A
 78%|███████▊  | 14/18 [2:14:22<38:17, 574.37s/it][A
 83%|████████▎ | 15/18 [2:24:14<28:59, 579.70s/it][A
 89%|████████▉ | 16/18 [2:34:26<19:38, 589.38s/it][A
 94%|█████████▍| 17/18 [2:44:36<09:55, 595.78s/it][A
100%|██████████| 18/18 [2:54:46<00:00, 582.57s/it][A
100%|██████████| 3/3 [8:

WRMSSE score is:  {12: 0.7054210382859196, 15: 0.7039885143961281, 18: 0.7011200983741996}





In [15]:
#scores are keyed by the number of base-learners N
print('WRMSSE for different number base-learners is :\n',WRMSSE_custom_model_N1)

WRMSSE for different number base-learners is :
 {12: 0.7054210382859196, 15: 0.7039885143961281, 18: 0.7011200983741996}


<h4>Observation</h4>
1. Tuning the number of base-learners, we observe that N = 18 gives the lowest WRMSSE (0.7011) among the values tried (12, 15, 18).

In [16]:
#persist the scores; the context manager closes (and thereby flushes) the file handle,
#which a bare open(...) passed directly to pickle.dump never does explicitly
with open('WRMSSE_custom_model_N1', 'wb') as scores_file:
    pickle.dump(WRMSSE_custom_model_N1, scores_file)

<h5>Getting the test predictions from the custom ensemble with N=18</h5>

In [24]:
#custom_ensemle_predictions_df still holds the predictions of the last tuning iteration (N=18)
custom_ensemle_predictions_df.head(2)

Unnamed: 0,d_1790,d_1791,d_1792,d_1793,d_1794,d_1795,d_1796,d_1797,d_1798,d_1799,...,d_1960,d_1961,d_1962,d_1963,d_1964,d_1965,d_1966,d_1967,d_1968,d_1969
0,0.51,0.42,0.34,0.61,0.67,0.56,0.51,0.57,0.51,0.52,...,1.11,1.49,1.35,1.0,0.98,0.91,0.98,1.11,1.26,1.28
1,0.4,0.4,0.3,0.5,0.49,0.38,0.36,0.32,0.31,0.3,...,0.14,0.23,0.32,0.29,0.34,0.33,0.31,0.33,0.35,0.34


In [25]:
#put the identifying columns (fixed_cols) in front of the per-day predictions.
#NOTE: the result is assigned back to the same name, so running this cell a second time
#adds the identifying columns again.
custom_ensemle_predictions_df = pd.concat([valid_df[fixed_cols], custom_ensemle_predictions_df],axis=1,sort=False)
custom_ensemle_predictions_df.head(2)

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,for_all,d_1790,d_1791,d_1792,...,d_1960,d_1961,d_1962,d_1963,d_1964,d_1965,d_1966,d_1967,d_1968,d_1969
0,HOBBIES_1_001_CA_1_evaluation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,all,0.51,0.42,0.34,...,1.11,1.49,1.35,1.0,0.98,0.91,0.98,1.11,1.26,1.28
1,HOBBIES_1_002_CA_1_evaluation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,all,0.4,0.4,0.3,...,0.14,0.23,0.32,0.29,0.34,0.33,0.31,0.33,0.35,0.34


In [26]:
#write the N=18 predictions (id columns + one column per day) to CSV without the index
custom_ensemle_predictions_df.to_csv('custom_ensemle_predictions_df_N18.csv',index=False)

In [27]:
#read the saved N=18 predictions back from CSV and show the first rows
custom_ensemle_predictions_df_N18 = pd.read_csv('custom_ensemle_predictions_df_N18.csv')
custom_ensemle_predictions_df_N18.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,for_all,d_1790,d_1791,d_1792,...,d_1960,d_1961,d_1962,d_1963,d_1964,d_1965,d_1966,d_1967,d_1968,d_1969
0,HOBBIES_1_001_CA_1_evaluation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,all,0.51,0.42,0.34,...,1.11,1.49,1.35,1.0,0.98,0.91,0.98,1.11,1.26,1.28
1,HOBBIES_1_002_CA_1_evaluation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,all,0.4,0.4,0.3,...,0.14,0.23,0.32,0.29,0.34,0.33,0.31,0.33,0.35,0.34
2,HOBBIES_1_003_CA_1_evaluation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,all,0.74,0.6,0.63,...,0.68,0.8,0.83,0.63,0.6,0.54,0.64,0.75,0.79,0.78
3,HOBBIES_1_004_CA_1_evaluation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,all,1.42,1.46,1.16,...,1.63,2.05,1.97,1.57,1.39,1.41,1.51,1.5,1.92,2.31
4,HOBBIES_1_005_CA_1_evaluation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,all,1.08,1.04,0.66,...,1.53,1.83,1.85,1.51,1.41,1.3,1.27,1.34,1.58,1.51


<h4>Extracting the validation-day (1914-1941) and evaluation-day (1942-1969) predictions from the custom ensemble's test predictions (forecasted sales for days 1790-1969) and putting them into the submission format to get the private leaderboard score</h4>

In [28]:
#.copy() gives an independent frame, so adding the F columns below does not write into a slice
#of custom_ensemle_predictions_df_N18
submit_val = custom_ensemle_predictions_df_N18[['id']].copy()
#validation predictions from days 1914-1941 become F1..F28
for i in range(28):
    submit_val['F'+str(i+1)] = custom_ensemle_predictions_df_N18['d_'+str(1914+i)]
submit_val['id'] = submit_val['id'].str.replace('evaluation','validation',regex=False)
submit_eval = submit_val.copy()
#evaluation predictions from days 1942-1969 overwrite F1..F28 in the copy
for i in range(28):
    submit_eval['F'+str(i+1)] = custom_ensemle_predictions_df_N18['d_'+str(1942+i)]
submit_eval["id"] = submit_eval["id"].str.replace('validation','evaluation',regex=False)
#validation rows first, then evaluation rows; pd.concat replaces DataFrame.append,
#which was removed in pandas 2.0
submit_custom_ensemble_N18 = pd.concat([submit_val, submit_eval]).reset_index(drop=True)

In [29]:
#write the stacked validation+evaluation submission to disk without the row index
submit_custom_ensemble_N18.to_csv(path_or_buf="submit_custom_ensemble_N18.csv", index=False)

In [30]:
#first five rows of the submission: these are the '_validation' ids
submit_custom_ensemble_N18.head(n=5)

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,0.97,0.83,0.82,0.79,0.86,1.02,1.02,0.79,0.9,...,0.95,1.3,1.17,1.0,0.92,0.86,0.93,0.99,1.23,1.17
1,HOBBIES_1_002_CA_1_validation,0.37,0.36,0.36,0.36,0.3,0.35,0.33,0.28,0.27,...,0.19,0.24,0.24,0.18,0.18,0.22,0.21,0.18,0.24,0.22
2,HOBBIES_1_003_CA_1_validation,0.38,0.38,0.37,0.37,0.37,0.41,0.46,0.34,0.33,...,0.42,0.65,0.67,0.56,0.58,0.63,0.56,0.62,0.78,0.77
3,HOBBIES_1_004_CA_1_validation,1.99,1.75,1.5,1.47,1.6,1.95,2.19,1.59,1.54,...,1.74,2.1,2.48,1.88,1.63,1.49,1.49,1.67,2.42,2.36
4,HOBBIES_1_005_CA_1_validation,0.93,0.89,0.99,1.11,1.05,1.32,1.58,1.17,1.13,...,1.09,1.28,1.29,0.95,0.92,0.95,0.95,1.06,1.3,1.52


In [31]:
#last five rows of the submission: these are the '_evaluation' ids
submit_custom_ensemble_N18.tail(n=5)

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
60975,FOODS_3_823_WI_3_evaluation,0.28,0.26,0.28,0.33,0.38,0.4,0.38,0.3,0.4,...,0.5,0.55,0.71,0.54,0.58,0.62,0.55,0.51,0.66,0.71
60976,FOODS_3_824_WI_3_evaluation,0.43,0.32,0.35,0.46,0.46,0.42,0.38,0.31,0.33,...,0.3,0.35,0.33,0.3,0.29,0.29,0.32,0.29,0.34,0.34
60977,FOODS_3_825_WI_3_evaluation,0.73,0.71,0.76,0.74,0.77,1.01,1.02,0.78,0.75,...,0.81,0.92,0.93,0.82,0.74,0.77,0.67,0.75,0.89,0.99
60978,FOODS_3_826_WI_3_evaluation,0.96,1.06,0.96,0.94,1.05,1.21,1.24,1.0,0.98,...,1.07,1.27,1.32,1.18,1.35,1.21,1.14,1.25,1.49,1.43
60979,FOODS_3_827_WI_3_evaluation,0.86,0.89,0.94,0.87,0.77,1.0,1.01,0.76,0.86,...,1.13,1.18,1.13,1.18,1.28,1.22,1.15,1.26,1.66,1.63


<h4>Pre-training the best model and saving to disk</h4>

In [13]:
def _rows_per_day(frame):
    """Return one array of row positions per day, for every day number between the
    'd' value of the first row and the 'd' value of the last row (inclusive)."""
    day_column = frame['d']
    first_day = int(day_column.iloc[0])
    last_day = int(day_column.iloc[-1])
    return [np.flatnonzero((day_column == day).to_numpy()) for day in range(first_day,last_day+1)]


def _stack_day_predictions(model,frame,rows_per_day):
    """Predict each day's rows separately and concatenate the results in day order."""
    return np.concatenate([model.predict(frame.iloc[rows]) for rows in rows_per_day])


def custom_ensemble(D1_x,D1_y,D2_x,D2_y,X_test,y_test,N,base_learner,metaM,random_state=None):
    """Train a bootstrap-stacked ensemble and return the meta-model predictions for X_test.

    For each of the N base models a bootstrap sample (same size, drawn with replacement)
    of D1_x is taken, the matching targets are looked up in D1_y by index label, and
    `base_learner` is fitted on that sample and dumped to 'M<i>.pkl' (i = 1..N).
    The fitted model then predicts D2_x and X_test day by day; the day-ordered predictions
    become the i-th feature column of the meta-model's training and test matrices.
    Finally `metaM` is fitted on the D2 feature matrix against D2_y, dumped to
    'best_meta_model.pkl', and used to predict from the X_test feature matrix.

    Parameters
    ----------
    D1_x, D1_y : training features / targets for the base models. D1_y is indexed with
        D1_x's (resampled) index labels, so both must share the same index labels.
    D2_x, D2_y : features / targets used to train the meta-model. D2_x must have a 'd' column.
    X_test : features to forecast. Must have a 'd' column. For both D2_x and X_test the day
        span is taken from the first and last row, so rows are expected to start at the
        earliest day and end at the latest day.
    y_test : not used; kept so existing calls keep working.
    N : number of base models (must be >= 1).
    base_learner : estimator with fit/predict. The SAME object is fitted on every iteration
        (no copy is made); each fit is written to disk before the next one starts.
    metaM : estimator with fit/predict used as the meta-model (fitted in place).
    random_state : optional seed for the bootstrap draws. None (default) keeps the previous
        behaviour of drawing from NumPy's global random state.

    Returns
    -------
    The array returned by `metaM.predict`, one value per X_test row in day-major order.

    Raises
    ------
    ValueError
        If N is smaller than 1.
    """
    if N < 1:
        raise ValueError('N (number of base models) must be at least 1')
    #one shared generator, so the N bootstrap samples differ from each other but are reproducible
    rng = None if random_state is None else np.random.RandomState(random_state)
    #the row positions of every day do not depend on the model, so compute them once
    D2_rows = _rows_per_day(D2_x)
    X_test_rows = _rows_per_day(X_test)
    #per-model prediction vectors; kept in plain lists so their length is tied to N and
    #not to the number of days in D2_x / X_test
    D2_features = []
    X_test_features = []
    #N represents number of base models
    for i in tqdm(range(N)):
        #bootstrap sample of D1_x and the targets with the same index labels
        boot_x = D1_x.sample(frac=1,replace=True,random_state=rng)
        boot_y = D1_y.loc[boot_x.index]
        #training the base model on this bootstrap sample
        base_model = base_learner
        base_model.fit(boot_x,boot_y)
        #saving each base-learner into disk
        joblib.dump(base_model,'M'+str(i+1)+'.pkl')
        #predictions of this base model for D2_x and X_test become the i-th meta feature
        D2_features.append(_stack_day_predictions(base_model,D2_x,D2_rows))
        X_test_features.append(_stack_day_predictions(base_model,X_test,X_test_rows))
    #one column per base model
    D2_pred = np.column_stack(D2_features)
    X_test_pred = np.column_stack(X_test_features)
    #training meta-model with D2_pred,D2_y
    best_meta_model = metaM
    best_meta_model.fit(D2_pred,D2_y.values)
    #saving best meta-model into disk
    joblib.dump(best_meta_model,'best_meta_model.pkl')
    #getting the predictions for X_test_pred from trained meta-model
    meta_model_preds = best_meta_model.predict(X_test_pred)
    return meta_model_preds

In [14]:
#aggregation keys handed to compute_weights and convert_to_42840 (12 entries)
groupbys = ('for_all', 'state_id', 'store_id', 'cat_id', 'dept_id',['state_id', 'cat_id'],
            ['state_id', 'dept_id'], ['store_id', 'cat_id'],['store_id', 'dept_id'], 'item_id',
            ['item_id', 'state_id'], ['item_id', 'store_id'])

#training frame: id columns plus the daily sales from d_1070 onwards, minus the last 28 days
train_df = pd.concat([sales_eval_df.loc[:,:'state_id'],sales_eval_df.loc[:,'d_1070':]],axis=1,sort=False)
train_df = train_df.iloc[:,:-28]
#validation frame: the last 28 day columns of sales_eval_df
valid_df = sales_eval_df.iloc[:,-28:].copy()
train_d_cols = [col for col in train_df.columns if col.startswith('d_')]
#the last 28 training days are the columns passed to compute_weights (taken before 'for_all' is appended)
weight_cols = train_df.iloc[:,-28:].columns.tolist()
train_df['for_all'] = "all" #for level 1 aggregation
fixed_cols = [col for col in train_df.columns if not col.startswith('d_')]
valid_d_cols = [col for col in valid_df.columns if col.startswith('d_')]
if not all([col in valid_df.columns for col in fixed_cols]):
    valid_df = pd.concat([train_df[fixed_cols],valid_df],axis=1,sort=False)
weight_df = compute_weights(train_df,valid_df,weight_cols,groupbys,fixed_cols)
train_42840_df = convert_to_42840(train_df, train_d_cols, groupbys)
valid_42840_df = convert_to_42840(valid_df, valid_d_cols, groupbys)

#getting the X_test predictions for different base-learners and meta-models
#fixing the base-learners and meta-model and number of base-learners to 18

base_learner = xgb.XGBRegressor(n_estimators=50,learning_rate=0.05,max_depth=10,n_jobs=-1,
                                  colsample_bytree=0.3,subsample=1,random_state=42)
metaM = lgb.LGBMRegressor(num_leaves=125,n_estimators=100,learning_rate=0.075,n_jobs=-1)
custom_ensemle_predictions_best = custom_ensemble(D1_x,D1_y,D2_x,D2_y,X_test,y_test,18,base_learner,metaM)

#the flat prediction vector is day-major: one consecutive chunk of n_series values per test day.
#n_series comes from the sales frame (30490 rows) instead of being hard-coded, and the vector is
#reshaped in one step instead of inserting ~180 columns one at a time into a growing DataFrame.
n_series = len(sales_eval_df)
first_test_day = int(X_test.iloc[0]['d'])
n_test_days, leftover = divmod(len(custom_ensemle_predictions_best), n_series)
if leftover:
    raise ValueError('number of predictions is not a multiple of the number of series')
custom_ensemle_predictions_df = pd.DataFrame(
    np.asarray(custom_ensemle_predictions_best).reshape(n_test_days, n_series).T,
    columns=['d_'+str(first_test_day+k) for k in range(n_test_days)])

#pick the validation horizon by column name, so a change in the test span fails loudly (KeyError)
#rather than silently scoring the wrong 28 days as a positional slice would
forecast_horizon_pred = custom_ensemle_predictions_df[valid_d_cols]
forecast_horizon_pred = pd.concat([valid_df[fixed_cols], forecast_horizon_pred],axis=1,sort=False)
#prediction data transformed from 30490 time-series to 42840 hierarchical time-series
pred_42840_df = convert_to_42840(forecast_horizon_pred,valid_d_cols,groupbys)
#WRMSSE of the ensemble predictions on the validation horizon
WRMSSE_custom_model_best = custom_metric(train_42840_df,valid_42840_df,pred_42840_df,weight_df)

100%|██████████| 18/18 [3:25:09<00:00, 683.83s/it]  


In [15]:
#report the WRMSSE computed in the previous cell, rounded to five decimals
print('WRMSSE of the best custom-built ensemble model is', round(WRMSSE_custom_model_best, ndigits=5))

WRMSSE of the best custom-built ensemble model is 0.6995


In [17]:
#persist the per-day forecasts of the retrained model (row index not written)
custom_ensemle_predictions_df.to_csv(path_or_buf='custom_ensemle_predictions_df_best.csv', index=False)

In [18]:
#preview the first two series of the per-day forecast frame
custom_ensemle_predictions_df.head(n=2)

Unnamed: 0,d_1790,d_1791,d_1792,d_1793,d_1794,d_1795,d_1796,d_1797,d_1798,d_1799,...,d_1960,d_1961,d_1962,d_1963,d_1964,d_1965,d_1966,d_1967,d_1968,d_1969
0,0.5,0.41,0.35,0.6,0.66,0.55,0.5,0.56,0.5,0.52,...,1.1,1.49,1.33,0.99,0.98,0.89,1.0,1.1,1.3,1.27
1,0.4,0.4,0.31,0.47,0.49,0.38,0.36,0.33,0.31,0.31,...,0.14,0.25,0.33,0.29,0.35,0.34,0.32,0.33,0.35,0.34


<h3>Conclusions</h3>

1. We use the sales of the validation days (1914-1941), for which the true values are available, to tune the base-learner and meta-model combinations as well as the number of base-learners.

2. After trying different base-learner and meta-model combinations, we observe that XGBRegressor as the base-learner with LGBMRegressor as the meta-model gives a lower WRMSSE than the other combinations tried.

3. Tuning the number of base-learners 'N', we observe that N = 18 gives a lower WRMSSE than the other values tried.

4. Submitting the validation days (1914-1941) and evaluation days (1942-1969) forecasts in the required format gives a leaderboard score of **0.66282**, which ranks **352** out of **5558** participants and stands in the **top 7%**.

5. Of all the models, the custom ensemble model performs well: it forecasts sales with a lower private leaderboard score (WRMSSE) of 0.66282.

##### Kaggle score of all models
<img src="https://imgur.com/xKld7GP.png"/>

##### Kaggle leaderboard rank of best performing model
<img src="https://imgur.com/Ia3Hn1s.png"/>