# Imports

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import lightgbm as lgb
import matplotlib.pyplot as plt

from pandas.plotting import register_matplotlib_converters
from IPython.display import display
from tqdm import tqdm
# from tsa_functions import *
from tsa_tools import *  # See last cell

# Register pandas' date/time converters with matplotlib and pick the plot style.
register_matplotlib_converters()
sns.set_style('darkgrid')

# Show 4 digits of precision in both numpy and pandas output.
np.set_printoptions(precision=4)
# Use the fully qualified option key: the bare 'precision' alias is not
# accepted by current pandas releases.
pd.set_option('display.precision', 4)

import statsmodels.api as sm
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.exponential_smoothing.ets import ETSModel

# Loading the Data

In [2]:
# Read the five M5 CSV inputs; paths are relative to the notebook's working
# directory.
m5_csv_paths = (
    '../data/m5/calendar.csv',
    '../data/m5/sell_prices.csv',
    '../data/m5/sales_train_validation.csv',
    '../data/m5/sales_train_evaluation.csv',
    '../data/m5/weights_validation.csv',
)
df_calendar, df_price, df_sales, df_sales1, df_weights = [
    pd.read_csv(csv_path) for csv_path in m5_csv_paths
]

In [61]:
# Move the first six columns of the evaluation file (taken in reverse order)
# into the index, then transpose so every remaining column becomes a row.
# The id columns are read from df_sales1 itself rather than from the separate
# validation frame, so this cell only depends on the frame it reshapes.
id_cols = [*df_sales1.columns[5::-1]]
sales_by_day = df_sales1.set_index(id_cols).T

# Label the rows with the first len(sales_by_day) calendar dates instead of a
# hard-coded row count (presumably one calendar row per sales day, in order
# -- TODO confirm against calendar.csv).
full_df = sales_by_day.set_index(
    pd.DatetimeIndex(df_calendar.date)[:len(sales_by_day)])

In [63]:
# Aggregate the columns to one series per (store_id, dept_id) pair.
# DataFrame.sum(level=...) is no longer available in pandas 2.x, so group the
# transposed frame on its index levels instead. sort=False keeps the groups in
# order of first appearance, matching the column order sum(level=...) produced.
lvl9 = (full_df.T
        .groupby(level=["store_id", "dept_id"], sort=False)
        .sum()
        .T)
lvl9.head()

store_id,CA_1,CA_1,CA_1,CA_1,CA_1,CA_1,CA_1,CA_2,CA_2,CA_2,...,WI_2,WI_2,WI_2,WI_3,WI_3,WI_3,WI_3,WI_3,WI_3,WI_3
dept_id,HOBBIES_1,HOBBIES_2,HOUSEHOLD_1,HOUSEHOLD_2,FOODS_1,FOODS_2,FOODS_3,HOBBIES_1,HOBBIES_2,HOUSEHOLD_1,...,FOODS_1,FOODS_2,FOODS_3,HOBBIES_1,HOBBIES_2,HOUSEHOLD_1,HOUSEHOLD_2,FOODS_1,FOODS_2,FOODS_3
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2011-01-29,528,28,361,181,297,674,2268,522,16,529,...,191,155,1269,256,22,584,148,152,583,2293
2011-01-30,489,9,350,170,284,655,2198,381,16,461,...,184,128,1121,342,14,541,195,138,585,2383
2011-01-31,409,6,279,114,214,396,1398,352,16,306,...,154,199,1233,228,20,420,106,127,575,1841
2011-02-01,383,9,278,123,175,476,1607,344,6,270,...,205,244,1564,183,11,327,94,98,533,1965
2011-02-02,263,5,195,135,182,354,1496,283,13,217,...,96,147,724,70,4,151,53,87,340,1427


# Pre-processing

## Pre-processing Full Dataset

In [40]:
# full_df_prep = (full_dflvl9.apply(lambda x: np.where(x < 10,  np.nan, x))
#                 .interpolate(method='linear', axis=0)).fillna(method='bfill')

## Pre-processing Lvl9

In [64]:
# Number of (day, series) cells in lvl9 with fewer than 10 sales
(lvl9 < 10).sum(axis=0).sum()

2403

In [65]:
# Treat daily totals below 10 as missing, fill them by linear interpolation
# down each column, then back-fill whatever is still missing (gaps at the very
# start of a column have no earlier value to interpolate from).
# .bfill() replaces the deprecated fillna(method='bfill') spelling.
df_lvl9 = (lvl9.apply(lambda x: np.where(x < 10, np.nan, x))
           .interpolate(method='linear', axis=0)
           .bfill())

In [66]:
# Sanity check: no cell of the cleaned frame should still be below 10
(df_lvl9 < 10).sum(axis=0).sum()

0

# Q1. Baseline Methods

In [67]:
# The first 1913 rows are used for fitting; the remaining rows are held out.
train, test = df_lvl9.iloc[:1913], df_lvl9.iloc[1913:]

In [68]:
h = 28  # forecast horizon handed to every baseline method
m = 7   # seasonal period used by snaivef and the Holt-Winters model

# One list of forecasts per method, each in train.columns order.
naive, snaive, ses, hl, ahl = [], [], [], [], []

for x in train.columns:
    series = train[x]

    naive.append(naivef(series, h))
    snaive.append(snaivef(series, h, m))

    # SES: ETSModel with its default components, fitted on the raw values
    model1 = ETSModel(series.values).fit()
    ses.append(model1.forecast(h))

    # Holt's Linear: additive trend, no seasonality, fitted on the raw values
    model2 = ETSModel(series.values, trend="add", seasonal=None).fit()
    hl.append(model2.forecast(h))

    # Additive Holt-Winters: fitted on the Series itself (not .values)
    model3 = ETSModel(
        series,
        error="add",
        trend="add",
        seasonal="add",
        damped_trend=False,
        seasonal_periods=m,
    ).fit()
    ahl.append(model3.forecast(h))

# Method label -> list of per-series forecasts
base_forcast = {
    'Naive': naive,
    'Seasonal Naive': snaive,
    'SES': ses,
    'Holt\'s Linear': hl,
    'Additive Holt-Winters': ahl,
}


In [69]:
# Method label -> list of RMSSE scores, one per series in train.columns order.
# rmsse is called as rmsse(actuals, forecast, training history).
rmsse_res = {
    method: [
        rmsse(test[col], forecasts[pos], train[col])
        for pos, col in enumerate(train.columns)
    ]
    for method, forecasts in base_forcast.items()
}


In [None]:
# Show every row of the result table. The fully qualified key is required:
# the short pattern 'max_rows' matches more than one option on recent pandas
# and raises OptionError.
pd.set_option('display.max_rows', None)

# Per-series RMSSE of each baseline method, indexed like the training columns
lvl9_rmsse = pd.DataFrame(rmsse_res, index=train.columns)

# Weights of the 'Level9' rows, keyed by their two aggregation-level columns
lvl9_weights = (df_weights[df_weights['Level_id'] == 'Level9']
                .set_index(['Agg_Level_1', 'Agg_Level_2'])[['Weight']])

# Align scores and weights on the shared two-level index
lvl9_fin = pd.concat([lvl9_rmsse, lvl9_weights], axis=1)
lvl9_fin


Unnamed: 0,Unnamed: 1,Naive,Seasonal Naive,SES,Holt's Linear,Additive Holt-Winters,Weight
CA_1,FOODS_1,0.9319,0.7312,0.9179,0.9362,0.7301,0.0052
CA_1,FOODS_2,2.0535,0.8269,2.0534,2.3136,0.593,0.0149
CA_1,FOODS_3,1.7113,0.4944,1.0825,1.0185,0.5045,0.0419
CA_1,HOBBIES_1,1.4583,0.7619,0.8819,0.881,0.6311,0.0172
CA_1,HOBBIES_2,1.934,1.1429,0.8831,0.8748,0.7203,0.0006
CA_1,HOUSEHOLD_1,2.1052,0.5172,1.1514,1.1636,0.4433,0.0228
CA_1,HOUSEHOLD_2,2.2997,0.5228,1.2208,1.1789,0.5276,0.0083
CA_2,FOODS_1,1.2557,1.1311,1.3875,1.3786,0.9718,0.0098
CA_2,FOODS_2,4.2936,1.4906,2.3041,2.2415,1.2994,0.013
CA_2,FOODS_3,2.583,0.6046,1.4502,1.4114,0.4787,0.0375


In [73]:
# Output row label -> column of lvl9_fin holding that method's RMSSE
wrmsse_columns = {
    'Naive': 'Naive',
    'S. Naive': 'Seasonal Naive',
    'SES': 'SES',
    'Holt\'s Linear': 'Holt\'s Linear',
    'Additive Holt-Winters': 'Additive Holt-Winters',
}

# Weighted RMSSE per method: sum over series of RMSSE * Weight
lvl9_wrmsse = {
    label: sum(lvl9_fin[column] * lvl9_fin['Weight'])
    for label, column in wrmsse_columns.items()
}
pd.DataFrame.from_dict(lvl9_wrmsse, orient='index', columns=['WRMSSE'])

Unnamed: 0,WRMSSE
Naive,1.6286
S. Naive,0.93
SES,1.2075
Holt's Linear,1.2367
Additive Holt-Winters,0.8549


In [72]:
# Spot check: recompute the Naive RMSSE for a single series by hand
series_key = ('CA_1', 'HOBBIES_1')
k = naivef(train[series_key], 28)
rmsse(test[series_key], k, train[series_key])

1.458257712456372

# Q7: Top-Down Function

In [None]:
def compute_topdown(df_pred, level, approach='AHP'):
    """Top-down forecast helper -- not implemented yet.

    TODO: the body consists of this docstring only, so calling the function
    currently returns ``None`` whatever arguments are passed.

    Parameters
    ----------
    df_pred
        Presumably the forecasts to disaggregate -- TODO confirm.
    level
        Presumably the hierarchy level to produce forecasts for
        -- TODO confirm.
    approach : default 'AHP'
        Presumably the name of the proportion scheme to apply
        -- TODO confirm the accepted values.
    """

In [None]:
# Preview the full sales frame. The frame built earlier in this notebook is
# named `full_df`; `df_full` is not defined in the cells shown and would raise
# NameError. NOTE(review): confirm no other cell defines `df_full`.
full_df.head()