# Store item demand

In [None]:
__author__ = "Zhiji Ding"
__copyright__ = "Copyright 2020, Zhiji Ding"
__email__ = "zhiji.ding@gmail.com"

In [1]:
# Module import
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as matplot
import seaborn as sns
import statsmodels.api as sm

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# load data
train_df=pd.read_csv('/Users/jimmyding/Downloads/demand_forecasting_kernels_only/train.csv')
test_df=pd.read_csv('/Users/jimmyding/Downloads/demand_forecasting_kernels_only/test.csv')

In [3]:
train_df.head()

Unnamed: 0,date,store,item,sales
0,2013-01-01,1,1,13
1,2013-01-02,1,1,11
2,2013-01-03,1,1,14
3,2013-01-04,1,1,13
4,2013-01-05,1,1,10


In [4]:
test_df.head()

Unnamed: 0,id,date,store,item
0,0,2018-01-01,1,1
1,1,2018-01-02,1,1
2,2,2018-01-03,1,1
3,3,2018-01-04,1,1
4,4,2018-01-05,1,1


In [5]:
train_df['date']=pd.to_datetime(train_df['date'])
train_df['week_day']=train_df['date'].dt.weekday_name
train_df['month']=train_df['date'].dt.month
#train_df['year']=train_df['date'].dt.year
train_df=train_df.set_index(['date'])
train_df.head()

Unnamed: 0_level_0,store,item,sales,week_day,month
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-01-01,1,1,13,Tuesday,1
2013-01-02,1,1,11,Wednesday,1
2013-01-03,1,1,14,Thursday,1
2013-01-04,1,1,13,Friday,1
2013-01-05,1,1,10,Saturday,1


In [6]:
train_df['week_day'] = np.where((train_df.week_day.isin(['Monday','Tuesday','Wednesday','Thursday','Friday'])),'weekday',train_df.week_day)

train_df['week_day'] = np.where((train_df.week_day.isin(['Saturday','Sunday'])),'weekend',train_df.week_day)
train_df


Unnamed: 0_level_0,store,item,sales,week_day,month
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-01-01,1,1,13,weekday,1
2013-01-02,1,1,11,weekday,1
2013-01-03,1,1,14,weekday,1
2013-01-04,1,1,13,weekday,1
2013-01-05,1,1,10,weekend,1
...,...,...,...,...,...
2017-12-27,10,50,63,weekday,12
2017-12-28,10,50,59,weekday,12
2017-12-29,10,50,74,weekday,12
2017-12-30,10,50,62,weekend,12


In [7]:
import datetime
# one hot encoding
train_df=pd.get_dummies(data=train_df, columns=['week_day'])
#train_df.head()


train_df['prev_quarter'] = train_df['sales'].shift(91)
train_df['prev_year'] = train_df['sales'].shift(364)

train_df.dropna(inplace=True)
train_df.head()



Unnamed: 0_level_0,store,item,sales,month,week_day_weekday,week_day_weekend,prev_quarter,prev_year
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-12-31,1,1,15,12,1,0,12.0,13.0
2014-01-01,1,1,9,1,1,0,11.0,11.0
2014-01-02,1,1,14,1,1,0,14.0,14.0
2014-01-03,1,1,11,1,1,0,14.0,13.0
2014-01-04,1,1,20,1,0,1,14.0,10.0


In [8]:
train=train_df.loc[train_df.index<pd.to_datetime('October 1, 2017')]
test=train_df.loc[train_df.index>=pd.to_datetime('October 1, 2017')]
train.head()

Unnamed: 0_level_0,store,item,sales,month,week_day_weekday,week_day_weekend,prev_quarter,prev_year
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-12-31,1,1,15,12,1,0,12.0,13.0
2014-01-01,1,1,9,1,1,0,11.0,11.0
2014-01-02,1,1,14,1,1,0,14.0,14.0
2014-01-03,1,1,11,1,1,0,14.0,13.0
2014-01-04,1,1,20,1,0,1,14.0,10.0


In [9]:
item_group_train=train.groupby(['store','item'])
item_group_test=test.groupby(['store','item'])

In [10]:
test_df['date']=pd.to_datetime(test_df['date'])
test_df=test_df.set_index(['date'])
test_item_group=test_df.groupby(['store','item'])

In [11]:
def SMAPE (forecast, actual):
    """Returns the Symmetric Mean Absolute Percentage Error between two Series"""
    try:
        diff = abs(forecast - actual)
        avg = abs(forecast) + abs(actual)
        m=diff/avg
    except ZeroDivisionError:
        m=0
    else:
        return(round(sum(m)/len(forecast) * 100, 2))

# ARIMA

In [12]:
# stationary test function

from statsmodels.tsa.stattools import adfuller
def stationarity_test(timeseries, window = 12, cutoff = 0.05):
    '''Performing Dickey_Fuller Test and return p-value'''
    
    dftest = adfuller(timeseries, autolag='AIC', maxlag = 20 )
    
    pvalue = dftest[1]
    
    return pvalue

In [13]:
def arima_model(df, start, end, exog_test):
    """To build and store arima models for each item to be called for the groupby object."""
    
    # Step 1: stationary test to determine d
    if stationarity_test(df.sales)<0.01:
        d=0
    else:
        diff = df.sales - df.sales.shift(1)
        diff = diff.dropna()
        d=1
        while stationarity_test(diff, window = 12)>0.01:
            diff = diff - diff.shift(1)
            diff = diff.dropna()
            d=d+1
    
    # Step 2: Selection of p and q
    #resDiff = sm.tsa.arma_order_select_ic(diff, max_ar=7, max_ma=7, ic='aic', trend='c')
    #p=resDiff['aic_min_order'][0]
    #q=resDiff['aic_min_order'][1]
    
    # Step 3: Build arima model:
    arimax = sm.tsa.statespace.SARIMAX(endog=df.sales, exog=df.drop(['store', 'item','month', 'sales'], axis=1),\
            order=(6,d,7),seasonal_order=(6,d,7,1),freq='D',\
            trend='ct',enforce_stationarity=False, enforce_invertibility=False).fit()
            #initialization='approximate_diffuse'
    
    # Step 4: Prediction
    
    return pd.DataFrame({'store':int(df['store'][0]),'item':int(df['item'][0]),'sales':arimax.predict(start,end,dynamic=True, exog=exog_test)})

In [14]:
import multiprocessing as mp
pool = mp.Pool(mp.cpu_count())

results=pd.DataFrame(columns=['store','item','sales'])


In [15]:

out = pd.concat(pool.starmap(arima_model, [(group,\
                                            list(item_group_test.get_group(name).index)[0],\
                                            list(item_group_test.get_group(name).index)[-1],\
                                            item_group_test.get_group(name).drop(['store', 'item','month', 'sales'], axis=1))\
                                            for name,group in item_group_train]))
pool.close()

In [16]:
out.head()

Unnamed: 0,store,item,sales
2017-10-01,1,1,26.642178
2017-10-02,1,1,19.409384
2017-10-03,1,1,20.984217
2017-10-04,1,1,20.399083
2017-10-05,1,1,22.484257


In [17]:
SMAPE(out['sales'],test['sales'])

29.82

# Prophet

In [None]:
from fbprophet import Prophet

In [None]:
# data preparation

df_pp=train_df.reset_index()
df_pp.columns=['ds','store','item','y']
df_pp

In [None]:
train=df_pp.loc[df_pp['ds']<pd.to_datetime('October 1, 2017')]
test=df_pp.loc[df_pp['ds']>=pd.to_datetime('October 1, 2017')]

In [None]:
# create groupby object
item_group_train=train.groupby(['store','item'])
item_group_test=test.groupby(['store','item'])

In [None]:
def prophet_model(df_train,df_test):
    p=Prophet(interval_width=0.95, \
                  growth='linear',\
                  yearly_seasonality=False, \
                  weekly_seasonality=False, \
                  daily_seasonality=False,\
                  seasonality_mode='additive',\
                  seasonality_prior_scale=10,\
                  holidays_prior_scale=10,\
                  changepoint_prior_scale=10)
    p.add_country_holidays(country_name='US')
    p.add_seasonality(name='yearly', period=365.5, fourier_order=10, prior_scale=40)
    p.add_seasonality(name='weekly', period=7, fourier_order=3, prior_scale=10)
    p.add_seasonality(name='monthly', period=30.5, fourier_order=5, prior_scale=1)
    p.fit(df_train)
    return pd.DataFrame({'store':int(df_test['store'][0]),\
                         'item':int(df_test['item'][0]),\
                         'sales':p.predict(pd.DataFrame(df_test['ds']))['yhat']})

In [None]:
pool = mp.Pool(mp.cpu_count())
out_prophet = pd.concat(pool.starmap(prophet_model, [(group[['ds','y']],\
                                                      item_group_test.get_group(name).reset_index())\
                                                      for name, group in item_group_train
                                                      ]))
pool.close()
out_prophet

In [None]:
SMAPE(out_prophet.reset_index()['sales'],test.reset_index()['y'])