# Store item demand forecasting

In [1]:
__author__ = "Zhiji Ding"
__copyright__ = "Copyright 2018, Zhiji Ding"
__email__ = "jimmydzj2006@gmail.com"

In [2]:
# Module import
import os
import warnings

import matplotlib as matplot
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
warnings.filterwarnings('ignore')
%matplotlib inline

In [3]:
# load data
# The data directory defaults to the original local download location, but it
# can be redirected with the DEMAND_DATA_DIR environment variable so the
# notebook runs on other machines without editing this cell.
DATA_DIR = os.environ.get(
    'DEMAND_DATA_DIR',
    '/Users/jimmyding/Downloads/demand_forecasting_kernels_only',
)
train_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
test_df = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'))

In [4]:
train_df.head()

Unnamed: 0,date,store,item,sales
0,2013-01-01,1,1,13
1,2013-01-02,1,1,11
2,2013-01-03,1,1,14
3,2013-01-04,1,1,13
4,2013-01-05,1,1,10


In [5]:
# Parse the date strings, derive calendar features, then index the frame by date.
# `Series.dt.weekday_name` was removed in pandas 1.0; `dt.day_name()` yields the
# same English day names ('Monday', 'Tuesday', ...).
train_df['date'] = pd.to_datetime(train_df['date'])
train_df['week_day'] = train_df['date'].dt.day_name()
train_df['month'] = train_df['date'].dt.month
train_df['year'] = train_df['date'].dt.year
train_df = train_df.set_index(['date'])

In [6]:
train_df.head()

Unnamed: 0_level_0,store,item,sales,week_day,month,year
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-01-01,1,1,13,Tuesday,1,2013
2013-01-02,1,1,11,Wednesday,1,2013
2013-01-03,1,1,14,Thursday,1,2013
2013-01-04,1,1,13,Friday,1,2013
2013-01-05,1,1,10,Saturday,1,2013


In [None]:
# Group the training rows by (store, item) pair; each group is one sales series.
item_group=train_df.groupby(['store','item'])
item_group

## one item only

In [None]:
# Pull out the rows for store 2, item 2 as the single series examined in this section.
df=item_group.get_group((2,2))
df.head()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition of the sales series with a 365-observation seasonal
# period. `period` is the current keyword for what used to be passed as `freq`,
# which newer statsmodels releases no longer accept.
result = seasonal_decompose(df['sales'], model='additive', period=365)

# result.plot() builds and returns its own figure, so no separate plt.figure()
# call is needed (it only left an extra empty figure in the output).
fig = result.plot()
fig.set_size_inches(15, 12)

In [None]:
# stationary test

from statsmodels.tsa.stattools import adfuller
def stationarity_test(timeseries, window = 12, cutoff = 0.05):
    """Run the augmented Dickey-Fuller test and return its p-value.

    Parameters
    ----------
    timeseries :
        The series handed to ``adfuller``.
    window, cutoff :
        Accepted for call compatibility only; neither value is used here.

    Returns
    -------
    The second element of the ``adfuller`` result tuple (the p-value).
    """
    # Lag length is picked by AIC among candidate lags up to 20.
    return adfuller(timeseries, maxlag=20, autolag='AIC')[1]


In [None]:
stationarity_test(df['sales'])

The time series is stationary according to the ADF test statistic, but it still shows an annual trend.

In [None]:
# First-order differencing (x[t] - x[t-1]); the undefined first value is dropped
# before re-running the stationarity test.
first_diff = df['sales'].diff().dropna()
stationarity_test(first_diff, window=12)

The p-value is extremely small, so the null hypothesis is rejected: the time series is stationary after first differencing.
We therefore take d = 1.
## MA and ACF

In [None]:
import statsmodels.api as sm

# Autocorrelation (top) and partial autocorrelation (bottom) of the original
# sales series, shown for the first 40 lags.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
fig = sm.graphics.tsa.plot_acf(df['sales'], lags=40, ax=ax1)
fig = sm.graphics.tsa.plot_pacf(df['sales'], lags=40, ax=ax2)

In [None]:
# Autocorrelation (top) and partial autocorrelation (bottom) of the
# first-differenced series, shown for the first 40 lags.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
fig = sm.graphics.tsa.plot_acf(first_diff, lags=40, ax=ax1)
fig = sm.graphics.tsa.plot_pacf(first_diff, lags=40, ax=ax2)

In [None]:
# Search ARMA(p, q) orders on the differenced series, with p and q each bounded
# by 7, and report the pair with the lowest AIC.
resDiff = sm.tsa.arma_order_select_ic(
    first_diff,
    ic='aic',
    trend='c',
    max_ar=7,
    max_ma=7,
)
print('ARMA(p,q) =', resDiff['aic_min_order'], 'is the best.')

In [None]:
# Fit a non-seasonal ARIMA(6, 1, 7) on the daily sales series.
# NOTE(review): the (6, 7) orders are presumably what the order-selection cell
# printed; confirm after re-running it.
arima = sm.tsa.statespace.SARIMAX(
    df['sales'],
    order=(6, 1, 7),
    seasonal_order=(0, 0, 0, 0),
    freq='D',
    enforce_stationarity=False,
    enforce_invertibility=False,
).fit()
arima.summary()

In [None]:
# Residual check: ACF (top) and PACF (bottom) of the fitted model's residuals
# over the first 50 lags.
res = arima.resid
fig,ax = plt.subplots(2,1,figsize=(15,8))
fig = sm.graphics.tsa.plot_acf(res, lags=50, ax=ax[0])
fig = sm.graphics.tsa.plot_pacf(res, lags=50, ax=ax[1])
plt.show()

In [None]:
# Built-in diagnostic plots for the fitted model, drawn on an 18x8 inch figure.
arima.plot_diagnostics(figsize=(18, 8))
plt.show()

In [None]:
#arima.predict('2018-01-01','2018-08-31')

## Define function for each item

In [None]:
test_df.head()

In [None]:
# prepare test data: group the test rows by (store, item) pair
test_item_group=test_df.groupby(['store','item'])


In [None]:
def arima_model(df, start, end, alpha=0.01, max_ar=7, max_ma=7):
    """Fit an ARIMA(p, d, q) model to one item's sales and predict a range.

    Intended to be called once per (store, item) group.

    Parameters
    ----------
    df : DataFrame
        Must expose a ``sales`` column; it is passed to SARIMAX with
        ``freq='D'``.
    start, end :
        Prediction bounds, forwarded unchanged to ``predict``.
    alpha : float, default 0.01
        p-value threshold for ``stationarity_test``. The series is
        differenced again for as long as the p-value is not below it.
    max_ar, max_ma : int, default 7
        Upper bounds of the AR and MA orders searched by
        ``arma_order_select_ic``.

    Returns
    -------
    The result of ``predict(start, end)`` on the fitted model.
    """
    # Step 1: difference until the stationarity test passes; d counts the rounds.
    # Starting from the raw series keeps `differenced` defined when d == 0
    # (previously that path reached Step 2 with an unassigned variable).
    differenced = df.sales
    d = 0
    while stationarity_test(differenced, window=12) >= alpha:
        differenced = differenced.diff().dropna()
        d += 1

    # Step 2: pick p and q by minimum AIC on the (possibly differenced) series.
    selection = sm.tsa.arma_order_select_ic(
        differenced, max_ar=max_ar, max_ma=max_ma, ic='aic', trend='c'
    )
    p, q = selection['aic_min_order']

    # Step 3: fit the non-seasonal model on the original series; SARIMAX applies
    # the d-fold differencing itself through `order`.
    arimax = sm.tsa.statespace.SARIMAX(
        df.sales,
        order=(p, d, q),
        freq='D',
        seasonal_order=(0, 0, 0, 0),
        enforce_stationarity=False,
        enforce_invertibility=False,
    ).fit()

    # Step 4: prediction over the requested range.
    return arimax.predict(start, end)

In [None]:
# Try arima_model on the store 2 / item 2 series, predicting 2018-01-01 through 2018-08-31.
df_2_2=item_group.get_group((2,2))
m=arima_model(df_2_2, '2018-01-01','2018-08-31')

In [None]:
m

In [None]:
# Walk every (store, item) group; for now only the group key is printed.
# NOTE(review): looks like a placeholder for running the model per group - confirm intent.
for name, group in item_group:     
    print(name)    
    #print(pd.DataFrame(group))
    