# Chapter 8 - The SARIMAX Model

## Listing 8-1. Preparing the data and making a plot

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
# Location of the training CSV; it is downloaded over HTTP each time this
# cell runs.
# NOTE(review): the saved output of this cell lists columns that do not
# include 'Date', 'Weekly_Sales' or 'IsHoliday', which the following cells
# index into — confirm this URL still points at the intended dataset.
train_csv_url = (
    'https://raw.githubusercontent.com/devarajphukan/Kaggle-Walmart/'
    'master/train.csv'
)
data = pd.read_csv(train_csv_url)

# Preview the first rows as the cell output.
data.head()

Unnamed: 0,999,5,Friday,68113152929,-1,FINANCIAL SERVICES,1000
0,30,7,Friday,60538820000.0,1,SHOES,8931.0
1,30,7,Friday,7410811000.0,1,PERSONAL CARE,4504.0
2,26,8,Friday,2238404000.0,2,PAINT AND ACCESSORIES,3565.0
3,26,8,Friday,2006614000.0,2,PAINT AND ACCESSORIES,1017.0
4,26,8,Friday,2006619000.0,2,PAINT AND ACCESSORIES,1017.0


In [None]:
# Collapse the raw rows to one record per 'Date' by summing every column.
data = data.groupby('Date').sum()

# Turn the summed 'IsHoliday' value into a numeric flag: any positive total
# marks the date as a holiday (1.0), everything else becomes 0.0.
# The boolean mask is cast with the vectorized `astype` rather than a
# per-element Python lambda.
data['IsHoliday'] = (data['IsHoliday'] > 0).astype(float)

# Line plot of the aggregated sales, indexed by date.
ax = data['Weekly_Sales'].plot()
ax.set_ylabel('Weekly Sales')
plt.gcf().autofmt_xdate()  # slant the x tick labels so they do not overlap
plt.show()

## Listing 8-2. Is there a correlation between sales and holidays?

In [None]:
# Keep only the two columns of interest, then show their pairwise
# correlation matrix (`.corr()` is called with its default settings).
sales_and_holiday = data[['Weekly_Sales', 'IsHoliday']]
sales_and_holiday.corr()

## Listing 8-3. Fitting a SARIMAX model

In [None]:
import random
import statsmodels.api as sm
from sklearn.metrics import r2_score

# NOTE(review): this seeds only Python's built-in `random` module, and nothing
# else in this cell calls `random` directly — confirm the seed is still needed.
random.seed(12345)

# Number of trailing observations held out for evaluation. Defined once so the
# train/test split, the exogenous slices and the forecast horizon cannot drift
# apart.
N_TEST_STEPS = 10

# Split the target and the exogenous holiday flag at the same position.
train = data['Weekly_Sales'].iloc[:-N_TEST_STEPS]
test = data['Weekly_Sales'].iloc[-N_TEST_STEPS:]
exog_train = data['IsHoliday'].iloc[:-N_TEST_STEPS]
exog_test = data['IsHoliday'].iloc[-N_TEST_STEPS:]

# SARIMAX with non-seasonal order (p, d, q) = (0, 1, 1) and seasonal order
# (P, D, Q, s) = (1, 1, 1, 52); s = 52 presumably encodes a yearly cycle on
# weekly observations — confirm against the data's sampling frequency.
mod = sm.tsa.statespace.SARIMAX(
    endog=train,
    exog=exog_train,
    order=(0, 1, 1),
    seasonal_order=(1, 1, 1, 52),
)
res = mod.fit(disp=False)  # disp=False silences the optimizer's progress output

# Forecasting with an exogenous regressor requires its future values for every
# forecast step, hence the held-out slice of 'IsHoliday'.
fcst = res.forecast(steps=N_TEST_STEPS, exog=exog_test)

# Plot actuals and forecast against the step number (0..N_TEST_STEPS-1) rather
# than the date index, so both lines share the same x positions.
plt.plot(list(test))
plt.plot(list(fcst))
plt.xlabel('Steps of the test data')
plt.ylabel('Weekly Sales')
plt.legend(['test', 'forecast'])
plt.show()

# R^2 of the forecast on the held-out steps, shown as the cell output.
r2_score(test, fcst)
