In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format='retina'

from __future__ import absolute_import, division, print_function

import pandas as pd
import numpy as np
from np import mean
import os
import csv
import itertools
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
from statsmodels.tsa.stattools import adfuller
from pandas import datetime
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from sklearn.metrics import mean_squared_error
from math import sqrt
# Display and Plotting
import matplotlib.pylab as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, minmax_scale, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from ipywidgets import interactive, widgets, RadioButtons, ToggleButton, Select, FloatSlider, FloatRangeSlider, IntSlider, fixed

print(os.listdir("../input"))

pd.set_option('display.float_format', lambda x: '%.5f' % x) # pandas
np.set_printoptions(precision=5, suppress=True) # numpy

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)


sns.set(style='ticks', context='poster')

In [None]:
def sarima_forecast(history, config):
    """Fit a SARIMAX model on ``history`` and return a one-step-ahead forecast.

    Parameters
    ----------
    history : sequence or pandas Series
        Observed values passed to ``SARIMAX`` as the endogenous series.
    config : tuple
        ``(order, seasonal_order, trend)``, forwarded to ``SARIMAX``.

    Returns
    -------
    scalar
        The prediction for position ``len(history)``, i.e. the first step
        after the last observation.
    """
    order, sorder, trend = config
    # define model
    model = SARIMAX(
        history,
        order=order,
        seasonal_order=sorder,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    model_fit = model.fit(disp=False)
    next_step = len(history)
    yhat = model_fit.predict(next_step, next_step)
    # `predict` can return a pandas Series whose index does not contain the
    # label 0 (e.g. dates, or an integer index starting at len(history)), in
    # which case `yhat[0]` is a failing label lookup. Take the first element
    # by position instead.
    return np.asarray(yhat)[0]

In [None]:
def measure_rmse(actual, predicted):
    """Return the root-mean-squared error between ``actual`` and ``predicted``."""
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

In [None]:
# Read the raw CSV, parse DATE, use it as the index and drop rows with missing values.
df = (
    pd.read_csv('../input/BITS AIC 2019 - Reflexis Raw Dataset.csv')
    .assign(DATE=lambda frame: pd.to_datetime(frame['DATE']))
    .set_index(['DATE'])
    .dropna()
)
#df['SALES_ACTUAL'].plot()
# Exclude every row dated 2016-01-31.
df = df.loc[df.index.get_level_values('DATE') != '2016-01-31']
df.head()

In [None]:
# Display the cleaned frame (output is capped by the display.max_rows option set above).
df

In [None]:
# Build per-store weekly aggregates ('W' bins): sales, traffic and transactions
# are summed, scheduled hours are averaged. Each aggregate is merged on the
# columns the frames share (STORE and DATE).
per_store = df.groupby('STORE')
df2 = per_store['SALES_ACTUAL'].resample('W').sum().reset_index()
for summed_column in ('TRAFFIC_ACTUAL', 'TRANSACTIONS_ACTUAL'):
    df2 = df2.merge(per_store[summed_column].resample('W').sum().reset_index())
for averaged_column in ('MANAGER_SCHED_HOURS', 'SYSTEM_SCHED_HOURS'):
    df2 = df2.merge(per_store[averaged_column].resample('W').mean().reset_index())
df2 = df2.set_index('DATE')
df2

In [None]:
# Quick sanity summary of the cleaned daily frame.
print('Max sales =  ' + str(df['SALES_ACTUAL'].max()))
print('Number of records = ' + str(len(df)))
df.head()

In [None]:
# Augmented Dickey-Fuller test: print the statistic, p-value and critical values.
# NOTE(review): `X` is not defined in any visible cell - confirm which series
# it should hold before relying on Restart & Run All.
result = adfuller(X)
print('ADF Statistic: {:f}'.format(result[0]))
print('p-value: {:f}'.format(result[1]))
print('Critical Values:')
for level in result[4]:
    print('\t{}: {:.3f}'.format(level, result[4][level]))

In [None]:
# Chronological train/test split on the index of `data`.
# NOTE(review): `data` (including its SALES_DELTA_NORM column) is not created in
# any visible cell - confirm it is defined before this cell runs.
split_date = '2018-01-01'
feature_cols = ['STORE', 'MANAGER_SCHED_HOURS', 'SYSTEM_SCHED_HOURS']
target_col = 'SALES_DELTA_NORM'

train_mask = data.index < split_date
# Use >= so rows stamped exactly on split_date land in the test set; a strict >
# here would silently drop them from both sets.
test_mask = data.index >= split_date

X_train, Y_train = data.loc[train_mask, feature_cols], data.loc[train_mask, target_col]
X_test, Y_test = data.loc[test_mask, feature_cols], data.loc[test_mask, target_col]

In [None]:
# NOTE(review): `Callback` is not imported in any visible cell - presumably the
# Keras callback base class; confirm and add the import to the imports cell.
class MetricsCheckpoint(Callback):
    """Callback that saves metrics after each epoch.

    After every epoch the values in ``logs`` are appended to ``self.history``
    (one list per metric name) and the whole dict is written to ``savepath``
    with ``np.save``.

    Parameters
    ----------
    savepath : str
        Destination passed to ``np.save``.
    """
    def __init__(self, savepath):
        super(MetricsCheckpoint, self).__init__()
        self.savepath = savepath
        self.history = {}

    def on_epoch_end(self, epoch, logs=None):
        """Append this epoch's metrics to the history and persist it."""
        # `logs` defaults to None; treat that as "no metrics" instead of
        # failing on `None.items()`.
        for k, v in (logs or {}).items():
            self.history.setdefault(k, []).append(v)
        # The dict is stored as a pickled object array, so it must be read
        # back with `np.load(..., allow_pickle=True)`.
        np.save(self.savepath, self.history)

def plotKerasLearningCurve(logs_path='logs.npy'):
    """Plot the MAPE learning curves stored in a saved metrics file.

    The file is expected to contain a dict mapping metric names to per-epoch
    value lists, written with ``np.save``. Every metric whose name contains
    one of the filter strings is drawn; names containing ``'val'`` are blue
    and labelled ``'val'``, the others red and labelled ``'train'``. The
    minimum of each curve is highlighted and annotated.

    Parameters
    ----------
    logs_path : str, optional
        Path of the ``.npy`` metrics file. Defaults to ``'logs.npy'``.
    """
    plt.figure(figsize=(10,5))
    # The dict was pickled into a 0-d object array; NumPy >= 1.16.3 refuses to
    # unpickle unless allow_pickle=True is given. Unpickling can execute
    # arbitrary code, so only load files produced by a trusted run.
    metrics = np.load(logs_path, allow_pickle=True).item()
    filt = ['mean_absolute_percentage_error'] # try to add 'loss' to see the loss learning curve
    for name in metrics:
        if not any(pattern in name for pattern in filt):
            continue
        is_val = 'val' in name
        color = 'b' if is_val else 'r'
        values = np.array(metrics[name])
        plt.plot(values, c=color, label='val' if is_val else 'train')
        # Mark and annotate the epoch with the lowest value.
        x = np.argmin(values)
        y = values[x]
        plt.scatter(x, y, lw=0, alpha=0.25, s=100, c=color)
        plt.text(x, y, '{} = {:.4f}'.format(x,y), size='15', color=color)
    plt.legend(loc=4)
    plt.axis([0, None, None, None]);
    plt.grid()
    plt.xlabel('Number of epochs')
    plt.ylabel('MAPE')
    
def plot_learning_curve(history):
    """Draw MAPE and loss curves side by side from a training history.

    ``history.history`` must provide ``mean_absolute_percentage_error`` and
    ``loss`` together with their ``val_``-prefixed counterparts. The figure is
    saved to ``./accuracy_curve.png`` after the first panel is drawn and to
    ``./loss_curve.png`` after the second.
    """
    curves = history.history
    plt.figure(figsize=(8,8))
    # (subplot position, metric key, title, y-axis label, output file)
    panels = (
        (1, 'mean_absolute_percentage_error', 'model MAPE', 'MAPE', './accuracy_curve.png'),
        (2, 'loss', 'model loss', 'loss', './loss_curve.png'),
    )
    for position, metric, title, ylabel, outfile in panels:
        plt.subplot(1, 2, position)
        plt.plot(curves[metric])
        plt.plot(curves['val_' + metric])
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.savefig(outfile)

In [None]:
# General scheme for feature visualization: iterate over all candidate inputs
# and keep the one for which the model's prediction is highest.
# NOTE(review): `lstm_model` is not defined in any visible cell - confirm it is
# built and trained before this cell runs. The two swept values presumably
# correspond to MANAGER_SCHED_HOURS and SYSTEM_SCHED_HOURS - verify.
store_no = 203
max_input = [[[store_no, 0.0,0.0]]]
# Cache the best prediction so far instead of re-running predict() on
# max_input in every iteration.
max_output = lstm_model.predict(max_input)[0]
for i in range(24*7*4):
    for j in range(24*7*4):
        candidate = [[[store_no, float(i)/4.0, float(j)/4.0]]]
        candidate_output = lstm_model.predict(candidate)[0]
        if candidate_output > max_output:
            max_input, max_output = candidate, candidate_output
            
