<a href="https://colab.research.google.com/github/anirudh201098/Store-Item-Demand-Forecasting/blob/master/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
# Mount Google Drive under /content/drive so the CSV and weight files below
# can be reached by path. This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the training CSV from the mounted Drive; `date` is parsed as a
# datetime and becomes the frame's index.
data=pd.read_csv('/content/drive/My Drive/temp/train.csv',parse_dates=['date'],index_col=['date'])

In [None]:
# Order rows chronologically ('date' is the index level set at load time).
data= data.sort_values('date', ascending=True)

In [None]:
# Hold out calendar year 2017 as the test set; every other year is training
# data. `date` is moved from the index back to an ordinary column so the
# feature-building cell can use the `.dt` accessor on it.
train = data[data.index.year != 2017].reset_index(level=0)
test = data[data.index.year == 2017].reset_index(level=0)

In [None]:

def _calendar_features(frame):
    """Build the model's feature table from a frame with a datetime `date` column.

    The result has the columns year, month, day, weekday, store, item, sales
    (in that order). `year` is stored as an offset from 2013; month, day and
    weekday come straight from the pandas `.dt` accessor; store, item and
    sales are copied from `frame` unchanged.

    Keeping this in one function guarantees that the training and test
    tables are always built the same way.
    """
    dates = frame['date'].dt
    return pd.DataFrame(
        {
            'year': dates.year - 2013,
            'month': dates.month,
            'day': dates.day,
            'weekday': dates.weekday,
            'store': frame.store,
            'item': frame.item,
            'sales': frame.sales,
        },
        columns=['year', 'month', 'day', 'weekday', 'store', 'item', 'sales'],
    )


train_data = _calendar_features(train)
test_data = _calendar_features(test)


In [None]:
# Sanity check: list the distinct year offsets present in the training table.
np.unique(train_data['year'])

In [None]:
# Convert the feature tables to plain NumPy arrays.
# Features are every column except `sales`; `sales` is the regression target.
X = np.array(train_data.drop(columns='sales'))
y = np.array(train_data.sales)
X_test = np.array(test_data.drop(columns='sales'))
y_test = np.array(test_data.sales)

In [None]:
from sklearn.model_selection import train_test_split
def split(random,n,X_train,y_train,m):
    """Split (X_train, y_train) into a training part and a validation part.

    The meaning of `n` depends on `random`:

    * random truthy  -> `n` is the training fraction; rows are shuffled with
      a fixed seed (random_state=0) and `1 - n` of them go to validation.
    * random falsy   -> `n` is a value of column 0 (the year column). The
      validation set is taken from that year only:
        - m == 1: rows of year `n` with column 1 (month) <= 6
        - otherwise: rows of year `n` with column 1 (month) >= 6
      Every remaining row goes to the training part.

    NOTE(review): the two windows are not symmetric (months 1-6 vs. months
    6-12, so month 6 is validation data in both modes) - confirm intended.

    Returns (X_train_part, y_train_part, X_val, y_val).
    """
    if random:
        fit_X, held_X, fit_y, held_y = train_test_split(
            X_train, y_train, test_size=(1-n), random_state=0, shuffle = True)
        return fit_X, fit_y, held_X, held_y

    years = X_train[:,0]
    months = X_train[:,1]
    if m==1:
        keep_rows = (years!=n)|(months>6)
        held_rows = (years==n)&(months<=6)
    else:
        keep_rows = (years!=n)|(months<6)
        held_rows = (years==n)&(months>=6)

    return X_train[keep_rows], y_train[keep_rows], X_train[held_rows], y_train[held_rows]

# Deterministic split: validate on year offset 0, months >= 6 (the m != 1
# branch of split); everything else is training data.
X_train,y_train,X_val,y_val=split(False,0,X,y,0)
print("training: ", X_train.shape, y_train.shape)
print("validation: ", X_val.shape, y_val.shape)

In [None]:
from keras.models import Model as KerasModel
from keras.layers import Input, Dense, Activation, Reshape, Dropout,LSTM,Flatten
from keras.layers import Concatenate
from keras.layers.embeddings import Embedding
from keras import optimizers, regularizers
from keras.callbacks import EarlyStopping
import keras.backend as K
import keras
import tensorflow as tf


In [None]:
def split_features(X):
    """Return columns 0..5 of the 2-D array `X` as a list of six 1-D slices.

    The Keras model takes one input per feature, so the feature matrix has
    to be handed over column by column.
    """
    return [X[:, column] for column in range(6)]

# custom loss function
def smape(x, y):
    """Symmetric mean absolute percentage error of two tensors, in percent.

    Computes 100 * mean(2 * |x - y| / (|x| + |y|)) with the Keras backend.
    """
    # Clamp the denominator: when both values are 0 the unguarded division
    # is 0/0 = NaN, and a single NaN turns the whole mean (and the loss) NaN.
    denominator = K.maximum(K.abs(x) + K.abs(y), K.epsilon())
    return 100. * K.mean(2 * K.abs(x - y) / denominator)


class LSTMwithEmbeddings():
    """Keras regression model: one embedding per input feature, then two LSTMs.

    Each of the six feature columns (year, month, day, weekday, store, item)
    gets its own Input and Embedding layer. The embeddings are concatenated,
    reshaped to a sequence of scalars, passed through LSTM(112) and LSTM(96),
    then Dense(64) -> relu -> Dense(16) -> relu -> Dense(1). The model is
    compiled with the Adam optimizer, mean squared error loss and a mean
    absolute error metric.
    """

    # One row per model input, in the column order produced by split_features:
    # (input layer name, embedding vocabulary size, embedding width, embedding layer name).
    # Sizes are written as N+1 - presumably N categories plus room for index N; confirm.
    # Layer names are kept exactly as they were so existing weight files still match.
    _EMBEDDING_SPECS = (
        ("year", 5+1, 3, 'year__embedding'),
        (None, 12+1, 7, 'month_embedding'),
        (None, 31+1, 16, 'day_embedding'),
        (None, 7+1, 4, 'weekday_embedding'),
        (None, 10+1, 6, 'stores_embedding'),
        (None, 50+1, 26, 'items_embedding'),
    )

    # NOTE(review): fit() writes one file and load() reads a *different* one
    # ("weightslstmd" vs "weightslstmd1"). The defaults are preserved here;
    # pass `weights_path` explicitly if both should use the same file.
    SAVE_WEIGHTS_PATH = '/content/drive/My Drive/temp/weightslstmd.h5'
    LOAD_WEIGHTS_PATH = '/content/drive/My Drive/temp/weightslstmd1.h5'

    def __init__(self, X_train, y_train, X_val, y_val):
        """Build and compile the network.

        The data arguments are accepted for interface compatibility but are
        not used: construction does not train the model.
        """
        super().__init__()
        self._build_model()

    def preprocess(self, X):
        """Turn the 2-D feature array into the list of per-feature inputs the model expects."""
        X_list = split_features(X)
        return X_list

    def _build_model(self):
        """Create `self.model` and compile it."""
        inp_model = []
        out_embeddings = []
        # Every feature, including year, is treated as categorical and embedded.
        for input_name, vocab_size, width, layer_name in self._EMBEDDING_SPECS:
            feature_in = Input(shape=(1,), name=input_name)
            embedded = Embedding(vocab_size, width, name=layer_name)(feature_in)
            # Collapse the embedding output to a flat vector of `width` values.
            embedded = Reshape(target_shape=(width,))(embedded)
            inp_model.append(feature_in)
            out_embeddings.append(embedded)

        # Total length of the concatenated embeddings (3+7+16+4+6+26 = 62).
        total_width = sum(spec[2] for spec in self._EMBEDDING_SPECS)

        out_model = Concatenate()(out_embeddings)
        # The LSTM layers need 3-D input, so each vector is presented as a
        # sequence of single-value timesteps.
        out_model = Reshape(target_shape=(total_width, 1))(out_model)
        out_model = LSTM(112, return_sequences=False)(out_model)
        out_model = Reshape(target_shape=(112, 1))(out_model)
        out_model = LSTM(96, return_sequences=False)(out_model)
        out_model = Dense(64)(out_model)
        out_model = Activation('relu')(out_model)
        out_model = Dense(16)(out_model)
        out_model = Activation('relu')(out_model)
        out_model = Dense(1)(out_model)

        self.model = KerasModel(inputs=inp_model, outputs=out_model)

        self.model.compile(optimizer='Adam', metrics=['mean_absolute_error'],loss=[keras.losses.mean_squared_error])

    def fit(self, X_train, y_train, X_val, y_val, epochs=12, batch_size=112, weights_path=None):
        """Train the model, then save its weights.

        Args:
            X_train, y_train: training features (2-D array) and targets.
            X_val, y_val: validation features and targets, reported each epoch.
            epochs: number of training epochs (default 12).
            batch_size: mini-batch size (default 112).
            weights_path: where to save the weights; defaults to SAVE_WEIGHTS_PATH.
        """
        self.model.fit(self.preprocess(X_train), y_train,
                       validation_data=(self.preprocess(X_val), y_val),
                       epochs=epochs, batch_size=batch_size,
                   )
        self.model.save_weights(self.SAVE_WEIGHTS_PATH if weights_path is None else weights_path)

    def load(self, weights_path=None):
        """Replace the model's current weights with those stored in a file.

        Args:
            weights_path: file to read; defaults to LOAD_WEIGHTS_PATH, which
                is not the file fit() writes by default.
        """
        self.model.load_weights(self.LOAD_WEIGHTS_PATH if weights_path is None else weights_path)

    def prediction(self, X):
        """Predict for the 2-D feature array `X`; returns a flat 1-D array."""
        return self.model.predict(self.preprocess(X)).flatten()

In [None]:
# Build and compile the network. The constructor only builds the model; it
# does not train on the arrays passed in.
b=LSTMwithEmbeddings(X_train, y_train, X_val, y_val)

In [None]:
# Training is skipped here; weights are read from the file load() points at.
#b.fit(X_train, y_train, X_val, y_val)
b.load()

In [None]:
def fun(i,j):
    """Train the global model `b` on one time-based split and predict X_test.

    Args:
        i: year offset (value of column 0) whose rows supply the validation set.
        j: window selector passed to split(): 1 -> months <= 6 of year `i`,
           anything else -> months >= 6 of year `i`.

    Returns:
        1-D array of predictions for the global `X_test`.

    Note: `b` is not re-created between calls, so each call continues
    training from whatever weights the model currently holds.
    """
    X_train,y_train,X_val,y_val=split(False,i,X,y,j)
    b.fit(X_train, y_train, X_val, y_val)
    # No b.load() here: after fit() the model already holds the weights it
    # just learned. Loading at this point replaced them with the contents of
    # a different weights file than the one fit() saves, so every call
    # returned the same predictions regardless of the split it trained on.
    y_pred=b.prediction(X_test)
    return y_pred

In [None]:
# Validate on year offset 0 (2013), months >= 6; predict the test set.
pred=fun(0,0)

In [None]:
# Validate on year offset 0 (2013), months <= 6; predict the test set.
pred2=fun(0,1)

In [None]:
# Validate on year offset 1 (2014), months >= 6; predict the test set.
pred3=fun(1,0)

In [None]:
# Validate on year offset 1 (2014), months <= 6; predict the test set.
pred4=fun(1,1)

In [None]:
# Validate on year offset 2 (2015), months >= 6; predict the test set.
pred5=fun(2,0)

In [None]:
# Validate on year offset 2 (2015), months <= 6; predict the test set.
pred6=fun(2,1)

In [None]:
# Validate on year offset 3 (2016), months >= 6; predict the test set.
pred7=fun(3,0)

In [None]:
# Validate on year offset 3 (2016), months <= 6; predict the test set.
# This result is stored as `y_pred`, the name the metric cells below read.
y_pred=fun(3,1)

In [None]:
# Replace the in-memory weights with the ones in the file load() points at.
# NOTE(review): the meaning of the trailing tuples is not documented - confirm or remove.
b.load() # (0 1 2) # (1 1 2)  (2 1 2) (3 1 2) 

In [None]:
# Predict the 2017 test set with the weights just loaded; this overwrites
# the `y_pred` returned by the last fun(...) call.
y_pred=b.prediction(X_test)

In [None]:
# Peek at the first five predictions.
y_pred[:5]

In [None]:
# First five true sales values, to compare with the predictions shown above.
# `y_pred` was computed on all of X_test, so its row-aligned ground truth is
# `y_test`. The previous `y_test_f6` is only defined in a later cell and
# raised NameError when the notebook was run top to bottom.
y_test[:5]

In [None]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from keras.metrics import mean_absolute_percentage_error

In [None]:
# Coefficient of determination of the predictions on the 2017 test set.
r2_score(y_test,y_pred)

In [None]:
# Mean absolute percentage error, using the function imported from keras.metrics.
# NOTE(review): being the Keras implementation, this presumably returns a
# tensor rather than a Python float - confirm.
mean_absolute_percentage_error(y_test,y_pred)

In [None]:
# Mean absolute error, in the same units as `sales`.
mean_absolute_error(y_test,y_pred)

In [None]:
# Root mean squared error: square root of the sklearn MSE.
import math
math.sqrt(mean_squared_error(y_test,y_pred))

In [None]:
# Manual R-squared and adjusted R-squared on the test set.
# np.sum is used instead of the builtin sum(): the builtin iterates the
# array element by element in Python, while np.sum reduces it in one
# vectorised call.
SS_Residual = np.sum((y_test-y_pred)**2)
print("SSE:",SS_Residual)
SS_Total = np.sum((y_test-np.mean(y_test))**2)
print("TSS:",SS_Total)
r_squared = 1 - SS_Residual/SS_Total
print("R-Square",r_squared)
# Adjusted R-squared penalises for the number of feature columns in X_test.
adjusted_r_squared = 1 - (1-r_squared)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
print("Adjusted R-square",adjusted_r_squared)

In [None]:
# Rebuild the test feature matrix and target vector from test_data.
X_test=np.array(test_data.drop('sales',axis=1))
y_test=np.array(test_data['sales'])

In [None]:
# Test rows whose month (column 1) is < 6, i.e. January-May.
# NOTE(review): the `f6` suffix suggests "first six months"; if that is the
# intent the bound should be <= 6 - confirm.
X_test_f6=X_test[X_test[:,1]<6]

In [None]:
# Targets for the same rows: month (column 1) < 6, i.e. January-May.
# NOTE(review): the `f6` suffix suggests "first six months"; if that is the
# intent the bound should be <= 6 - confirm.
y_test_f6=y_test[X_test[:,1]<6]

In [None]:
# `y_test_last6` was not assigned in any visible cell, so this cell raised
# NameError. It is defined here as the complement of the `y_test_f6`
# selection (month >= 6).
# NOTE(review): assumed from the name and the f6 mask - confirm the intended window.
y_test_last6 = y_test[X_test[:, 1] >= 6]
y_test_last6

In [None]:
# R-squared per store: column 4 of X_test is the store id (1..10).
for i in range(1, 11):
    store_mask = X_test[:, 4] == i  # rows belonging to store i
    X_test1 = X_test[store_mask]
    y_test1 = y_test[store_mask]
    y_pred1 = b.prediction(X_test1)
    print(i, r2_score(y_test1, y_pred1))

In [None]:
# Distinct store ids present in the test set (column 4 is `store`).
np.unique(X_test[:,4])