# Imports

In [None]:
import os
from collections import Counter
from six.moves import xrange
from pprint import pprint
from math import sqrt
import pickle as pkl

import numpy as np
import pandas as pd
from datetime import datetime,timedelta
import matplotlib.pyplot as plt
import random as rn

from sklearn.utils import check_array
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

import tensorflow as tf
from keras import losses
from keras import callbacks
from keras import regularizers
from keras import optimizers
from keras import backend as K
from keras.utils.np_utils import to_categorical
from keras.layers import Dense,Dropout,BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import BatchNormalization as BN
from keras.models import Sequential


# Fix the global RNG seeds up front so shuffles and splits are repeatable.
rn.seed(1234)        # Python's built-in `random` module
np.random.seed(1)    # NumPy's global generator

# Data

In [None]:
# Load the feature table (inputdf) and the target table (outputdf) from disk.
inputdf, outputdf = map(pd.read_csv, ("./New Data/inputdf.csv", "./New Data/outputdf.csv"))
print ("Data Read")

# Functions

In [None]:
def calMetric(yTrue,yPred,i):
    '''
    Print and return the regression metrics for one output column.

    Parameters
    ----------
    yTrue : array-like of observed values, 1-D or shaped (n, 1).
    yPred : array-like of predicted values, 1-D or shaped (n, 1).
    i     : position in outputdf.columns; only used for the printed heading.

    Returns
    -------
    list : [R2, MAE, RMSE, MAPE], with MAPE (Mean Absolute Percentage Error)
           as a plain float in percent.
    '''
    # Flatten both inputs so (n,) and (n, 1) arrays are handled the same way.
    # The previous code returned `mape[0]`, which only worked when the
    # predictions were an (n, 1) column and raised on 1-D predictions because
    # `mape` was then a scalar.
    actual = np.asarray(yTrue, dtype=float).ravel()
    predicted = np.asarray(yPred, dtype=float).ravel()

    # A zero in `actual` yields inf/nan here, as it did before; just keep
    # NumPy from emitting a warning for every such element.
    with np.errstate(divide='ignore', invalid='ignore'):
        mape = float(np.mean(np.abs(actual - predicted) / actual) * 100)

    # Compute each metric once and reuse it for both printing and returning.
    r2 = r2_score(actual, predicted)
    mae = mean_absolute_error(actual, predicted)
    rmse = sqrt(mean_squared_error(actual, predicted))

    print ("\n")
    print (outputdf.columns[i])
    print ("R2 :  %s" % (r2,))
    print ("MAE :  %s" % (mae,))
    print ("RMSE :  %s" % (rmse,))
    print ("MAPE :  %s" % (mape,))
    print ("----------------------------------------------------------")
    return [r2, mae, rmse, mape]

In [None]:
def train_test_indices(df):
    '''
    Shuffle the row positions of `df` and cut them 80/20.

    Returns (train, test, k, t): the first 80% of the shuffled positions,
    the remaining positions, the full shuffled array and the cut point.
    '''
    n_rows = df.shape[0]
    k = np.arange(n_rows)
    np.random.shuffle(k)    # in place, driven by NumPy's global RNG state
    t = int(0.8 * n_rows)
    return k[:t], k[t:], k, t

In [None]:
def normalize_df(totaldf,df,odf):
    '''
    Min-max scale the feature columns of two frames with ranges from `totaldf`.

    Every column except the first two and the last six is rescaled to
    (x - min) / (max - min), where min and max are taken (ignoring NaN) from
    the same column position of `totaldf`. A column that is constant in
    `totaldf` is set to 0. Afterwards NaN, +inf and -inf anywhere in either
    frame are replaced with 0.

    Parameters
    ----------
    totaldf : DataFrame that supplies the per-column min and max.
    df      : first DataFrame to scale.
    odf     : second DataFrame to scale with the same ranges.

    Returns
    -------
    (df, odf) : scaled copies; the frames passed in are not modified.
    '''
    # Work on copies. Previously the first normalised column was written
    # straight into the caller's frames before the names were rebound.
    df = df.copy()
    odf = odf.copy()

    #Don't normalize the first 2 and the last 6 features.
    for i in range(2,len(df.columns)-6):
        reference = totaldf.iloc[:,i].values.astype(float)
        # NaN-aware extremes: the builtin max/min give order-dependent
        # results as soon as a NaN is present.
        mn = np.nanmin(reference)
        mx = np.nanmax(reference)
        span = float(mx-mn)

        if span == 0:
            # Constant reference column: dividing would give 0/0 or +/-x/0,
            # all of which end up as 0 anyway, so write 0 directly.
            df.iloc[:,i] = 0.0
            odf.iloc[:,i] = 0.0
        else:
            df.iloc[:,i] = (df.iloc[:,i].values - mn)*1.0 / span
            odf.iloc[:,i] = (odf.iloc[:,i].values - mn)*1.0 / span

    #Now, if any Nans/ Infs are present, replace them with '0'.
    # Done once after the loop (not once per column) and covering -inf too.
    df = df.fillna(0).replace([np.inf,-np.inf],0)
    odf = odf.fillna(0).replace([np.inf,-np.inf],0)

    return df,odf

In [None]:
def huber_loss(y_true, y_pred, clip_delta=1.0):
    '''
    Custom Loss Function: Huber loss with extra weight on mid-sized
    relative errors.

    Each element's Huber loss is multiplied by
        2.0   when its absolute percentage error is in (15, 25],
        2.25  when it is in (25, 35],
        1.0   otherwise.

    Parameters
    ----------
    y_true     : tensor of targets.
    y_pred     : tensor of predictions, same shape as y_true.
    clip_delta : absolute error at which the loss turns from quadratic to linear.

    Returns
    -------
    Tensor of element-wise weighted Huber losses, shaped like y_true.
    '''
    error = y_true - y_pred
    abs_error = K.abs(error)
    cond  = abs_error < clip_delta

    # Absolute error relative to the target, in percent. The thresholds
    # below are percentages (15 / 25 / 35), but the value used to be left as
    # a fraction, so the bands could practically never be hit.
    # Where y_true is 0 this is inf/nan, every comparison is False and the
    # weight stays 1.
    error_calc = abs_error*100.0/y_true

    #Check for the interval : 15-25%.
    in_15_25 = tf.logical_and(tf.greater(error_calc,15.0), tf.less_equal(error_calc,25.0))

    #Check for the interval : 25-35%.
    in_25_35 = tf.logical_and(tf.greater(error_calc,25.0), tf.less_equal(error_calc,35.0))

    # Build the weight arithmetically from the two (mutually exclusive) masks.
    # The earlier version overwrote the 15-25% result with the second
    # tf.where, so only the 25-35% band had any effect. It also created an
    # unused tf.placeholder and passed (1, 1) constants to tf.where, which
    # do not match the shape of the condition.
    weight_dtype = K.dtype(error)
    val = 1.0 + 1.0*K.cast(in_15_25,weight_dtype) + 1.25*K.cast(in_25_35,weight_dtype)

    squared_loss = 0.5 * K.square(error) * val
    linear_loss  = clip_delta * (abs_error - 0.5 * clip_delta) * val

    return tf.where(cond, squared_loss, linear_loss)

def nn_model_init(xTrain):
    '''
    Build and compile the feed-forward regressor.

    Two 300-unit ReLU layers, each followed by 30% dropout, and a single
    linear output unit. The first layer carries an L1 activity regulariser.
    The input width is taken from xTrain.shape[1].
    '''
    n_features = xTrain.shape[1]
    layers = [
        Dense(300, input_dim = n_features, activation='relu',activity_regularizer=regularizers.l1(0.01)),
        Dropout(0.3),
        Dense(300, activation='relu'),
        Dropout(0.3),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(loss=tf.losses.huber_loss,optimizer='Adam',metrics=['mae'])
    return model

In [None]:
def nn_model(xTrain,yTrain,xTest,yTest,all_days,model,size):
    '''
    Fit `model` when there is training data, then predict for xTest.

    Only train the model, when in a particular week, there is atleast one
    point (size > 0). Training stops early once 'mean_absolute_error' has not
    improved for 20 epochs. yTest is only used for the shape printout and
    all_days is not used.

    Returns the predictions of `model` for xTest.
    '''
    print(xTrain.shape, xTest.shape, yTrain.shape, yTest.shape)

    has_training_rows = size > 0
    if has_training_rows:
        print ("Training the model.")
        early_stop = callbacks.EarlyStopping(monitor='mean_absolute_error',patience=20, verbose=0)
        model.fit(xTrain,yTrain,epochs=500,batch_size=7,verbose=2,callbacks=[early_stop])

    return model.predict(xTest,batch_size=7)

# NN Model

## Normalised 0 to 1 absolute plots

In [None]:
# Walk-forward evaluation with two networks per output column:
#   * `model`  is fitted on every training row and predicts the test rows whose
#     'Holiday/Not Holiday of tomorrow' flag is 1;
#   * `model2` is fitted on the rows whose flag is 0 (from the second iteration
#     on) and predicts the test rows whose flag is 0.
# Predictions, targets and per-sample MAPE are accumulated per column index in
# master_ypred / master_ytest / master_mape.
# NOTE(review): init_dfs, mape_calc and calculate_err_category are not defined
# in the visible part of this file - they must come from elsewhere.
np.random.seed(1)
tf.set_random_seed(1)

'''
INCORPORATION OF MODEL1-MODEL2.

NN Model
Processing data, for the model. Normalized!!!
Features :
(1) 4 columns pertaining to the language wise occupancies.l
(2) Moving Average of food per occ of food items. [2,3,7,10 days of MA]
Total Features : 8

With Dropout.
'''

# Accumulators keyed by output-column index i.
master_ypred = {}
master_ytest = {}
master_mape = {}
history2 = {}
indices = {}
master_dict = {}
# First/last value of column 0 of every training window, over all columns i.
start_date = []
end_date = []

# Keep only the first 14 output columns.
outputdf = outputdf[outputdf.columns[0:14]]
'''
Get the indices for the weekdays and weekends in 2017.
'''

# One full walk-forward pass per output column.
for i in range(0, 14):
    totaldf = pd.DataFrame()
    df = pd.DataFrame()
    odf = pd.DataFrame()
    tdf = pd.DataFrame()
    todf = pd.DataFrame()
    dates = []
    mape = []
    # presumably returns the initial train (df/odf) and test (tdf/todf)
    # frames - TODO confirm against init_dfs
    df,odf,tdf,todf,o_indices = init_dfs(df,odf,tdf,todf)
    
    # Column 0 of the first/last test row; `end` drives the window arithmetic
    # below, `start` is assigned but not read in this cell.
    start = tdf.iloc[0][0]
    end = tdf.iloc[tdf.shape[0]-1][0]
    
    print df.shape,odf.shape,tdf.shape,todf.shape
    # Overwrites the o_indices returned by init_dfs just above.
    o_indices = list(tdf.index)
    # totaldf grows by one test batch per iteration and supplies the min/max
    # used for normalisation.
    totaldf = df.copy()
    
    loopcounter = 0
    model_init_counter = 0
    
    master_dict[i] = {}
    master_ypred[i] = []
    master_ytest[i] = []
    master_mape[i] = []
    
    history2 = {}
    
    # Runs until the date check near the bottom breaks out.
    while(1):
        errcategory = []
        print "\n\n"
        print "--------------------------------------------------------------"
        indices[loopcounter] = []
        # NOTE(review): this key is created but nothing is appended to it in
        # this cell, and master_dict[i] is left as the empty dict set above.
        history2[tdf['Date'][0]] = []
        local = []
        counter = 0
        '''
        Initially, df and odf are defined.

        Normalization should occur, each and every time, a new batch comes in.

        Make some changes here, where df refers to the current training dataframe.
        For each food, initially 'df' will be containing the first month of training.
        '''
        print "Shape of Train df, Test df : ",df.shape,tdf.shape
        print "Train between : ",df.iloc[0][0]," - ",df.iloc[-1][0]
        start_date += [df.iloc[0][0]]
        end_date += [df.iloc[-1][0]]
        print "Test between  : ",tdf.iloc[0][0]," - ",tdf.iloc[-1][0]
        # Replace NaN and +inf with 0 before normalising.
        tdf = tdf.fillna(0)
        tdf = tdf.replace([np.inf],[0])
        
        df = df.fillna(0)
        df = df.replace([np.inf],[0])
        # Normalize both the input and the output at the same time.
        df,tdf = normalize_df(totaldf,df, tdf)
        
        if(model_init_counter==0):
            # First iteration: model2 trains on the same rows as model.
            df2 = df
            odf2 = odf
        else:
            #Split df into df and df2 based on holiday / not holiday.
            req_indices = list(df[df['Holiday/Not Holiday of tomorrow']==0].index)
            df2 = df[df['Holiday/Not Holiday of tomorrow']==0]
            odf2 = odf.iloc[req_indices,:]
            df2.reset_index(drop=True,inplace=True)
            odf2.reset_index(drop=True,inplace=True)
        
        #Split tdf, into tdf and tdf2 based on holiday/not holiday.
        # tdf2/todf2: rows with flag 0.
        req_indices = list(tdf[tdf['Holiday/Not Holiday of tomorrow']==0].index)
        tdf2 = tdf[tdf['Holiday/Not Holiday of tomorrow']==0]
        todf2 = todf.iloc[req_indices,:]
        tdf2.reset_index(drop=True,inplace=True)
        todf2.reset_index(drop=True,inplace=True)
        
        # tdf/todf: rows with flag 1.
        req_indices = list(tdf[tdf['Holiday/Not Holiday of tomorrow']==1].index)
        tdf = tdf[tdf['Holiday/Not Holiday of tomorrow']==1]
        todf = todf.iloc[req_indices,:]
        tdf.reset_index(drop=True,inplace=True)
        todf.reset_index(drop=True,inplace=True)
        
        #First get the holiday dates, then the non-holiday dates, in a particular week.
        dates += list(tdf['Date'])
        dates += list(tdf2['Date'])
        
        test_indices = range(tdf.shape[0])
        # Base features: fixed column positions 485-487 and 2-5.
        # NOTE(review): `list + range(...)` only works where range returns a
        # list (Python 2).
        xTrain = ((df.values[:,[485,486,487] + range(2,6)]))
        xTrain2 = ((df2.values[:,[485,486,487] + range(2,6)]))
        xTest = ((tdf.values[:, [485,486,487] + range(2,6)]))
        xTest2 = ((tdf2.values[:, [485,486,487] + range(2,6)]))
        
        # Offsets of the per-item feature groups; column i+j is the one that
        # belongs to output column i in each group.
        lst = [6, 20, 34, 48, 426]

        for j in lst:
            xTrain = np.hstack((xTrain, df.values[:,i+j].reshape(-1, 1)))
            xTrain2 = np.hstack((xTrain2, df2.values[:,i+j].reshape(-1, 1)))
            xTest = np.hstack((xTest, tdf.values[:,i+j].reshape(-1, 1),))
            xTest2 = np.hstack((xTest2, tdf2.values[:,i+j].reshape(-1, 1),))
        
        # Targets: column i of the matching output frames.
        yTrain = odf.values[:,i]
        yTrain2 = odf2.values[:,i]
        yTest = todf.values[:,i]
        yTest2 = todf2.values[:,i]

        print("\n")
        # The networks are created once per output column and then re-fitted
        # (continuing from their current weights) on every later window.
        if (model_init_counter==0):
            model = nn_model_init(xTrain)
            model2 = nn_model_init(xTrain2)
        '''
        xTest , yTest will be for 'model'. (only predicting for holidays.)
        xTest2 , yTest2 will be for 'model2'. (only predicting for non-holidays.)
        '''
        print "------------------------------------------------------"
        print "Shapes : "
        print "xTrain , yTrain , xTest , yTest : ",xTrain.shape,yTrain.shape,xTest.shape,yTest.shape
        print "xTrain2 , yTrain2 , xTest2 , yTest2 : ",xTrain2.shape,yTrain2.shape,xTest2.shape,yTest2.shape
        print "------------------------------------------------------"
        # 'model' trains on everything present.
        print"-------------------------------------------------------"
        print "Training Model 1"
        yPred = nn_model(xTrain, yTrain, xTest, yTest, test_indices, model,df.shape[0])
        print "-------------------------------------------------------"
        print "Training Model 2"
        # 'model2' selectively trains on weekdays, and is used to predict for weekdays.
        yPred2 = nn_model(xTrain2, yTrain2, xTest2, yTest2, test_indices, model2,df2.shape[0])
        # Same order as `dates`: flag-1 rows first, then flag-0 rows.
        yPred = np.concatenate((yPred,yPred2))
        
        # Each yPred row is iterated and its entries appended individually.
        for looper in range(yPred.shape[0]):
            master_ypred[i] += list(yPred[looper])
        
        #Combine both yTest and yTest2 into yTest, in the format, HOLIDAY followed by NOT HOLIDAY.
        yTest = np.concatenate((yTest,yTest2))
        master_ytest[i] += list(yTest)
        
        templist = mape_calc(yTest,yPred)
        master_mape[i] += templist
        
        local.append(calMetric(yTest, yPred, i))
        
        # Result is stored but not read again in this cell.
        errcategory = calculate_err_category(templist)
        # Sequential test, train split.
        
        #Recombine tdf and tdf2 into tdf, for computation of the next batch.
        tdf = pd.concat([tdf,tdf2],axis=0)
        tdf.reset_index(drop=True,inplace=True)
        print "Null(s) Check : ",np.sum(tdf.isnull().sum(axis=1))
        
        # New train set is going to be one week.
        # The just-tested dates are looked up in inputdf (un-normalised rows)
        # and appended to totaldf.
        indexes = list(tdf['Date'])
        temporarydf = inputdf[inputdf['Date'].isin(indexes)].reset_index(drop=True)
        totaldf = pd.concat([totaldf,temporarydf],axis=0)
        totaldf.reset_index(drop=True,inplace=True)
    
        # NOTE(review): the next training window is the last 15 rows of
        # totaldf, which does not match the "one week" wording above - confirm.
        df = totaldf.iloc[-15:]
        
        df.reset_index(drop=True,inplace=True)
        # Output rows at the inputdf index positions of the window's dates.
        odf = outputdf.iloc[inputdf[inputdf['Date'].isin(list(df['Date']))].index].reset_index(drop=True)
        
        # Stop once the next 7-day test window would end after 2017-11-19.
        if((end+pd.Timedelta(7,unit='D')) > pd.to_datetime("2017-11-19")):
            print "When exiting the loop  ",end
            break

        # Index labels of the next test window: dates in (end, end + 7 days].
        indexes = list(inputdf[ (inputdf['Date'] > end) & (inputdf['Date'] <= (
            end + pd.Timedelta(value=7, unit='D')))].index)
        
        o_indices += indexes
        # Generating the test dataframe.
        tdf = inputdf[(inputdf['Date'] > end) & (inputdf['Date'] <= (
            end + pd.Timedelta(value=7, unit='D')))].reset_index(drop=True)

        todf = outputdf.iloc[indexes]
        
        start = tdf.iloc[0][0]
        end = tdf.iloc[tdf.shape[0]-1][0] 
        
        # A window with fewer than 7 rows is only reported; the loop goes on.
        if(tdf.shape[0] < 7):
            print "Detected a problem !!"
            print ("Shape of Test Set : ",tdf.shape)
            #break
            
        loopcounter += 1
        model_init_counter+=1
        
    # Drop both networks before moving on to the next output column.
    del model
    del model2

In [None]:
# Walk-forward evaluation with a single network per output column: fit on the
# current training window, predict the next test window, append the tested
# rows to the training pool and move on. Predictions, targets and per-sample
# MAPE are accumulated per column index; the metrics of every window are kept
# in master_dict[i], keyed by the window's first test date.
# NOTE(review): init_dfs, mape_calc and calculate_err_category are not defined
# in the visible part of this file - they must come from elsewhere.
np.random.seed(1)
tf.set_random_seed(1)

'''
NN Model
Processing data, for the model. Normalized!!!
Features :
(1) 4 columns pertaining to the language wise occupancies.l
(2) Moving Average of food per occ of food items. [2,3,7,10 days of MA]
Total Features : 8

With Dropout.
'''

# Accumulators keyed by output-column index i.
master_ypred = {}
master_ytest = {}
master_mape = {}
history2 = {}
indices = {}
master_dict = {}
# First/last value of column 0 of every training window, over all columns i.
start_date = []
end_date = []

# Keep only the first 14 output columns.
outputdf = outputdf[outputdf.columns[0:14]]
'''
Get the indices for the weekdays and weekends in 2017.
'''

# One full walk-forward pass per output column.
for i in range(0, 14):
    totaldf = pd.DataFrame()
    df = pd.DataFrame()
    odf = pd.DataFrame()
    tdf = pd.DataFrame()
    todf = pd.DataFrame()
    dates = []
    mape = []
    # presumably returns the initial train (df/odf) and test (tdf/todf)
    # frames - TODO confirm against init_dfs
    df,odf,tdf,todf,o_indices = init_dfs(df,odf,tdf,todf)
    
    # Column 0 of the first/last test row; `end` drives the window arithmetic
    # below, `start` is assigned but not read in this cell.
    start = tdf.iloc[0][0]
    end = tdf.iloc[tdf.shape[0]-1][0]
    
    print df.shape,odf.shape,tdf.shape,todf.shape
    # Overwrites the o_indices returned by init_dfs just above.
    o_indices = list(tdf.index)
    # totaldf grows by one test batch per iteration and supplies the min/max
    # used for normalisation.
    totaldf = df.copy()
    
    loopcounter = 0
    model_init_counter = 0
    
    master_dict[i] = {}
    master_ypred[i] = []
    master_ytest[i] = []
    master_mape[i] = []
    
    # Per-column history: first test date of a window -> list of metric lists.
    history2 = {}
    
    # Runs until the date check near the bottom breaks out.
    while(1):
        errcategory = []
        print "\n\n"
        print "--------------------------------------------------------------"
        indices[loopcounter] = []
        history2[tdf['Date'][0]] = []
        local = []
        counter = 0
        '''
        Initially, df and odf are defined.

        Normalization should occur, each and every time, a new batch comes in.

        Make some changes here, where df refers to the current training dataframe.
        For each food, initially 'df' will be containing the first month of training.
        '''
        print "Shape of Train df, Test df : ",df.shape,tdf.shape
        print "Train between : ",df.iloc[0][0]," - ",df.iloc[-1][0]
        start_date += [df.iloc[0][0]]
        end_date += [df.iloc[-1][0]]
        print "Test between  : ",tdf.iloc[0][0]," - ",tdf.iloc[-1][0]
        # Replace NaN and +inf with 0 before normalising.
        tdf = tdf.fillna(0)
        tdf = tdf.replace([np.inf],[0])
        
        df = df.fillna(0)
        df = df.replace([np.inf],[0])
        # Normalize both the input and the output at the same time.
        df,tdf = normalize_df(totaldf,df, tdf)
        #print df.head
        test_indices = range(tdf.shape[0])
        # Base features: fixed column positions 485-487 and 2-5.
        # NOTE(review): `list + range(...)` only works where range returns a
        # list (Python 2).
        xTrain = ((df.values[:,[485,486,487] + range(2,6)]))
        xTest = ((tdf.values[:, [485,486,487] + range(2,6)]))
        # Offsets of the per-item feature groups; column i+j is the one that
        # belongs to output column i in each group.
        lst = [6, 20, 34, 48, 426 ]

        for j in lst:
            xTrain = np.hstack((xTrain, df.values[:,i+j].reshape(-1, 1)))
            xTest = np.hstack((xTest, tdf.values[:,i+j].reshape(-1, 1),))
        
        # Targets: column i of the matching output frames.
        yTrain = odf.values[:,i]
        yTest = todf.values[:,i]

        print("\n")
        # The network is created once per output column and then re-fitted
        # (continuing from its current weights) on every later window.
        if (model_init_counter==0):
            model = nn_model_init(xTrain)
        yPred = nn_model(xTrain, yTrain, xTest, yTest, test_indices, model,df.shape[0])
        
        # Each yPred row is iterated and its entries appended individually.
        for looper in range(yPred.shape[0]):
            master_ypred[i] += list(yPred[looper])
        
        master_ytest[i] += list(yTest)
        templist = mape_calc(yTest,yPred)
        master_mape[i] += templist
        dates += list(tdf['Date'])
        
        # `local` holds exactly one metric list per window.
        local.append(calMetric(yTest, yPred, i))
        history2[tdf['Date'][0]].append(local)
        
        # Result is stored but not read again in this cell.
        errcategory = calculate_err_category(templist)
        # Sequential test, train split.
        
        # New train set is going to be one week.
        # The just-tested dates are looked up in inputdf (un-normalised rows)
        # and appended to totaldf.
        indexes = list(tdf['Date'])
        temporarydf = inputdf[inputdf['Date'].isin(indexes)].reset_index(drop=True)
        totaldf = pd.concat([totaldf,temporarydf],axis=0)
        totaldf.reset_index(drop=True,inplace=True)
    
        # NOTE(review): the next training window is the last 15 rows of
        # totaldf, which does not match the "one week" wording above - confirm.
        df = totaldf.iloc[-15:]
        
        df.reset_index(drop=True,inplace=True)
        # Output rows at the inputdf index positions of the window's dates.
        odf = outputdf.iloc[inputdf[inputdf['Date'].isin(list(df['Date']))].index].reset_index(drop=True)
        
        # Stop once the next 7-day test window would end after 2017-11-19.
        if((end+pd.Timedelta(7,unit='D')) > pd.to_datetime("2017-11-19")):
            print "When exiting the loop  ",end
            break

        # Index labels of the next test window: dates in (end, end + 7 days].
        indexes = list(inputdf[ (inputdf['Date'] > end) & (inputdf['Date'] <= (
            end + pd.Timedelta(value=7, unit='D')))].index)
        
        o_indices += indexes
        # Generating the test dataframe.
        tdf = inputdf[(inputdf['Date'] > end) & (inputdf['Date'] <= (
            end + pd.Timedelta(value=7, unit='D')))].reset_index(drop=True)

        todf = outputdf.iloc[indexes]
        
        start = tdf.iloc[0][0]
        end = tdf.iloc[tdf.shape[0]-1][0] 
        
        # A window with fewer than 7 rows is only reported; the loop goes on.
        if(tdf.shape[0] < 7):
            print "Detected a problem !!"
            print ("Shape of Test Set : ",tdf.shape)
            #break
            
        loopcounter += 1
        model_init_counter+=1
        
    # Drop the network and file this column's window history.
    del model
    master_dict[i] = (history2)

In [None]:
for i in range(len(start_date)):
    print i," : ",start_date[i]," - ",end_date[i],"\n"

In [None]:
# Label the 14 output columns; the labels double as Excel sheet names below.
outputdf.columns = ['food1', 'food2', 'food3', 'food4', 'food5', 'food6', 'food7',
       'food8', 'food9', 'food10', 'food11', 'food12', 'food13', 'food14']

# For every food item write one sheet holding, per training window
# (start_date[i] .. end_date[i], both inclusive), the mean and the median of
# column 80+food_counter of correctdf.
writer = pd.ExcelWriter("DynamicMeanMedianModel7.xlsx")
for food_counter in range(14):
    mean = []
    median = []
    print (outputdf.columns[food_counter])
    for i in range(len(start_date)):
        # Filter the window once and reuse it for both statistics.
        in_window = (correctdf['Date']>=start_date[i])&(correctdf['Date']<=end_date[i])
        window_values = correctdf[in_window].iloc[:,80+food_counter]
        mean.append(window_values.mean())
        median.append(window_values.median())
    print ("%s %s %s %s" % (len(mean),len(median),len(start_date),len(end_date)))
    d = pd.DataFrame(columns=["Training Start Date","Training End Date","Mean","Median"])
    d['Training Start Date'] = start_date
    d['Training End Date'] = end_date
    d['Mean'] = mean
    d['Median'] = median
    d.to_excel(writer,sheet_name=outputdf.columns[food_counter],index=False)
# close() writes the workbook and releases the file handle. It is available in
# old and new pandas alike, whereas ExcelWriter.save() was removed in pandas 2.0.
writer.close()

In [None]:
# Debug probe on the first output column: report the first position i whose
# prediction equals the prediction two places further on (j is always i + 2).
j=2
for i in range(len(master_ypred[0])):
    # Explicit bounds check instead of a bare `except:`. The bare handler
    # was only there to absorb the IndexError once j ran past the end, but it
    # also hid every other error (including KeyboardInterrupt).
    if(j < len(master_ypred[0]) and master_ypred[0][j] == master_ypred[0][i]):
        print ("%s %s" % (i,j))
        break
    j+=1

In [None]:
for i in range(b.shape[0]):
        if(b.iloc[i][0]!=te.iloc[i][0]):
            print b.iloc[i][0]," : ",te.iloc[i][0]
            print i
            break

In [None]:
for key,val in master_dict.iteritems():
    print outputdf.columns[key]," : ","\n"
    for key1,val1 in sorted(val.iteritems()):
        print key1," : ","\n",val1,"\n"
    print "--------------------------------"
    print "\n\n"    

In [None]:
def calc_mape(yPred,yTest):
    '''
    Per-sample absolute percentage error.

    Returns a list whose i-th entry is |yPred[i] - yTest[i]| / yTest[i] * 100,
    with i running over the first axis of yPred.
    '''
    n_samples = yPred.shape[0]
    return [abs(yPred[i] - yTest[i])*1.0/yTest[i]*100 for i in range(n_samples)]

In [None]:
# NN results - latest metrics, 9PM run, WITH spikes.
# (Original note: metrics on randomized 80-20 split.)
#
# 12 features:
#   * language occupancies of day i;
#   * food sales history till day i-2 (inclusive) + food sales of day 'i-1' till 6PM;
#   * moving average till i-2 (inclusive), inclusive of sales of food till day 'i-1'.
# 4 language occupancies + 4 MA of food per occ of 2,3,7,10 days + 7 days of history.
for key,val in history2.items():
    print ("------------------------------------------------")
    print ("------------------------------------------------")
    print ("Iteration Number : ",key)
    print ("\n")
    # val[0] is the list of per-column metric lists [R2, MAE, RMSE, MAPE].
    for j, scores in enumerate(val[0]):
        print (outputdf.columns[j])
        print ("R2    : ",scores[0])
        print ("MAE   : ",scores[1])
        print ("RMSE  : ",scores[2])
        print ("MAPE : ",scores[3][0])
        print ("\n")

In [None]:
# NN results - latest metrics, 9PM run, WITHOUT spikes.
# (Original note: metrics on randomized 80-20 split.)
#
# 12 features:
#   * language occupancies of day i;
#   * food sales history till day i-2 (inclusive) + food sales of day 'i-1' till 6PM;
#   * moving average till i-2 (inclusive), inclusive of sales of food till day 'i-1'.
# 4 language occupancies + 4 MA of food per occ of 2,3,7,10 days + 7 days of history.
for key,val in history2.items():
    print ("------------------------------------------------")
    print ("------------------------------------------------")
    print ("Iteration Number : ",key)
    print ("\n")
    # val[0] is the list of per-column metric lists [R2, MAE, RMSE, MAPE].
    for j, scores in enumerate(val[0]):
        print (outputdf.columns[j])
        print ("R2    : ",scores[0])
        print ("MAE   : ",scores[1])
        print ("RMSE  : ",scores[2])
        print ("MAPE : ",scores[3][0])
        print ("\n")

# XGBoost Model

In [None]:
# XGBoost baseline: one random 80/20 split of the rows, one regressor per
# output column. Per-column predictions/targets go to master_ypred /
# master_ytest and the metric lists to history2[0].
from numpy.random import seed
from xgboost import plot_importance
# NOTE(review): sklearn.grid_search no longer exists in scikit-learn >= 0.20,
# and GridSearchCV is not used anywhere in this cell - confirm it is needed.
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import train_test_split
seed(1)

'''
Processing data, for the model. Normalized!!!
Features :
(1) 4 columns pertaining to the language wise occupancies.
(2) Moving Average of food per occ of food items. [2,3,7,10 days of MA]
Total Features : 8

With Dropout.
'''

# Here the accumulators are plain lists with one entry per output column.
master_ypred = []
master_ytest = []
history2 = {}
indices = {}

# Row position of the sequential 80% cut used while assembling features.
tsize=int(0.8*inputdf.shape[0])
# Keep only the first 14 output columns.
outputdf = outputdf[outputdf.columns[0:14]]


'''
Get the indices for the weekdays and weekends in 2017.
'''

# Single pass (range(1)); loopcounter is only used as a dictionary key.
for loopcounter in range(1):
    
    indices[loopcounter] = []
    history2[loopcounter] = []
    local = []
    counter = 0
    
    # Random 80/20 split of the row positions.
    k = np.arange(inputdf.shape[0])
    np.random.shuffle(k)
    t = (int)(0.8*len(k))
    train,test = k[0:t],k[t:]
    seq_indices = {}
    c_counter=0
    test_indices = []
    
    #Convert the indices for the test set, in sequential order.
    # seq_indices maps a test row position to its rank within `test`.
    for c in test:
        seq_indices[c] = c_counter
        c_counter+=1
    
    df2017 = inputdf.iloc[test]
    
    # Attribute-style assignment of the day of week (Monday == 0).
    # assumes inputdf['Date'] is already datetime-typed - TODO confirm
    df2017.dow = df2017.Date.dt.dayofweek
    
    #Weekends : (Fri,Sat,Sun)
    weekends = (df2017[(df2017.dow==4) | (df2017.dow==5) |(df2017.dow==6)].index)
    
    #Weekdays : (Mon - Thurs)
    weekdays = (df2017[(df2017.dow==0) | (df2017.dow==1) |(df2017.dow==2) | (df2017.dow==3)].index)
    
    weekends_indices = []
    weekdays_indices = []
    
    # Translate index labels into ranks within the test set.
    for c in weekdays:
        weekdays_indices.append(seq_indices[c])
    
    for c in weekends:
        weekends_indices.append(seq_indices[c])
        
    # NOTE(review): all_days is built here but not read again in this cell.
    all_days = weekends_indices + weekdays_indices
    
    #Get the indices of the test set. Using these indices, we'll get the corresponding dates.
    indices[loopcounter].append(k[t:])
    # Bare expression inside the loop: it has no effect.
    len(train),inputdf.shape,test
    # Offsets of the per-item feature groups; column i+j is the one that
    # belongs to output column i in each group.
    lst = [6, 20, 34, 48, 426]
    for i in range(0,14):
        # Features are first assembled on a sequential cut at `tsize` ...
        xTrain, xTest = ((inputdf.values[:tsize,2:6])) , ((inputdf.values[tsize:,2:6]))  
        print (xTrain.shape,xTest.shape)
        for j in lst:
            xTrain, xTest = np.hstack((xTrain,inputdf.values[:tsize,i+j].reshape(-1,1))),np.hstack((xTest,inputdf.values[tsize:,i+j].reshape(-1,1)))    

        # One-hot encoding of inputdf['dow'] - 1.
        xTrain, xTest = np.hstack((xTrain,to_categorical((inputdf['dow']-1))[:tsize])),np.hstack((xTest,to_categorical((inputdf['dow']-1))[tsize:]))
        yTrain, yTest = outputdf.values[:tsize,i], outputdf.values[tsize:,i]

        print(xTrain.shape,xTest.shape,yTrain.shape,yTest.shape)
        # ... then stacked back into full-length arrays ...
        x = np.vstack((xTrain,xTest))
        y = np.vstack((yTrain.reshape((-1,1)),yTest.reshape((-1,1))))

        # ... and finally split by the shuffled positions from above.
        xTrain = pd.DataFrame(x).iloc[train].values
        xTest  = pd.DataFrame(x).iloc[test].values
        yTrain = pd.DataFrame(y).iloc[train].values
        yTest  = pd.DataFrame(y).iloc[test].values
        
        print(xTrain.shape, yTrain.shape)
        
        estimator = XGBRegressor( learning_rate=0.1, n_estimators=500, max_depth=5,
        min_child_weight=2, gamma=0, subsample=0.8, colsample_bytree=0.8, 
        nthread=4, scale_pos_weight=1,seed=27)
        estimator.fit(xTrain,yTrain)
        yPred = estimator.predict(xTest)
        # Flatten the (n, 1) targets into a 1-D array.
        yTest = np.array([item for sublist in yTest for item in sublist])
        
        # Histogram of the absolute percentage error on the test rows.
        print ("Plot")
        plt.figure(figsize=(15,6))
        plt.hist(abs((yPred - yTest)*100/yTest),bins=100)
        plt.grid(True)
        plt.show()
        plot_importance(estimator)
        
        master_ypred.append(yPred)
        master_ytest.append(yTest)
        local.append(calMetric(yTest,yPred,i))
        
        del estimator
        
    # history2[0][0] is the list of the 14 per-column metric lists.
    history2[loopcounter].append(local)     

## XGBOOST Results

In [None]:
# XGBoost results - metrics on the randomized 80-20 split.
# Latest metrics, 9PM run, WITH spikes.
#
# 12 features:
#   * language occupancies of day i;
#   * food sales history till day i-2 (inclusive) + food sales of day 'i-1' till 6PM;
#   * moving average till i-2 (inclusive), inclusive of sales of food till day 'i-1'.
# 4 language occupancies + 4 MA of food per occ of 2,3,7,10 days + 7 days of history.
for key,val in history2.items():
    print ("------------------------------------------------")
    print ("------------------------------------------------")
    print ("Iteration Number : ",key)
    print ("\n")
    # val[0] is the list of per-column metric lists [R2, MAE, RMSE, MAPE].
    for j, scores in enumerate(val[0]):
        print (outputdf.columns[j])
        print ("R2    : ",scores[0])
        print ("MAE   : ",scores[1])
        print ("RMSE  : ",scores[2])
        print ("MAPE : ",scores[3])
        print ("\n")

In [None]:
# XGBoost results - metrics on the randomized 80-20 split.
# Latest metrics, 9PM run, WITHOUT spikes.
#
# 12 features:
#   * language occupancies of day i;
#   * food sales history till day i-2 (inclusive) + food sales of day 'i-1' till 6PM;
#   * moving average till i-2 (inclusive), inclusive of sales of food till day 'i-1'.
# 4 language occupancies + 4 MA of food per occ of 2,3,7,10 days + 7 days of history.
for key,val in history2.items():
    print ("------------------------------------------------")
    print ("------------------------------------------------")
    print ("Iteration Number : ",key)
    print ("\n")
    # val[0] is the list of per-column metric lists [R2, MAE, RMSE, MAPE].
    for j, scores in enumerate(val[0]):
        print (outputdf.columns[j])
        print ("R2    : ",scores[0])
        print ("MAE   : ",scores[1])
        print ("RMSE  : ",scores[2])
        print ("MAPE : ",scores[3])
        print ("\n")

# Results saving

In [None]:
# The dates that end up in the Excel file are one day earlier than the day
# being predicted (e.g. '07-03-2016' instead of '08-03-2016'), because the
# prediction is made today (i-1) for tomorrow.
outputdf.columns = [
    'food1', 'food2', 'food3', 'food4', 'food5',
    'food6', 'food7', 'food8', 'food9', 'food10',
    'food11', 'food12', 'food13', 'food14',
]
#d = pd.DataFrame(inputdf.iloc[test+1]["Date"])
#d.reset_index(drop=True,inplace=True)

In [None]:
# Show the lengths of the date list and of the first column's prediction,
# target and MAPE lists side by side.
tuple(len(seq) for seq in (dates, master_ypred[0], master_ytest[0], master_mape[0]))

In [None]:
# Dates of the evaluation period, 2017-02-01 .. 2017-11-19 (both inclusive),
# in inputdf row order. reset_index returns a fresh Series instead of
# modifying a filtered slice of inputdf in place.
# NOTE(review): the two-model cell stores each window's predictions with
# flag-1 rows first and flag-0 rows second, so these dates may not line up
# row by row with its master_ypred - confirm before using that output.
dates = inputdf[(inputdf['Date']>=pd.to_datetime("2017-02-01"))&(inputdf['Date']<=pd.to_datetime("2017-11-19"))]['Date']
dates = dates.reset_index(drop=True)

writer = pd.ExcelWriter('NewModel3NN.xlsx')
'''
l = pd.DataFrame(sorted(master_dict[0].keys()),columns=["Start Date"])
l["End Date"] = l["Start Date"]+pd.Timedelta(6,unit='D')
l.to_excel(writer,'Date_Sheet',index=False)
'''

# One sheet per output column: date, prediction, target and per-sample MAPE.
for j in range(14):
    d = pd.DataFrame()
    d['Date'] = dates
    d['yPred'] = master_ypred[j]
    d['yTest'] = master_ytest[j]
    d['mape'] = master_mape[j]
    d.to_excel(writer,sheet_name=outputdf.columns[j],index=False)

# close() writes the workbook and releases the file handle. It is available in
# old and new pandas alike, whereas ExcelWriter.save() was removed in pandas 2.0.
writer.close()