In [None]:
from tensorflow import keras

from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

# from sklearn.ensemble import RandomForestRegressor

#Preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

#Optimizers
from keras import optimizers

#Results
from sklearn.metrics import mean_absolute_error 

from matplotlib import pyplot as plt
from datetime import datetime
import seaborn as sb
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
# Silence every warning for the rest of the notebook so library chatter does not clutter cell output.
warnings.filterwarnings('ignore')
# DeprecationWarning is already covered by the blanket filter above; this call only makes that explicit.
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [None]:
# #Read the csvs into dataframes
# train_csv = pd.read_csv('./train.csv')
# test_csv = pd.read_csv('./test.csv')
# features_csv = pd.read_csv('./features.csv')
# stores_csv = pd.read_csv('./stores.csv')

### Preprocessing Training Data

In [None]:
# train_csv.head()

In [None]:
# #Merge the features and store information into the train_csv
# train_merged_df = (train_csv.merge(stores_csv, how='left', on='Store')).merge(features_csv, how='left', on = ['Date', 'Store'])
# del train_merged_df['IsHoliday_y'] #delete duplicate column
# train_merged_df = train_merged_df.rename(columns={"IsHoliday_x": "IsHoliday"})

# train_merged_df.head()

In [None]:
# #Fill empty markdown values with 0
# train_merged_df['MarkDown1'] = train_merged_df['MarkDown1'].fillna(0)
# train_merged_df['MarkDown2'] = train_merged_df['MarkDown2'].fillna(0)
# train_merged_df['MarkDown3'] = train_merged_df['MarkDown3'].fillna(0)
# train_merged_df['MarkDown4'] = train_merged_df['MarkDown4'].fillna(0)
# train_merged_df['MarkDown5'] = train_merged_df['MarkDown5'].fillna(0)

# train_merged_df.head()

In [None]:
# #Convert date column to datetime data type
# train_merged_df['Date'] = pd.to_datetime(train_merged_df['Date'])

# #Extract year, month and day from the datestamps
# train_merged_df['Year'] = train_merged_df['Date'].dt.year
# train_merged_df['Month'] = train_merged_df['Date'].dt.month
# train_merged_df['Day'] = train_merged_df['Date'].dt.day

# train_merged_df.drop(['Date'], axis=1, inplace=True)

In [None]:
# merged_df.to_csv(r'.\Merged.csv', index=False)

In [None]:
# Non-null entries per column: any column whose count is below the number of rows has missing values.
train_merged_df.notna().sum()

In [None]:
# Add two lag features derived from the target column:
#   'rolling_4_sales' - mean of Weekly_Sales over the current row and the three rows before it
#   't-1_sales'       - Weekly_Sales of the previous row (previous week)
# Both are computed inside each Store/Dept series, so the first rows of one series never
# borrow sales from the tail of a different store or department. The leading rows of every
# series are left as NaN.
# Assumes the frame has 'Store' and 'Dept' columns and that rows are in chronological order
# within each Store/Dept pair - TODO confirm.
# NOTE(review): 'rolling_4_sales' still averages in the current week's Weekly_Sales, i.e. the
# value being predicted; consider shifting before averaging if the feature must be available
# at prediction time.
sales_by_series = train_merged_df.groupby(['Store', 'Dept'])['Weekly_Sales']
train_merged_df['rolling_4_sales'] = sales_by_series.transform(lambda sales: sales.rolling(4).mean())
train_merged_df['t-1_sales'] = sales_by_series.shift(1)
train_merged_df.head()

In [None]:
# Drop every row that has at least one missing value and report the per-column
# non-null counts before and after, so the effect of the drop is visible.
counts_before_drop = train_merged_df.count()
train_merged_df = train_merged_df.dropna()
counts_after_drop = train_merged_df.count()
print(counts_before_drop)
print(counts_after_drop)

In [None]:
# Show the dtype of every column to spot the non-numeric ones that still need encoding.
train_merged_df.dtypes

### Handling categorical columns - label encoding & one-hot encoding

In [None]:
# One-hot encode the categorical 'Type' and 'IsHoliday' columns; only the listed columns are expanded.
categorical_columns = ["Type", 'IsHoliday']
train_encoded_df = pd.get_dummies(data=train_merged_df, columns=categorical_columns)

In [None]:
# Preview the first rows of the encoded frame to check the new one-hot columns.
train_encoded_df.head()

In [None]:
# Pairwise correlation between all columns of the encoded frame, drawn as an annotated
# heatmap on a fixed [-1, 1] colour scale.
C_mat = train_encoded_df.corr()
fig, heatmap_ax = plt.subplots(figsize=(15, 15))
sb.heatmap(
    C_mat,
    vmin=-1,
    vmax=1,
    square=True,
    cmap='RdYlGn',
    annot=True,
    fmt='.1f',
    ax=heatmap_ax,
)
plt.show()

### Split the features and target

In [None]:
# Separate the target from the features. The target is reshaped to a single column
# (n_rows, 1) because the scaler fitted on it later expects 2-D input.
target_column = 'Weekly_Sales'
y_train = train_encoded_df[target_column].to_numpy().reshape(-1, 1)
X_train = train_encoded_df.drop(columns=[target_column])
feature_names = X_train.columns

### Scale and transform the data

In [None]:
# Fit one min-max scaler on the features and one on the target, keeping both fitted
# scalers so the same transformation can be reused on other data later.
X_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

X_train_scaled = X_scaler.fit_transform(X_train)
y_train_scaled = y_scaler.fit_transform(y_train)

### Neural Network

In [None]:
# Shallow regression network: one ReLU hidden layer (32 units) fed by all input features,
# followed by a single linear output unit.
n_input_features = X_train.shape[1]
nn_model = Sequential([
    Dense(32, activation='relu', kernel_initializer='normal', input_dim=n_input_features),
    Dense(1, activation='linear', kernel_initializer='normal'),
])
nn_model.summary()

In [None]:
# First training run of the shallow network: Adam with lr=0.001 and decay=0.005,
# optimising mean absolute error on the scaled training data.
opt = optimizers.Adam(lr=0.001, decay=0.005)

nn_model.compile(
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
    optimizer=opt,
)

nn_model_var = nn_model.fit(x=X_train_scaled, y=y_train_scaled, epochs=50, verbose=2)

# Training-loss curve, one point per epoch.
train_loss_per_epoch = nn_model_var.history['loss']
plt.plot(train_loss_per_epoch)
plt.title('Normal Neural Network Model Loss (lr=0.001, decay = 0.005)')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train Loss'], loc='upper right')
plt.show()

In [None]:
# Second training run of the shallow network: Adam with lr=0.001, decay=0.05 and AMSGrad.
opt = optimizers.Adam(lr=0.001, decay = 0.05, amsgrad = True)

# Compile with the optimiser configured above. Passing the string 'adam' here would train
# with stock Adam settings, leave `opt` unused and contradict the plot title below.
nn_model.compile(optimizer=opt,
                loss='mean_absolute_error',
                metrics=['mean_absolute_error'])

# NOTE(review): nn_model is the same object that was fitted in the previous cell, so this
# run continues from those weights instead of starting from a fresh initialisation.
nn_model_var = nn_model.fit(
                X_train_scaled,
                y_train_scaled,
                epochs=50,
                verbose=2)

# Training-loss curve, one point per epoch.
plt.plot(nn_model_var.history['loss'])
plt.title('Normal Neural Network Model Loss (lr=0.001, decay = 0.05, amsgrad = True)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train Loss'], loc='upper right')
plt.show()

In [None]:
# Evaluate the shallow network on the scaled training data. evaluate() returns the loss
# followed by the compiled metric; the model is compiled with mean_absolute_error as its
# only metric, so the second value is an MAE (lower is better), not an accuracy. The
# variable name is kept unchanged for compatibility; only the printed label is corrected.
nn_model_train_loss, nn_model_train_accuracy = nn_model.evaluate(
    X_train_scaled, y_train_scaled, verbose=2)
print(
    f"Normal Neural Network Evaluation - Train Loss: {nn_model_train_loss}, Train MAE: {nn_model_train_accuracy}")

In [None]:
# y_predicted_nn = nn_model.predict(X_test_scaled)

# MAE = mean_absolute_error(y_test_scaled , y_predicted_nn)
# MAE

### Deep Neural Network

In [None]:
# Deeper regression network: three ReLU hidden layers of widening size (32 -> 64 -> 128)
# followed by a single linear output unit.
dnn_model = Sequential()
# The first hidden layer also declares the input width (one input per feature column).
dnn_model.add(Dense(32, activation='relu', kernel_initializer='normal', input_dim=X_train.shape[1]))
for hidden_units in (64, 128):
    dnn_model.add(Dense(hidden_units, activation='relu', kernel_initializer='normal'))
dnn_model.add(Dense(1, activation='linear', kernel_initializer='normal'))

dnn_model.summary()

In [None]:
# Checkpoint callback: writes a file only when the monitored validation loss improves.
# The epoch number and validation loss are substituted into the file name.
checkpoint_name = 'Weights-{epoch:03d}--{val_loss:.5f}.hdf5'
checkpoint = ModelCheckpoint(
    filepath=checkpoint_name,
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# First training run of the deep network: Adam with lr=0.001 and decay=0.005,
# with 33% of the training data held out for validation and checkpointing enabled.
opt = optimizers.Adam(lr=0.001, decay=0.005)

dnn_model.compile(
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
    optimizer=opt,
)

dnn_fit_options = dict(
    epochs=50,
    batch_size=32,
    shuffle=True,
    validation_split=0.33,
    callbacks=callbacks_list,
    verbose=2,
)
dnn_model_var = dnn_model.fit(X_train_scaled, y_train_scaled, **dnn_fit_options)

# Training and validation loss per epoch.
for history_key in ('loss', 'val_loss'):
    plt.plot(dnn_model_var.history[history_key])
plt.title('Deep Neural Network Model Loss (lr=0.001, decay = 0.005)')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()

In [None]:
# Second training run of the deep network: Adam with lr=0.001, decay=0.05 and AMSGrad.
opt = optimizers.Adam(lr=0.001, decay = 0.05, amsgrad = True)

# Recompile so the optimiser configured above is actually used. Without this call fit()
# keeps the optimiser the model was last compiled with, `opt` is never used and the
# plot title below would describe a configuration that was not trained.
dnn_model.compile(optimizer=opt,
              loss='mean_absolute_error',
              metrics=['mean_absolute_error'])

# NOTE(review): dnn_model keeps the weights learnt in the previous cell, so this run
# continues training rather than starting from a fresh initialisation.
dnn_model_var = dnn_model.fit(
                        X_train_scaled,
                        y_train_scaled,
                        epochs=50,
                        shuffle=True,
                        verbose=2,
                        batch_size=32, 
                        validation_split = 0.33,
                        callbacks=callbacks_list
                    )

# Training and validation loss per epoch.
plt.plot(dnn_model_var.history['loss'])
plt.plot(dnn_model_var.history['val_loss'])
plt.title('Deep Neural Network Model Loss (lr=0.001, decay = 0.05, amsgrad = True)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()

In [None]:
# Third training run of the deep network: Adam with its default settings,
# same validation split and checkpoint callback as the earlier runs.
dnn_model.compile(
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
    optimizer='adam',
)

stock_adam_fit_options = dict(
    epochs=50,
    batch_size=32,
    shuffle=True,
    validation_split=0.33,
    callbacks=callbacks_list,
    verbose=2,
)
dnn_model_var = dnn_model.fit(X_train_scaled, y_train_scaled, **stock_adam_fit_options)

# Training and validation loss per epoch.
for history_key in ('loss', 'val_loss'):
    plt.plot(dnn_model_var.history[history_key])
plt.title('Deep Neural Network Model Loss - stock Adam')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()

In [None]:
# Evaluate the deep network on the scaled training data. evaluate() returns the loss
# followed by the compiled metric; the model is compiled with mean_absolute_error as its
# only metric, so the second value is an MAE (lower is better), not an accuracy. The
# variable name is kept unchanged for compatibility.
dnn_model_train_loss, dnn_model_train_accuracy = dnn_model.evaluate(
    X_train_scaled, y_train_scaled, verbose=2)
# Report the metric value in the second slot (the loss used to be printed twice).
print(
    f"Deep Neural Network Evaluation - Train Loss: {dnn_model_train_loss}, Train MAE: {dnn_model_train_accuracy}")

In [None]:
# # Load weights file of the best model:
# wights_file = 'Weights-478--18738.19831.hdf5' # choose the best checkpoint 
# dnn_model.load_weights(wights_file) # load it
# dnn_model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])

In [None]:
# y_predicted_dnn = dnn_model.predict(X_test_scaled)

In [None]:
# test and train - treat separately from start
#lr (adam), relu (alpha - make it 0.01) dropout, add more layers to dnn (to the power of 2), drop date and don't shuffle
# previous week/ rolling avg of sales (drop first row)
# t-1 and t column
# Try vanilla and stacked LSTMs (shampoo example) https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/
#Edit encoded_df to have one-hot columns for dept(99), store(45), year(4), month(12), day(7) and run model again to check accuracy
#Save model

# Predicting results using model for test data

### Preprocessing, One-hot encoding, Features-Target Split, Scaling

In [None]:
# Build the scaled test features/target with the same steps used for the training data.
# NOTE(review): test_csv, stores_csv and features_csv are only read in a commented-out cell
# near the top of this notebook - make sure they are loaded before running this cell.
# NOTE(review): this cell needs a 'Weekly_Sales' column in test_csv (used for the lag
# features and as the evaluation target) - confirm the test file provides it.

#Merge the features and store information into the test_csv
test_merged_df = (
    test_csv
    .merge(stores_csv, how='left', on='Store')
    .merge(features_csv, how='left', on=['Date', 'Store'])
)
del test_merged_df['IsHoliday_y'] #delete duplicate column
test_merged_df = test_merged_df.rename(columns={"IsHoliday_x": "IsHoliday"})

#Fill empty markdown values with 0
markdown_columns = [f'MarkDown{number}' for number in range(1, 6)]
test_merged_df[markdown_columns] = test_merged_df[markdown_columns].fillna(0)

#Convert date column to datetime data type
test_merged_df['Date'] = pd.to_datetime(test_merged_df['Date'])

#Extract year, month and day from the datestamps
test_merged_df['Year'] = test_merged_df['Date'].dt.year
test_merged_df['Month'] = test_merged_df['Date'].dt.month
test_merged_df['Day'] = test_merged_df['Date'].dt.day

test_merged_df = test_merged_df.drop(columns=['Date'])

#Add two columns - 4-week rolling average ('rolling_4_sales') and previous week sales('t-1_sales')
# Computed inside each Store/Dept series so values never cross from one store or department
# into another. Assumes a 'Dept' column exists and rows are in chronological order within
# each Store/Dept pair - TODO confirm.
# NOTE(review): 'rolling_4_sales' includes the current week's Weekly_Sales, i.e. the target.
test_sales_by_series = test_merged_df.groupby(['Store', 'Dept'])['Weekly_Sales']
test_merged_df['rolling_4_sales'] = test_sales_by_series.transform(lambda sales: sales.rolling(4).mean())
test_merged_df['t-1_sales'] = test_sales_by_series.shift(1)

# Drop rows with any missing value, exactly as is done for the training frame. The lag
# features are NaN on the leading rows, and NaN inputs would turn the predictions and the
# error metrics computed further down into NaN / errors.
test_merged_df = test_merged_df.dropna(how='any')

#Use get_dummies to one-hot encode the categorical 'Type' and 'IsHoliday' columns
test_encoded_df = pd.get_dummies(test_merged_df, columns=["Type", 'IsHoliday'])

# Features-Target Split
y_test = test_encoded_df['Weekly_Sales'].values
y_test = y_test.reshape(-1,1)
X_test = test_encoded_df.drop('Weekly_Sales', axis=1)
# Align to the training feature columns (same set, same order). A category that is absent
# from the test data gets an all-zero one-hot column instead of breaking the fitted scaler.
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)
feature_names = X_test.columns

#Scaling - reuse the scalers fitted on the training data; never refit on test data
X_test_scaled = X_scaler.transform(X_test)
y_test_scaled = y_scaler.transform(y_test)

### Evaluations

In [None]:
# Evaluate the four trained models on the scaled test data and print each result.
# NOTE(review): nn_model1, nn_model2, dnn_model1 and dnn_model2 are not created in any
# cell visible in this notebook (the training cells use nn_model / dnn_model) - make sure
# they are defined before running this cell.
# NOTE(review): the second value returned by evaluate() is whatever metric each model was
# compiled with; confirm that "Accuracy" is the right label for it.
nn_model1_test_loss, nn_model1_test_accuracy = nn_model1.evaluate(
    X_test_scaled, y_test_scaled, verbose=2)
print(
    f"Normal Neural Network Evaluation - Model 1 - Test Loss: {nn_model1_test_loss}, Test Accuracy: {nn_model1_test_accuracy}")

nn_model2_test_loss, nn_model2_test_accuracy = nn_model2.evaluate(
    X_test_scaled, y_test_scaled, verbose=2)
print(
    f"Normal Neural Network Evaluation - Model 2 - Test Loss: {nn_model2_test_loss}, Test Accuracy: {nn_model2_test_accuracy}")

# The deep models are reported under their own label (they used to be printed as
# "Normal Neural Network", which made the four output lines ambiguous).
dnn_model1_test_loss, dnn_model1_test_accuracy = dnn_model1.evaluate(
    X_test_scaled, y_test_scaled, verbose=2)
print(
    f"Deep Neural Network Evaluation - Model 1 - Test Loss: {dnn_model1_test_loss}, Test Accuracy: {dnn_model1_test_accuracy}")

dnn_model2_test_loss, dnn_model2_test_accuracy = dnn_model2.evaluate(
    X_test_scaled, y_test_scaled, verbose=2)
print(
    f"Deep Neural Network Evaluation - Model 2 - Test Loss: {dnn_model2_test_loss}, Test Accuracy: {dnn_model2_test_accuracy}")



In [None]:
# Predict on the scaled test features with nn_model1 and score the predictions against the
# scaled test target (the MAE is therefore in scaled units).
# NOTE(review): nn_model1 is not created in any cell visible in this notebook.
y_predicted_nn1 = nn_model1.predict(X_test_scaled)
MAE1 = mean_absolute_error(y_true=y_test_scaled, y_pred=y_predicted_nn1)
MAE1

In [None]:
# Predict on the scaled test features with nn_model2 and score the predictions against the
# scaled test target (the MAE is therefore in scaled units).
# NOTE(review): nn_model2 is not created in any cell visible in this notebook.
y_predicted_nn2 = nn_model2.predict(X_test_scaled)
MAE2 = mean_absolute_error(y_true=y_test_scaled, y_pred=y_predicted_nn2)
MAE2

In [None]:
# Predict on the scaled test features with dnn_model1 and score the predictions against the
# scaled test target (the MAE is therefore in scaled units).
# NOTE(review): dnn_model1 is not created in any cell visible in this notebook.
y_predicted_dnn1 = dnn_model1.predict(X_test_scaled)
MAE3 = mean_absolute_error(y_true=y_test_scaled, y_pred=y_predicted_dnn1)
MAE3

In [None]:
# Predict on the scaled test features with dnn_model2 and score the predictions against the
# scaled test target (the MAE is therefore in scaled units). This used to call dnn_model1,
# which made MAE4 and the "DNN 2" predictions a copy of the dnn_model1 results.
# NOTE(review): dnn_model2 is not created in any cell visible in this notebook.
y_predicted_dnn2 = dnn_model2.predict(X_test_scaled)

MAE4 = mean_absolute_error(y_test_scaled , y_predicted_dnn2)
MAE4

In [None]:
# Flatten the test target and each model's predictions to 1-D so they can be used as
# columns of a single table.
y_test_minmaxscaled = np.ravel(y_test_scaled)
NN1_predictions = np.ravel(y_predicted_nn1)
NN2_predictions = np.ravel(y_predicted_nn2)
DNN1_predictions = np.ravel(y_predicted_dnn1)
DNN2_predictions = np.ravel(y_predicted_dnn2)

In [None]:
# Collect the scaled test target and every model's predictions side by side, one column
# each, and write the table to a CSV file in the working directory.
prediction_columns = {
    "Y Test": y_test_minmaxscaled,
    "NN 1 Predictions": NN1_predictions,
    "NN 2 Predictions": NN2_predictions,
    "DNN 1 Predictions": DNN1_predictions,
    "DNN 2 Predictions": DNN2_predictions,
}
Predictions_df = pd.DataFrame(prediction_columns)

Predictions_df.to_csv(path_or_buf='NN Predictions.csv')