In [None]:
# import libraries
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns 

# Seed NumPy's global random generator so NumPy-based randomness is repeatable
# across runs. Other libraries keep their own random state.
np.random.seed(42)

# Display settings: let pandas render up to 999 columns before truncating.
pd.options.display.max_columns = 999
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

# project paths
# project_root_dir = os.path.normpath(os.getcwd() + os.sep + os.pardir)

# data_path = os.path.join(project_root_dir, "data")
# os.makedirs(data_path, exist_ok=True)

# Default directory that the CSV helpers below read from and write to.
data_path = "/content/drive/MyDrive/workspace/walmart/data"


def read_data(filename, date_col=None, data_path=data_path):
    """Load one CSV file from `data_path` into a DataFrame.

    Parameters
    ----------
    filename : str
        File name, joined onto `data_path`.
    date_col : list of str, optional
        Forwarded to `pandas.read_csv` as `parse_dates`.
    data_path : str
        Directory containing the file; defaults to the module-level path.

    Returns
    -------
    pandas.DataFrame
    """
    return pd.read_csv(os.path.join(data_path, filename), parse_dates=date_col)


def save_dataframe(df, filename, file_path=data_path):
    """Write `df` to `<file_path>/<filename>` as CSV, leaving out the index."""
    destination = os.path.join(file_path, filename)
    df.to_csv(destination, index=False)


In [None]:
# These three tables are loaded with their "Date" column parsed to datetimes.
train, test, features = (
    read_data(csv_name, date_col=["Date"])
    for csv_name in ("train.csv", "test.csv", "features.csv")
)

# The remaining tables are read without any date parsing.
stores, sample_submission = (
    read_data(csv_name) for csv_name in ("stores.csv", "sampleSubmission.csv")
)

## Prepare Data 

### Feature Engineering

In [None]:
from pandas.api.types import CategoricalDtype

# Ordered dtype for the store "Type" column, with ordering C < B < A.
cat_type = CategoricalDtype(categories=["C", "B", "A"], ordered=True)


def _engineer_features(sales, stores, features):
    """Return `sales` joined with store/feature data plus calendar features.

    The same steps are applied to the train and the test frame so that both
    end up with identical columns and dtypes.

    Parameters
    ----------
    sales : pandas.DataFrame
        Frame with at least "Store", "Dept", "Date" (datetime) and "IsHoliday".
    stores : pandas.DataFrame
        Per-store attributes, joined on "Store"; must provide "Type".
    features : pandas.DataFrame
        Per-store, per-date attributes, joined on ["Store", "Date"].

    Returns
    -------
    pandas.DataFrame
        A new frame; `sales` itself is not modified.
    """
    merged = (
        sales.merge(stores, how="left", on="Store")
        .merge(features, how="left", on=["Store", "Date"])
        # Both inputs carry an IsHoliday column, so the second merge yields
        # IsHoliday_x / IsHoliday_y; keep the one that came from `sales`.
        .drop(columns=["IsHoliday_y"])
        .rename(columns={"IsHoliday_x": "IsHoliday"})
    )

    ## Datetime features
    dates = merged["Date"].dt
    merged["Year"] = dates.year
    merged["Month"] = dates.month
    merged["Day"] = dates.day
    # `Series.dt.weekofyear` is deprecated (removed in pandas 2.0);
    # `isocalendar().week` is the documented replacement. It returns UInt32,
    # so cast back to int64 to keep the dtype `weekofyear` used to give.
    # NOTE(review): the cast assumes no missing dates - confirm for new data.
    merged["WeekOfYear"] = dates.isocalendar().week.astype("int64")
    merged["DayOfWeek"] = dates.dayofweek
    merged["Weekend"] = (dates.weekday >= 5).astype(int)

    # convert boolean column to a Yes/No label before making it categorical
    merged["IsHoliday"] = merged["IsHoliday"].map({True: "Yes", False: "No"})

    # ordered categorical store type
    merged["Type"] = merged["Type"].astype(cat_type)

    # Plain (unordered) categoricals, so they are one-hot encoded downstream
    # instead of being scaled as numbers.
    for col in ("IsHoliday", "Store", "Dept", "Year", "Month", "DayOfWeek", "Weekend"):
        merged[col] = merged[col].astype("category")

    return merged


train = _engineer_features(train, stores, features)
test = _engineer_features(test, stores, features)


Series.dt.weekofyear and Series.dt.week have been deprecated.  Please use Series.dt.isocalendar().week instead.


Series.dt.weekofyear and Series.dt.week have been deprecated.  Please use Series.dt.isocalendar().week instead.



### Data Preprocessing Pipeline

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import OneHotEncoder


# Separate the model inputs from the target. The test set has no
# Weekly_Sales column because those are the values we need to predict.
y_train = train["Weekly_Sales"].copy()
X_train = train.drop(columns=["Weekly_Sales"]).copy()

X_test = test.copy()

# Take the Date column out of both feature frames, keeping the dates
# available in their own variables.
train_date, test_date = X_train.pop("Date"), X_test.pop("Date")


#### Data preparation pipeline

# Columns are routed by dtype: object/category columns are treated as
# categorical, everything else as numerical.
num_cols = X_train.select_dtypes(exclude=["object", "category"]).columns.tolist()
cat_cols = X_train.select_dtypes(include=["object", "category"]).columns.tolist()

# numerical data preprocessing pipeline: median imputation, then standardisation
num_pipe = make_pipeline(SimpleImputer(strategy="median"), StandardScaler())

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to
# `sparse_output`, and 1.4 removed the old name. Try the new spelling first and
# fall back to the old one so the notebook runs on either side of the rename.
try:
    one_hot = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
except TypeError:
    one_hot = OneHotEncoder(handle_unknown="ignore", sparse=False)

# categorical data preprocessing pipeline: fill missing values with the
# constant "NA", then one-hot encode (categories unseen during fit are ignored)
cat_pipe = make_pipeline(
    SimpleImputer(strategy="constant", fill_value="NA"),
    one_hot,
)

# full pipeline: each branch only sees its own list of columns
full_pipe = ColumnTransformer(
    [("num", num_pipe, num_cols), ("cat", cat_pipe, cat_cols)]
)

full_pipe

ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
                  transformer_weights=None,
                  transformers=[('num',
                                 Pipeline(memory=None,
                                          steps=[('simpleimputer',
                                                  SimpleImputer(add_indicator=False,
                                                                copy=True,
                                                                fill_value=None,
                                                                missing_values=nan,
                                                                strategy='median',
                                                                verbose=0)),
                                                 ('standardscaler',
                                                  StandardScaler(copy=True,
                                                                 with_mean=True,
             

In [None]:
# Fit the preprocessing pipeline on the training features only, then apply the
# already-fitted transformation to the test features.
X_train_tr = full_pipe.fit_transform(X_train)
X_test_tr = full_pipe.transform(X_test)

In [None]:
# The fitted one-hot encoder lives inside the "cat" branch of the column
# transformer; its categories_ holds one array of levels per categorical column.
fitted_encoder = full_pipe.named_transformers_["cat"].named_steps["onehotencoder"]
ohe_categories = fitted_encoder.categories_

# Name every encoded column "<original column>__<level>"
new_ohe_features = [
    f"{col}__{val}"
    for col, vals in zip(cat_cols, ohe_categories)
    for val in vals
]

# Numerical columns come first in the transformer output, then the encoded ones
all_features = [*num_cols, *new_ohe_features]

# Wrap the transformed arrays in labelled DataFrames
X_train_tr = pd.DataFrame(data=X_train_tr, columns=all_features)
X_test_tr = pd.DataFrame(data=X_test_tr, columns=all_features)

In [None]:
# Preview the first rows of the transformed training features.
X_train_tr.head()

Unnamed: 0,Size,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,Day,WeekOfYear,Store__1,Store__2,Store__3,Store__4,Store__5,Store__6,Store__7,Store__8,Store__9,Store__10,Store__11,Store__12,Store__13,Store__14,Store__15,Store__16,Store__17,Store__18,Store__19,Store__20,Store__21,Store__22,Store__23,Store__24,Store__25,Store__26,Store__27,Store__28,Store__29,Store__30,Store__31,Store__32,Store__33,Store__34,Store__35,Store__36,Store__37,Store__38,Store__39,Store__40,Store__41,Store__42,Store__43,Store__44,Store__45,Dept__1,Dept__2,Dept__3,Dept__4,Dept__5,Dept__6,Dept__7,Dept__8,Dept__9,Dept__10,Dept__11,Dept__12,Dept__13,Dept__14,Dept__16,Dept__17,Dept__18,Dept__19,Dept__20,Dept__21,Dept__22,Dept__23,Dept__24,Dept__25,Dept__26,Dept__27,Dept__28,Dept__29,Dept__30,Dept__31,Dept__32,Dept__33,Dept__34,Dept__35,Dept__36,Dept__37,Dept__38,Dept__39,Dept__40,Dept__41,Dept__42,Dept__43,Dept__44,Dept__45,Dept__46,Dept__47,Dept__48,Dept__49,Dept__50,Dept__51,Dept__52,Dept__54,Dept__55,Dept__56,Dept__58,Dept__59,Dept__60,Dept__65,Dept__67,Dept__71,Dept__72,Dept__74,Dept__77,Dept__78,Dept__79,Dept__80,Dept__81,Dept__82,Dept__83,Dept__85,Dept__87,Dept__90,Dept__91,Dept__92,Dept__93,Dept__94,Dept__95,Dept__96,Dept__97,Dept__98,Dept__99,IsHoliday__No,IsHoliday__Yes,Type__A,Type__B,Type__C,Year__2010,Year__2011,Year__2012,Month__1,Month__2,Month__3,Month__4,Month__5,Month__6,Month__7,Month__8,Month__9,Month__10,Month__11,Month__12,DayOfWeek__4,Weekend__0
0,0.239209,-0.963798,-1.720834,-0.134678,-0.163871,-0.083237,-0.165943,-0.125789,1.018774,0.078201,-1.219293,-1.471661,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
1,0.239209,-1.169783,-1.773177,-0.134678,-0.163871,-0.083237,-0.165943,-0.125789,1.022498,0.078201,-0.419617,-1.400999,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2,0.239209,-1.09281,-1.84733,-0.134678,-0.163871,-0.083237,-0.165943,-0.125789,1.023697,0.078201,0.38006,-1.330337,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
3,0.239209,-0.729625,-1.744825,-0.134678,-0.163871,-0.083237,-0.165943,-0.125789,1.024476,0.078201,1.179736,-1.259675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
4,0.239209,-0.736672,-1.605243,-0.134678,-0.163871,-0.083237,-0.165943,-0.125789,1.025255,0.078201,-1.219293,-1.189013,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0


In [None]:
# Show the first five target values.
y_train[:5]

0    24924.50
1    46039.49
2    41595.55
3    19403.54
4    21827.90
Name: Weekly_Sales, dtype: float64

In [None]:
from sklearn.metrics import make_scorer

def WMAE(data, y, y_pred):
    """Weighted mean absolute error, with holiday rows weighted five times.

    Rows where ``data["IsHoliday__Yes"] == 1`` get weight 5, all other rows
    weight 1. The three inputs are matched by position (row i of `data` goes
    with element i of `y` and `y_pred`), so differing pandas indexes or a
    column-shaped prediction array cannot misalign or broadcast the errors.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the one-hot column "IsHoliday__Yes".
    y : array-like
        True values, one per row of `data`.
    y_pred : array-like
        Predicted values; shape (n,) or (n, 1), e.g. the output of
        `model.predict`.

    Returns
    -------
    numpy.float64
        The weighted error rounded to two decimals.

    Raises
    ------
    ValueError
        If `data`, `y` and `y_pred` do not have the same number of rows.
    """
    holiday_flags = np.asarray(data["IsHoliday__Yes"]).ravel()
    # Flatten both sides so an (n, 1) prediction array is compared element by
    # element instead of being broadcast against the targets.
    actual = np.asarray(y, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if not (len(holiday_flags) == len(actual) == len(predicted)):
        raise ValueError(
            "data, y and y_pred must have the same number of rows, got "
            f"{len(holiday_flags)}, {len(actual)} and {len(predicted)}"
        )

    weights = np.where(holiday_flags == 1, 5, 1)
    return np.round(np.sum(weights * np.abs(actual - predicted)) / np.sum(weights), 2)

#wmae_scorer = make_scorer(WMAE, greater_is_better=False)

### create a validation set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training rows as a validation set (fixed random_state
# for a repeatable split).
# NOTE: X_train_tr and y_train are overwritten with the 80% part, so running
# this cell a second time splits the already-reduced training set again.
X_train_tr, X_valid_tr, y_train, y_valid = train_test_split(X_train_tr,y_train, test_size=0.2, random_state=42)

## Build a Deep Learning Model

In [None]:
import tensorflow as tf 
from tensorflow import keras

### create the neural network

In [None]:
# NumPy's seed does not reach TensorFlow, so seed TensorFlow as well; otherwise
# weight initialisation and batch shuffling differ on every run.
tf.random.set_seed(42)

# Baseline network: two 64-unit ReLU hidden layers followed by a single
# linear output unit (regression on Weekly_Sales).
model = keras.models.Sequential([
        keras.layers.Dense(64, activation="relu", input_shape=X_train_tr.shape[1:]),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(1)
])

In [None]:
# Print the layer-by-layer summary with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                10304     
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
Total params: 14,529
Trainable params: 14,529
Non-trainable params: 0
_________________________________________________________________


### compile the model 

In [None]:
# RMSprop optimizer, Huber loss (default settings), and MAE tracked as an
# additional metric.
model.compile(optimizer="rmsprop", loss= keras.losses.Huber(), metrics=['mae'])

### training and evaluating the model

In [None]:
# Train for 25 epochs; the hold-out split passed as validation_data provides
# the val_* metrics recorded in `history`.
history = model.fit(X_train_tr, y_train,
                    validation_data=(X_valid_tr, y_valid),
                    epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
# Names of the metrics recorded during training.
history.history.keys()

dict_keys(['loss', 'mae', 'val_loss', 'val_mae'])

In [None]:
# Collect the recorded training history into a DataFrame
# (one column per metric) and preview it.
results = pd.DataFrame(history.history)
results.head()

Unnamed: 0,loss,mae,val_loss,val_mae
0,7871.092285,7871.578125,5054.26709,5054.76709
1,4312.129395,4312.624512,3774.827637,3775.331299
2,3490.036621,3490.527588,3343.181396,3343.679932
3,3171.311035,3171.804688,3086.646484,3087.14624
4,2926.99707,2927.500244,2863.880127,2864.379639


In [None]:
# (rows, columns) of the history frame.
results.shape

(25, 4)

In [None]:
fig = go.Figure()

# (history column, legend label, line colour) for each curve, in drawing order
for column, label, colour in (
    ("loss", "Training Loss", "blue"),
    ("val_loss", "Validation Loss", "green"),
):
    fig.add_trace(
        go.Scatter(
            x=list(range(len(results))),
            y=results[column],
            name=label,
            mode="lines",
            line=dict(color=colour),
        )
    )

fig.update_layout(
    title="Training vs Validation Loss",
    xaxis=dict(title="Epochs"),
    yaxis=dict(title="Huber Loss"),
)
fig.show()

In [None]:
fig = go.Figure()

# (history column, legend label, line colour) for each curve, in drawing order
for column, label, colour in (
    ("mae", "Training MAE", "blue"),
    ("val_mae", "Validation MAE", "green"),
):
    fig.add_trace(
        go.Scatter(
            x=list(range(len(results))),
            y=results[column],
            name=label,
            mode="lines",
            line=dict(color=colour),
        )
    )

fig.update_layout(
    title="Training vs Validation MAE",
    xaxis=dict(title="Epochs"),
    yaxis=dict(title="Mean Absolute Error"),
)
fig.show()

In [None]:
# Predict on the transformed test features, write the predictions into the
# Weekly_Sales column of the sample submission (modified in place), and save
# it as a CSV file in the data directory.
y_pred = model.predict(X_test_tr)
sample_submission["Weekly_Sales"] = y_pred
save_dataframe(sample_submission, "deep_learning_huber.csv")

The WMAE on the leaderboard for this model is `5228`, which is higher (i.e. worse) than the random forest model's score.

### create small network

In [None]:
keras.backend.clear_session()
# NumPy's seed does not reach TensorFlow; seed it here so this experiment's
# weight initialisation and batch shuffling are repeatable.
tf.random.set_seed(42)

# Smaller variant of the baseline: two 16-unit ReLU hidden layers.
model_small = keras.models.Sequential([
        keras.layers.Dense(16, activation="relu", input_shape=X_train_tr.shape[1:]),
        keras.layers.Dense(16, activation="relu"),
        keras.layers.Dense(1)
])
# Same optimizer, loss, metric and epoch count as the baseline, so the
# training histories are directly comparable.
model_small.compile(optimizer="rmsprop", loss= keras.losses.Huber(), metrics=['mae'])
history_small = model_small.fit(X_train_tr, y_train,
                    validation_data=(X_valid_tr, y_valid),
                    epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
# Training history of the small model as a DataFrame.
results_small = pd.DataFrame(history_small.history)

In [None]:
fig = go.Figure()

# (history frame, legend label, line colour) for each model's validation curve
for history_frame, label, colour in (
    (results, "Original Model", "blue"),
    (results_small, "Small Model", "green"),
):
    fig.add_trace(
        go.Scatter(
            x=list(range(len(history_frame))),
            y=history_frame["val_loss"],
            name=label,
            mode="lines",
            line=dict(color=colour),
        )
    )

fig.update_layout(
    title="Original Vs Small Model",
    xaxis=dict(title="Epochs"),
    yaxis=dict(title="Huber Loss"),
)
fig.show()

The small model is underfitting the data. The original model performs better than this one.

#### Much Bigger Network

In [None]:
keras.backend.clear_session()
# NumPy's seed does not reach TensorFlow; seed it here so this experiment's
# weight initialisation and batch shuffling are repeatable.
tf.random.set_seed(42)

# Much larger variant of the baseline: two 512-unit ReLU hidden layers.
model_bigger = keras.models.Sequential([
        keras.layers.Dense(512, activation="relu", input_shape=X_train_tr.shape[1:]),
        keras.layers.Dense(512, activation="relu"),
        keras.layers.Dense(1)
])
# Same optimizer, loss, metric and epoch count as the baseline, so the
# training histories are directly comparable.
model_bigger.compile(optimizer="rmsprop", loss= keras.losses.Huber(), metrics=['mae'])
history_bigger = model_bigger.fit(X_train_tr, y_train,
                    validation_data=(X_valid_tr, y_valid),
                    epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
# Training history of the bigger model as a DataFrame.
results_bigger = pd.DataFrame(history_bigger.history)

In [None]:
fig = go.Figure()

# Each curve takes its x-axis from the length of its OWN history frame, so the
# epochs always line up with the plotted values (the bigger model's curve used
# to borrow the small model's length).
fig.add_trace(go.Scatter(x = list(range(len(results))) ,
                         y = results['val_loss'],
                         name="Original Model",
                         mode="lines",
                         line=dict(color="blue")))

fig.add_trace(go.Scatter(x = list(range(len(results_bigger))) ,
                         y = results_bigger['val_loss'],
                         name="Bigger Model",
                         mode="lines",
                         line=dict(color="green")))
fig.update_layout(title="Original Vs Bigger Model",
                  xaxis= dict(title="Epochs"),
                  yaxis= dict(title="Huber Loss"))
fig.show()

The validation loss of the bigger model is significantly lower than that of the original model.

In [None]:
fig = go.Figure()

# Both curves come from the bigger model's history, so the epoch axis is built
# from that frame too (it used to be taken from the original and the small
# model's histories).
epochs_bigger = list(range(len(results_bigger)))

fig.add_trace(go.Scatter(x = epochs_bigger,
                         y = results_bigger['loss'],
                         name="Training Loss",
                         mode="lines",
                         line=dict(color="blue")))

fig.add_trace(go.Scatter(x = epochs_bigger,
                         y = results_bigger['val_loss'],
                         name="Validation Loss",
                         mode="lines",
                         line=dict(color="green")))
fig.update_layout(title="Training vs Validation Loss Bigger Model",
                  xaxis= dict(title="Epochs"),
                  yaxis= dict(title="Huber Loss"))
fig.show()

The bigger model starts to overfit after the 3rd epoch. Let's test this model's performance first, and then we will adjust it.

In [None]:
# Predict with the bigger model on the transformed test features, overwrite the
# Weekly_Sales column of the sample submission (modified in place), and save
# it as a CSV file in the data directory.
y_pred = model_bigger.predict(X_test_tr)
sample_submission["Weekly_Sales"] = y_pred
save_dataframe(sample_submission, "deep_learning_bigger_model.csv")

The score on the leaderboard is much worse than the original model's: its WMAE is 7791.

### Original Model with More Layers

In [None]:
keras.backend.clear_session()
# NumPy's seed does not reach TensorFlow; seed it here so this experiment's
# weight initialisation and batch shuffling are repeatable.
tf.random.set_seed(42)

# Baseline width (64 units) with a third ReLU hidden layer added.
# NOTE: this rebinds `model`, replacing the baseline network defined earlier.
model = keras.models.Sequential([
        keras.layers.Dense(64, activation="relu", input_shape=X_train_tr.shape[1:]),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(1)
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                10304     
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 18,689
Trainable params: 18,689
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Compile and train the model with the same settings as the earlier runs
# (RMSprop, Huber loss, MAE metric, 25 epochs) so the histories are comparable.
# NOTE: `history` is rebound here, replacing the previous model's history.
model.compile(optimizer="rmsprop", loss= keras.losses.Huber(), metrics=['mae'])
history = model.fit(X_train_tr, y_train,
                    validation_data=(X_valid_tr, y_valid),
                    epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
# Training history of the current model as a DataFrame.
# NOTE: this rebinds `results`, replacing the earlier model's history frame.
results = pd.DataFrame(history.history)
results.head()

Unnamed: 0,loss,mae,val_loss,val_mae
0,5209.587891,5210.10791,3207.904053,3208.403564
1,2789.965576,2790.458984,2526.441895,2526.941162
2,2358.408447,2358.912354,2299.817139,2300.31665
3,2174.087158,2174.588623,2108.257568,2108.757568
4,2080.625,2081.128662,2057.940918,2058.440674


In [None]:
fig = go.Figure()

# (history column, legend label, line colour) for each curve, in drawing order
for column, label, colour in (
    ("loss", "Training Loss", "blue"),
    ("val_loss", "Validation Loss", "green"),
):
    fig.add_trace(
        go.Scatter(
            x=list(range(len(results))),
            y=results[column],
            name=label,
            mode="lines",
            line=dict(color=colour),
        )
    )

fig.update_layout(
    title="Training vs Validation Loss",
    xaxis=dict(title="Epochs"),
    yaxis=dict(title="Huber Loss"),
)
fig.show()

#### Add Dropout layers

In [None]:
keras.backend.clear_session()
# NumPy's seed does not reach TensorFlow; seed it here so weight
# initialisation, batch shuffling and the dropout masks are repeatable.
tf.random.set_seed(42)

# Three 64-unit ReLU hidden layers, each followed by dropout with rate 0.2.
# NOTE: this rebinds `model`, replacing the network defined earlier.
model = keras.models.Sequential([
        keras.layers.Dense(64, activation="relu", input_shape=X_train_tr.shape[1:]),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(1)
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                10304     
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 6

In [None]:
# Compile and train the model with the same settings as the earlier runs
# (RMSprop, Huber loss, MAE metric, 25 epochs) so the histories are comparable.
# NOTE: `history` is rebound here, replacing the previous model's history.
model.compile(optimizer="rmsprop", loss= keras.losses.Huber(), metrics=['mae'])
history = model.fit(X_train_tr, y_train,
                    validation_data=(X_valid_tr, y_valid),
                    epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
# Training history of the current model as a DataFrame.
# NOTE: this rebinds `results`, replacing the earlier model's history frame.
results = pd.DataFrame(history.history)
results.head()

Unnamed: 0,loss,mae,val_loss,val_mae
0,5906.309082,5906.82373,3526.730225,3527.228271
1,3858.833984,3859.342285,3148.510498,3149.010498
2,3591.950439,3592.445312,2763.690674,2764.190674
3,3461.373535,3461.878906,2594.917725,2595.417236
4,3370.635254,3371.136719,2486.348389,2486.848145


In [None]:
fig = go.Figure()

# (history column, legend label, line colour) for each curve, in drawing order
for column, label, colour in (
    ("loss", "Training Loss", "blue"),
    ("val_loss", "Validation Loss", "green"),
):
    fig.add_trace(
        go.Scatter(
            x=list(range(len(results))),
            y=results[column],
            name=label,
            mode="lines",
            line=dict(color=colour),
        )
    )

fig.update_layout(
    title="Training vs Validation Loss",
    xaxis=dict(title="Epochs"),
    yaxis=dict(title="Huber Loss"),
)
fig.show()

In [None]:
# Predict with the current `model` on the transformed test features, overwrite
# the Weekly_Sales column of the sample submission (modified in place), and
# save it as a CSV file in the data directory.
y_pred = model.predict(X_test_tr)
sample_submission["Weekly_Sales"] = y_pred
save_dataframe(sample_submission, "deep_learning_dropout_0.2.csv")

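So far, this model has performed the best of all the deep learning models tried above. The validation loss is also much lower than the training loss, so the model does not appear to be overfitting. (Dropout is only active during training, which by itself raises the training loss relative to the validation loss.)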

In [None]:
##