# Keras Deep Learning

This is the rig for all the Deep Learning methods that use Keras

The code in this notebook draws heavily on Brownlee and therefore looks at:

1. MLP
2. 1D-CNN
3. Stacked LSTM
 

In [None]:
#Import libraries
from helpers import *

from sqlalchemy import create_engine

import psycopg2
import numpy as np
import pandas as pd
import datetime as dt

from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import RepeatVector
from keras.layers import TimeDistributed

from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt
%matplotlib inline

#This suppresses all warnings in the notebook - turn on once happy that the code works
# import warnings
# warnings.filterwarnings('ignore')

In [None]:
#Redshift user credentials - read from the environment so that they are never saved in the notebook.
#If the environment variables are not set, fall back to an interactive prompt
#(getpass does not echo the password and nothing is written to the cell output).
import os
from getpass import getpass

USER = os.environ.get('REDSHIFT_USER') or input('Redshift user: ')
PASSWORD = os.environ.get('REDSHIFT_PASSWORD') or getpass('Redshift password: ')

FCST_PERIOD = 9   #How many months I want to forecast ahead

In [None]:
#Create SQLAlchemy engine for Redshift database
import os
from urllib.parse import quote_plus

user = USER
password = PASSWORD
#The host is read from the environment (or prompted for) rather than being typed into the notebook
host = os.environ.get('REDSHIFT_HOST') or input('Redshift host: ')
port='5439'
dbname='prod'

#URL-escape the credentials: characters such as '@', ':' or '/' in a password would
#otherwise be parsed as part of the URL structure and break the connection string
url = "postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}".format(quote_plus(user), quote_plus(password), host, port, dbname)
engine = create_engine(url)

## 1. Get the catalog of ISBN/countries

Hardcoded to Spain with demand in the year preceding the 9 months we want to forecast

In [None]:
#This is all hardcoded for the moment
#NB There's a lot of unnecessary stuff here. I'll want to rationalise when I tidy up all the notebooks
#NB This two stage approach is the one that I will want to use throughout
#NOTE(review): the date filter below covers the most recent FCST_PERIOD months, although the
#aggregated column is called qty_12m - confirm which window is intended

query = f"""
select
    isbn + ship_to_country_key as key,
    isbn,
    isbn_short,
    subject_2_key,
    series_key,
    series_short,
    family_key,
    family_name,
    ship_to_country_key as country,
    sum(quantity_demanded) as qty_12m
from r2ibp.f_demand_actual t1
left join r2ibp.lu_product t2
on t1.isbn = t2.isbn13
where last_day(date) <= current_date
and last_day(date) > dateadd(month, -{FCST_PERIOD}, current_date)
and ship_to_country_key = 'ES'
group by isbn, isbn_short, subject_2_key, series_key, series_short, family_key, family_name, ship_to_country_key
order by qty_12m desc
"""

#The context manager closes the connection even when the query raises,
#so a failed read no longer leaks a database connection
with engine.connect() as conn:
    df_catalog = pd.read_sql_query(query, conn)

## 2. Get demand data for the test cases

Read all the demand data for the selected ISBN/countries. In this case Spanish ISBNs

In [None]:
#Every ISBN/country key in the catalog, as a plain Python list
key_list = df_catalog['key'].tolist()

#Fetch the demand history for exactly those keys
df_demand = get_demand(key_list, engine)

## 3. Pivot into a dataframe

NB Drop negative values and replace NaNs (i.e. missing values) with zeroes

Also simplify the columns index

In [None]:
#Only the key, month and qty columns of df_demand are needed, and only rows with positive demand
positive_demand = df_demand.loc[df_demand['qty'] > 0, ['key', 'month', 'qty']]

#One row per key, one column per month; months with no (positive) demand become 0.
#Naming the values column gives a flat month index straight away
df_pivoted = positive_demand.pivot(index='key', columns='month', values='qty').fillna(0)

del positive_demand

In [None]:
#A selection of data to look at: the last 10 keys (rows), from the 23rd month column onwards
df_pivoted.iloc[-10:, 22:]

In [None]:
#(number of keys, number of months)
df_pivoted.shape

## 4. Prepare the data for modelling

The prediction will be for the last 9 months (set by FCST_PERIOD)

Scale/normalise the data - start by just scaling based on the max value

Need to split the data both into X and y and into train and test, as well as creating a validation set (taken from the train set) for monitoring training performance.

Finally convert dataframes into numpy arrays to input into keras

In [None]:
#Normalise each key's series to 0-1 by dividing every month by that key's largest monthly demand
dfMax = df_pivoted.max(axis='columns')
df_scaled = df_pivoted.div(dfMax, axis='index')
df_scaled.tail()

In [None]:
#Set key parameters for data prep and modelling
n_features = 1 #i.e. a single quantity for each month

n_total_steps = df_scaled.shape[1] #i.e. the total number of months
n_steps_out = FCST_PERIOD
n_steps_in = n_total_steps - 2*n_steps_out # Need to chop off both the train and test 9 months!

#Fail here with a clear message rather than deep inside keras with empty inputs
if n_steps_in <= 0:
    raise ValueError(
        f"Not enough history: {n_total_steps} months available but at least "
        f"{2*n_steps_out + 1} are needed for FCST_PERIOD={FCST_PERIOD}"
    )

#Fixed seed so that the train/validation split is the same on every Restart & Run All
SPLIT_SEED = 42

#Split into train and test X and y
#Training window: everything except the last two forecast periods predicts the second-to-last period
df_X = df_scaled.iloc[:, :-(2*n_steps_out)]
df_y = df_scaled.iloc[:, -(2*n_steps_out):-n_steps_out]

df_X_train, df_X_val, df_y_train, df_y_val = train_test_split(df_X, df_y, random_state=SPLIT_SEED) #default is 75:25 split

#Test window: the same inputs moved forward by one forecast period, predicting the final period
df_X_test = df_scaled.iloc[:, n_steps_out:-n_steps_out] #X_test needs to be the same length as X_train
df_y_test = df_scaled.iloc[:, -n_steps_out:]


In [None]:
#Convert dfs to numpy arrays
X_train = df_X_train.to_numpy()
X_val = df_X_val.to_numpy()
X_test = df_X_test.to_numpy()
y_train = df_y_train.to_numpy()
y_val = df_y_val.to_numpy()
y_test = df_y_test.to_numpy()

#Create y_naive from X_test: start 12 months before the end of X_test and take FCST_PERIOD months.
#When FCST_PERIOD is 12 the slice has to run to the end of the array; a stop of
#FCST_PERIOD-12 == 0 would silently give an empty array
naive_stop = None if FCST_PERIOD == 12 else FCST_PERIOD - 12
y_naive = y_naive1 = X_test[:, -12:naive_stop]

## 5. Model and Predict

Allow myself the option of running various models here.
These models are taken from the Brownlee pdf book

NB I also should run each of these models multiple times to see what variance there is in the results.
Compare with how Brownlee did this

In [None]:
def define_MLP():
    """Build an (uncompiled) multilayer perceptron.

    Reads the module-level n_steps_in (width of the input vector) and
    n_steps_out (number of output units).

    Returns:
        A keras Sequential model: Dense(100, relu) -> Dense(n_steps_out, relu).
    """
    layers = (
        Dense(100, activation='relu', input_dim = n_steps_in),
        #relu on the output layer zeroes the negative values
        Dense(n_steps_out, activation='relu'),
    )

    mlp = Sequential()
    for layer in layers:
        mlp.add(layer)

    return mlp

In [None]:
def define_CNN():
    """Build an (uncompiled) 1D convolutional network.

    Reads the module-level n_steps_in, n_features (input shape) and
    n_steps_out (number of output units).

    Returns:
        A keras Sequential model:
        Conv1D(64, kernel 2) -> MaxPooling1D(2) -> Flatten -> Dense(50) -> Dense(n_steps_out),
        all with relu activations where an activation applies.
    """
    layers = (
        Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps_in, n_features)),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(50, activation='relu'),
        Dense(n_steps_out, activation='relu'),
    )

    cnn = Sequential()
    for layer in layers:
        cnn.add(layer)

    return cnn

In [None]:
def define_LSTM():
    """Build an (uncompiled) stacked LSTM.

    Reads the module-level n_steps_in, n_features (input shape) and
    n_steps_out (number of output units).

    Returns:
        A keras Sequential model: LSTM(100) -> LSTM(100) -> Dense(n_steps_out),
        all with relu activations.
    """
    layers = (
        #return_sequences=True so that the second LSTM receives the full sequence
        LSTM(100, activation='relu', return_sequences=True, input_shape=(n_steps_in, n_features)),
        LSTM(100, activation='relu'),
        Dense(n_steps_out, activation = 'relu'),
    )

    stacked = Sequential()
    for layer in layers:
        stacked.add(layer)

    return stacked

In [None]:
#Choose model to run here
#The input arrays are also put into the layout that the chosen model expects
MODEL = 'LSTM'

def _as_model_input(X, model_name):
    """Return X as [samples, timesteps] for the MLP, else as [samples, timesteps, features].

    Works whether X is currently 2-D or 3-D, so this cell can be re-run with a
    different MODEL without first re-running the data preparation cells.
    """
    if model_name == 'MLP':
        return X.reshape((X.shape[0], X.shape[1]))
    return X.reshape((X.shape[0], X.shape[1], n_features))

X_train = _as_model_input(X_train, MODEL)
X_val = _as_model_input(X_val, MODEL)
X_test = _as_model_input(X_test, MODEL)

#Any unrecognised name falls back to the LSTM
model_builders = {'MLP': define_MLP, 'CNN': define_CNN, 'LSTM': define_LSTM}
model = model_builders.get(MODEL, define_LSTM)()

#compile model - a brand new model is built above on every run of this cell,
#which is what gives freshly initialised weights
model.compile(optimizer='adam', loss='mse')

#display what the model looks like
model.summary()

In [None]:
#Record the start time so that the duration of the fit can be reported
start = dt.datetime.now()

# fit model - validation_data is passed so that val_loss is recorded in history for each epoch
history = model.fit(X_train, y_train, batch_size = 128, epochs=30, validation_data=(X_val, y_val), verbose=2)
#NB Brownlee uses 2000 epochs

#Elapsed time of the fit, as a timedelta
time = dt.datetime.now() - start
print('Time to fit model', time)

In [None]:
#Training and validation loss per epoch, drawn on the same axes
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])

plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'val'], loc='upper right')
plt.show()

In [None]:
#Loss (mse, as set in compile) on the validation split...
print('Validation set results:')
model.evaluate(X_val, y_val)

#...and on the test window (the final FCST_PERIOD months)
print('Test set results:')
model.evaluate(X_test, y_test);

In [None]:
#predict for the test set
yhat = model.predict(X_test, verbose=0)

#And convert to dataframe for later, labelled with the same keys (rows) and months (columns) as the actuals
df_yhat = pd.DataFrame(data=yhat, index=df_y_test.index, columns=df_y_test.columns)
#The values are still in scaled units here - they are rescaled in a later cell

In [None]:
#Summary statistics of the (still scaled) predictions
df_yhat.describe()
#NB The predictions never reach the full scaled value either (presumably 1.0 after the max scaling),
#and they are never less than zero due to the use of ReLU in the output layer

In [None]:
#Summary statistics of the scaled actuals for the test window, for comparison with df_yhat.describe()
df_y_test.describe()
#Notice how the yhat values are significantly lower than the actuals (shown below)

In [None]:
#Rescale the predictions back to demand units by multiplying each key's row by its max demand.
#Rebuilt from yhat (rather than from df_yhat itself) so that re-running this cell
#does not multiply by dfMax a second time
df_yhat = pd.DataFrame(data=yhat, index=df_y_test.index, columns=df_y_test.columns).mul(dfMax, axis = 0)

## 6. Calculate metrics and plot results

### Calculate metrics

Calculate total demand and RMSE metrics

In [None]:
#This function works across the whole arrays

def _rowwise_rmse(actual, forecast):
    """Root mean squared error of each row (one value per series)."""
    return np.sqrt(np.mean(np.square(actual - forecast), axis=1))


def calc_prediction_metrics_from_array(y_test, yhat, y_naive1):
    """Compare the model forecast and the naive-1 forecast against the actuals, series by series.

    All three inputs are 2-D arrays of the same shape, one row per series and one
    column per forecast month. The metrics are in the same units as the inputs.

    The RMSE is computed with numpy rather than with
    sklearn.metrics.mean_squared_error(..., squared=False): the `squared` argument
    is deprecated in recent scikit-learn releases, and the row-wise numpy form
    gives the same per-series values.

    Args:
        y_test: actual values.
        yhat: model predictions.
        y_naive1: naive-1 forecast.

    Returns:
        A list of ten 1-D arrays (one entry per series), in this order:
        sum_naive1, sum_pred, sum_act, diff_naive1_act, diff_pred_act,
        abs_pred_closer, rmse_naive1, rmse_pred, pred_rmse_lower, rmse_pc_diff.
        rmse_pc_diff is inf or nan for a series whose naive-1 RMSE is zero.
    """
    y_test = np.asarray(y_test, dtype=float)
    yhat = np.asarray(yhat, dtype=float)
    y_naive1 = np.asarray(y_naive1, dtype=float)

    #Totals over the forecast period
    sum_pred = np.sum(yhat, axis=1)
    sum_naive1 = np.sum(y_naive1, axis=1)
    sum_act = np.sum(y_test, axis=1)

    diff_pred_act = sum_pred - sum_act
    diff_naive1_act = sum_naive1 - sum_act

    #True where the predicted total is closer to the actual total than the naive total is
    abs_pred_closer = (np.abs(diff_pred_act) < np.abs(diff_naive1_act))

    rmse_pred = _rowwise_rmse(y_test, yhat)
    rmse_naive1 = _rowwise_rmse(y_test, y_naive1)

    pred_rmse_lower = (rmse_pred < rmse_naive1)

    #A naive RMSE of zero gives inf/nan here; silence the numpy warning locally
    #instead of relying on the caller to suppress it
    with np.errstate(divide='ignore', invalid='ignore'):
        rmse_pc_diff = ((rmse_pred - rmse_naive1)/rmse_naive1)*100

    return [sum_naive1, sum_pred, sum_act, diff_naive1_act, diff_pred_act, abs_pred_closer,
                                rmse_naive1, rmse_pred, pred_rmse_lower, rmse_pc_diff]

In [None]:
%%capture --no-display
#This suppresses all warnings - in this case divide by zero

if MODEL == 'MLP':  
    if FCST_PERIOD == 12:
        y_naive1 = X_test[:, -12:] #i.e. 12 months ago
    else:    
        y_naive1 = X_test[:, -12:FCST_PERIOD-12] #i.e. back 12 months and then PERIOD forward

else:
    if FCST_PERIOD == 12:
        y_naive1 = X_test[:, -12:, 0]
    else:    
        y_naive1 = X_test[:, -12:FCST_PERIOD-12, 0] # different input data structures
    
metrics = calc_prediction_metrics_from_array(y_test, yhat, y_naive1)

df_metrics = pd.DataFrame(df_X_test.index ,columns = ['key'])

for i in range(len(metrics)):
    df_metrics[i] = metrics[i]

df_metrics.columns = ['key', 'sum_naive1', 'sum_pred', 'sum_act', 'diff_naive1_act', 'diff_pred_act','abs_pred_closer',
                      'rmse_naive1', 'rmse_pred', 'pred_rmse_lower', 'rmse_pc_diff']

#Round all values to 2 dp
df_metrics = df_metrics.round(2)

### Plot Metrics

In [None]:
#plot_pred_naive1 is not defined in this notebook - presumably it comes from the helpers star import (TODO confirm)
plot_pred_naive1(df_metrics)

In [None]:
#Lowest rmse_pc_diff first, i.e. the keys where the model RMSE is furthest below the naive-1 RMSE
df_metrics_sorted = df_metrics.sort_values('rmse_pc_diff')
df_metrics_sorted.head()

## E.2 Plot selected ISBN countries

In [None]:
#Keys (ISBN followed by the country code) to plot individually
plot_list = ['9780521148597ES', '9780521148559ES', '9781108457651ES',
             '9781108794091ES', '9781108381208ES', '9788490365809ES',
             '9788490369883ES', '9788490361078ES', '9788490369975ES', '9780521221689ES']

In [None]:
def _key_to_ts(df, key):
    """Return the row of `df` for `key` as a one-column ('qty') frame indexed by month.

    `df` has one row per key and one column per month. The month labels are
    converted to datetimes. A key that is not in `df` gives an empty frame.
    """
    ts = pd.melt(df[df.index == key], var_name='month', value_name='qty').set_index('month')
    ts.index = pd.to_datetime(ts.index)
    return ts

#Set up grid for plotting
rows = int(np.ceil(len(plot_list)/2))  #round up
#squeeze=False keeps axes as a 2-D array even when there is only one row
fig, axes = plt.subplots(rows, 2, figsize = (16,rows*4), squeeze=False)
#The following is to iterate the axes
axes_flat = axes.flat

#The period of pred (month values) - not used in this cell, kept in case other cells rely on it
x_pred = df_y_test.columns

for i, key in enumerate(plot_list):

    #convert actuals and predictions to ts
    ts_actuals = _key_to_ts(df_pivoted, key)
    ts_pred = _key_to_ts(df_yhat, key)

    #and naive-1: the FCST_PERIOD months that ended 12 months before the end of the data, moved forward by a year
    ts_naive1 = ts_actuals[-(12+FCST_PERIOD):-12].shift(periods = 12, freq = 'M')

    ax = axes_flat[i]
    ax.plot(ts_actuals[-24:], '-o', label="actuals") #Just the last 2 years
    ax.plot(ts_pred, '-o', label="predicted")
    ax.plot(ts_naive1, '-o', label="naive-1")
    ax.grid()
    ax.legend(fontsize=12)
    ax.set_title(key);

#With an odd number of keys the last panel has nothing in it - hide it rather than show an empty frame
for spare_ax in axes.ravel()[len(plot_list):]:
    spare_ax.set_visible(False)

plt.tight_layout()
plt.show();

df_metrics[df_metrics['key'].isin(plot_list)]