# RNN and ConvNets

The data file "data.csv" contains three time series, x1, x2, and y, along with the corresponding date column. The data range from the beginning of 2019 to the end of February 2020. The objective of this problem is to predict y for March 1st and March 2nd, 2020.

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tensorflow as tf

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import LSTM, GRU, Conv1D
from keras.layers import Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.layers import Dropout, Flatten,MaxPooling1D
from tensorflow.keras.optimizers import RMSprop, Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping

from keras.utils import to_categorical # np_utils

from sklearn.preprocessing import MinMaxScaler

# Load the time-series CSV into a DataFrame.
df = pd.read_csv('timeseriesData.csv')
# Optional cleaning steps, currently disabled:
# df.interpolate(method='linear', inplace=True)
# df.dropna(subset=['Date'], inplace=True)
# Display the last 8 rows to inspect the most recent observations.
df.tail(8)

In [None]:
# Print the full per-column summary of the DataFrame.
df.info(verbose=True)

In [None]:
# Parse the Date column into datetimes before using it as the x-axis.
df['Date'] = pd.to_datetime(df['Date'])

# Overlay the three raw series (x1, x2, y) on a single figure.
fig = go.Figure()
for column in ('x1', 'x2', 'y'):
    fig.add_trace(go.Scatter(x=df['Date'], y=df[column], name=column))
fig.update_layout(title="x1, x2, and y")

In [None]:
# Feature matrix: the x1 and x2 columns as a NumPy array.
x_val_1 = df.loc[:, ['x1', 'x2']].values

# Chronological split: first 70% of rows for training, next 15% for
# validation, and whatever remains for testing.
nRows = x_val_1.shape[0]
trainPortion = round(nRows * 0.7)
valPortion = round(nRows * 0.15)
valEnd = trainPortion + valPortion

trainData = x_val_1[:trainPortion]
valData = x_val_1[trainPortion:valEnd]
testData = x_val_1[valEnd:]
print('We have %d training, %d validation, and %d test points' %(len(trainData), len(valData), len(testData)))

In [None]:
# Plot y against Date, one trace per subset, to show where the
# train / validation / test boundaries fall.
splitEnd = trainPortion + valPortion
fig = go.Figure()
for rows, label in (
    (slice(None, trainPortion), "Training Set"),
    (slice(trainPortion, splitEnd), 'Validation Set'),
    (slice(splitEnd, None), 'Test Set'),
):
    part = df[rows]
    fig.add_trace(go.Scatter(x=part['Date'], y=part['y'], name=label))
fig.show()

In [None]:
# Print the (rows, features) shape of each feature subset.
for subset in (trainData, valData, testData):
    print(subset.shape)

In [None]:
# Min-max scale the x1/x2 features. The scaler is fitted on the training
# rows only and then applied unchanged to all three subsets.
sc = MinMaxScaler().fit(trainData)
trainNorm, valNorm, testNorm = (
    sc.transform(part) for part in (trainData, valData, testData)
)

1. Explore regular feedforward neural network models for this problem.

(a) Report the unnormalized MAE of the test set on your best model.

In [None]:
# Target vector: y as a NumPy array, cut at the same row boundaries as the
# x1/x2 feature split so that features and targets stay aligned.
y_val = df.loc[:,'y'].values
trainY = y_val[:trainPortion]
valY = y_val[trainPortion:trainPortion + valPortion]
testY = y_val[trainPortion + valPortion:]
# Report the sizes of the target subsets created in this cell (not the
# feature arrays from the earlier split).
print('We have %d training, %d validation, and %d test points' %(len(trainY), len(valY), len(testY)))

In [None]:
# Feedforward regressor: two scaled features in, two hidden ReLU layers,
# one linear output. Trained directly on mean absolute error.
model = Sequential([
    Dense(64, activation='relu', input_shape=(2,)),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
])
model.compile(
    loss='mae',
    optimizer='adam',
    metrics=['mean_absolute_error'],
)
model.summary()

In [None]:
# Stop once validation loss has gone 5 epochs without improving, and ask
# Keras to restore the best weights seen.
checkpoint = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)
callbacks_list = [checkpoint]

# Fit on the scaled training features; the validation split drives the
# early-stopping callback.
network = model.fit(
    trainNorm,
    trainY,
    validation_data=(valNorm, valY),
    epochs=100,
    batch_size=64,
    callbacks=callbacks_list,
)

In [None]:
# Score the feedforward model on the held-out test rows.
testNormPred = model.predict(testNorm)
mae = tf.keras.metrics.MeanAbsoluteError()
testMae = mae(testY.reshape(-1, 1), testNormPred).numpy()

# Scatter the predictions and the true targets, with the test MAE in the title.
fig = go.Figure()
for values, label in (
    (testNormPred, 'Model Predictions on Test Set'),
    (testY, 'Target Values for the Test Set'),
):
    fig.add_trace(go.Scatter(y=values.reshape(-1,), mode='markers', name=label))
fig.update_layout(title_text='Unnormalized MAE Test = ' + str(np.mean(testMae)))
fig.show()

In [None]:
# testMae is a mean absolute error (the model is compiled and scored with MAE),
# so the message names MAE rather than MSE.
print('The best model has an unnormalized MAE of: ', testMae)

(b) Plot the loss curves for training and validation sets for the best model.

In [None]:
# Training and validation loss (MAE) per epoch for the fitted model.
# The targets are never passed through the scaler, so these losses are in the
# original units of y, i.e. unnormalized.
# The EarlyStopping callback was created with restore_best_weights=True, so the
# lowest validation loss is reported rather than the last epoch's value, which
# comes from the patience window after the best epoch.
# NOTE(review): on older Keras releases the best weights are only restored when
# training actually stops early - confirm for the installed version.
valMae = round(min(network.history['val_loss']), 2)
fig = go.Figure()
fig.add_trace(go.Scatter(y=network.history['loss'],
                         mode='lines',
                         name='Training Error'))
fig.add_trace(go.Scatter(y=network.history['val_loss'],
                         mode='lines',
                         name='Validation Error'))
fig.update_layout(yaxis_title = 'Mean Absolute Error',
                  xaxis_title = 'epoch',
                  title_text='Unnormalized MAE Validation = ' +
                  str(valMae))
fig.show()

(c) What are the predicted values of y for March 1st and March 2nd?

In [None]:
# Forecast y for March 1st and March 2nd with the feedforward model.
# This model maps one (x1, x2) row to y, and no feature rows exist beyond the
# end of the data, so the last observed (scaled) feature row is reused for both
# dates. The two forecasts are therefore identical by construction.
# NOTE(review): a genuine forecast needs x1/x2 values for the target dates
# (e.g. from a separate forecast of the features) - confirm the intended approach.
march_values = testNorm[-1]
march1 = model.predict(march_values.reshape(1, 2))

# Same input as March 1st, so copy the result instead of running predict again.
march2 = march1.copy()

print(f'The predicted values of y for March 1st and March 2nd are: {march1[-1]} and {march2[-1]}')

2. Explore recurrent neural network models for this problem.

In [None]:
def createSeq(dataset_x, dataset_y, look_back, foresight):
    """Slice a feature array into fixed-length windows with one target each.

    Window ``k`` holds rows ``k`` to ``k + look_back - 1`` of ``dataset_x``;
    its target is ``dataset_y[k + look_back + foresight]``, which lies
    ``foresight + 1`` rows after the last row of the window.

    Args:
        dataset_x: 2-D array of features, indexed as ``dataset_x[a:b, :]``.
        dataset_y: Sequence of targets, indexed by row position.
        look_back: Number of consecutive rows in each window.
        foresight: Extra offset added to ``look_back`` to locate the target.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays holding the stacked windows and
        their targets. Both are empty when the input is too short to yield
        any window.
    """
    n_windows = len(dataset_x) - look_back - foresight
    starts = range(n_windows)
    windows = [dataset_x[s:s + look_back, :] for s in starts]
    targets = [dataset_y[s + look_back + foresight] for s in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Turn each scaled feature subset into 7-row windows with their matching
# y targets, using the same window settings for all three subsets.
seqArgs = dict(look_back=7, foresight=1)
trainNormX, trainNormY = createSeq(trainNorm, trainY, **seqArgs)
valNormX, valNormY = createSeq(valNorm, valY, **seqArgs)
testNormX, testNormY = createSeq(testNorm, testY, **seqArgs)

# Print the window and target shapes for each subset.
for seqX, seqY in (
    (trainNormX, trainNormY),
    (valNormX, valNormY),
    (testNormX, testNormY),
):
    print(seqX.shape, seqY.shape)

(a) Report the unnormalized MAE of the test set on your best model. 

In [None]:
# Recurrent regressor: an LSTM over 7-step windows of 2 features, followed by
# a small dense head with one linear output. Trained on mean absolute error.
model = Sequential([
    # input_shape is (time steps, number of features)
    LSTM(64, input_shape=(7, 2), dropout=0.1, recurrent_dropout=0.1),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
])
model.compile(
    loss='mae',
    optimizer='adam',
    metrics=['mean_absolute_error'],
)
model.summary()

In [None]:
# Stop once validation loss has gone 5 epochs without improving, and ask
# Keras to restore the best weights seen.
checkpoint = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)
callbacks_list = [checkpoint]

# Fit on the windowed training data; the validation windows drive the
# early-stopping callback.
network = model.fit(
    trainNormX,
    trainNormY,
    validation_data=(valNormX, valNormY),
    epochs=100,
    batch_size=64,
    callbacks=callbacks_list,
)

In [None]:
# Score the recurrent model on the windowed test set.
testNormPred = model.predict(testNormX)
testPred = testNormPred
# Keep the windowed targets under their own name. Rebinding testY here would
# leave it shorter than testNorm, so re-running the feedforward evaluation or
# the createSeq cell afterwards would silently use misaligned targets.
seqTestY = testNormY.reshape(-1, 1)
mae = tf.keras.metrics.MeanAbsoluteError()
testMae = mae(seqTestY, testPred).numpy()

# Scatter the predictions and the true targets, with the test MAE in the title.
fig = go.Figure()
fig.add_trace(go.Scatter(y=testPred.reshape(-1,),
                         mode='markers',
                         name='Model Predictions on Test Set'))
fig.add_trace(go.Scatter(y=seqTestY.reshape(-1,),
                         mode='markers',
                         name='Target Values for the Test Set'))
# testMae is already a scalar, so no further averaging is needed.
fig.update_layout(title_text='Unnormalized MAE Test = '
                  + str(testMae))
fig.show()

In [None]:
# testMae is a mean absolute error (the model is compiled and scored with MAE),
# so the message names MAE rather than MSE.
print('The best model has an unnormalized MAE of: ', testMae)

(b) Plot the loss curves for training and validation sets for the best model. 

In [None]:
# Training and validation loss (MAE) per epoch for the fitted model.
# The windowed targets come from the raw y values, which are never passed
# through the scaler, so these losses are in the original units of y.
# The EarlyStopping callback was created with restore_best_weights=True, so the
# lowest validation loss is reported rather than the last epoch's value, which
# comes from the patience window after the best epoch.
# NOTE(review): on older Keras releases the best weights are only restored when
# training actually stops early - confirm for the installed version.
valMae = round(min(network.history['val_loss']), 2)
fig = go.Figure()
fig.add_trace(go.Scatter(y=network.history['loss'],
                         mode='lines',
                         name='Training Error'))
fig.add_trace(go.Scatter(y=network.history['val_loss'],
                         mode='lines',
                         name='Validation Error'))
fig.update_layout(yaxis_title = 'Mean Absolute Error',
                  xaxis_title = 'epoch',
                  title_text='Unnormalized MAE Validation = ' +
                  str(valMae))
fig.show()

(c) What are the predicted values of y for March 1st and March 2nd?

In [None]:
# Forecast y for March 1st and March 2nd with the sequence model.
# createSeq(look_back=7, foresight=1) pairs the window x[i:i+7] with y[i+8],
# so each target lies two rows after the last row of its window.
# testNormX[-2] and testNormX[-1] are therefore the windows whose targets are
# the last two rows already present in the data, not future dates. The windows
# for the next two unseen rows must be cut from the scaled features directly:
#   - rows [-8:-1] end one row before the last row -> target is the 1st row past the data
#   - rows [-7:]   end on the last row             -> target is the 2nd row past the data
# NOTE(review): assumes rows are consecutive days and the last row is the last
# day of February 2020 - confirm against the data.
march_values = testNorm[-8:-1]
march1 = model.predict(march_values.reshape(1, 7, 2))

march_shift = testNorm[-7:]
march2 = model.predict(march_shift.reshape(1, 7, 2))

print(f'The predicted values of y for March 1st and March 2nd are: {march1[-1]} and {march2[-1]}')

3. Explore 1d convolutional neural network models for this problem.

(a) Report the unnormalized MAE of the test set on your best model. 

In [None]:
# 1-D convolutional regressor over 7-step windows of 2 features: one Conv1D
# layer, max pooling, then a small dense head with one linear output.
# Trained on mean absolute error.
model = Sequential([
    # input_shape is (time steps, number of features)
    Conv1D(64, kernel_size=5, input_shape=(7, 2), activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
])
model.compile(
    loss='mae',
    optimizer='adam',
    metrics=['mean_absolute_error'],
)
model.summary()

In [None]:
# Stop once validation loss has gone 5 epochs without improving, and ask
# Keras to restore the best weights seen.
checkpoint = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)
callbacks_list = [checkpoint]

# Fit on the windowed training data; the validation windows drive the
# early-stopping callback.
network = model.fit(
    trainNormX,
    trainNormY,
    validation_data=(valNormX, valNormY),
    epochs=100,
    batch_size=64,
    callbacks=callbacks_list,
)

In [None]:
# Score the convolutional model on the windowed test set.
testNormPred = model.predict(testNormX)
testPred = testNormPred
# Keep the windowed targets under their own name instead of rebinding testY,
# which would leave it misaligned with testNorm for any cell re-run later.
# They are reshaped to a column so targets and predictions have the same
# (n, 1) shape, matching the other evaluation cells.
seqTestY = testNormY.reshape(-1, 1)
mae = tf.keras.metrics.MeanAbsoluteError()
testMae = mae(seqTestY, testPred).numpy()

# Scatter the predictions and the true targets, with the test MAE in the title.
fig = go.Figure()
fig.add_trace(go.Scatter(y=testPred.reshape(-1,),
                         mode='markers',
                         name='Model Predictions on Test Set'))
fig.add_trace(go.Scatter(y=seqTestY.reshape(-1,),
                         mode='markers',
                         name='Target Values for the Test Set'))
# testMae is already a scalar, so no further averaging is needed.
fig.update_layout(title_text='Unnormalized MAE Test = '
                  + str(testMae))
fig.show()

In [None]:
# testMae is a mean absolute error (the model is compiled and scored with MAE),
# so the message names MAE rather than MSE.
print('The best model has an unnormalized MAE of: ', testMae)

(b) Plot the loss curves for training and validation sets for the best model. 

In [None]:
# Training and validation loss (MAE) per epoch for the fitted model.
# The windowed targets come from the raw y values, which are never passed
# through the scaler, so these losses are in the original units of y.
# The EarlyStopping callback was created with restore_best_weights=True, so the
# lowest validation loss is reported rather than the last epoch's value, which
# comes from the patience window after the best epoch.
# NOTE(review): on older Keras releases the best weights are only restored when
# training actually stops early - confirm for the installed version.
valMae = round(min(network.history['val_loss']), 2)
fig = go.Figure()
fig.add_trace(go.Scatter(y=network.history['loss'],
                         mode='lines',
                         name='Training Error'))
fig.add_trace(go.Scatter(y=network.history['val_loss'],
                         mode='lines',
                         name='Validation Error'))
fig.update_layout(yaxis_title = 'Mean Absolute Error',
                  xaxis_title = 'epoch',
                  title_text='Unnormalized MAE Validation = ' +
                  str(valMae))
fig.show()

(c) What are the predicted values of y for March 1st and March 2nd?

In [None]:
# Forecast y for March 1st and March 2nd with the sequence model.
# createSeq(look_back=7, foresight=1) pairs the window x[i:i+7] with y[i+8],
# so each target lies two rows after the last row of its window.
# testNormX[-2] and testNormX[-1] are therefore the windows whose targets are
# the last two rows already present in the data, not future dates. The windows
# for the next two unseen rows must be cut from the scaled features directly:
#   - rows [-8:-1] end one row before the last row -> target is the 1st row past the data
#   - rows [-7:]   end on the last row             -> target is the 2nd row past the data
# NOTE(review): assumes rows are consecutive days and the last row is the last
# day of February 2020 - confirm against the data.
march_values = testNorm[-8:-1]
march1 = model.predict(march_values.reshape(1, 7, 2))

march_shift = testNorm[-7:]
march2 = model.predict(march_shift.reshape(1, 7, 2))

print(f'The predicted values of y for March 1st and March 2nd are: {march1[-1]} and {march2[-1]}')

In this problem, you need to apply all the ML foundations and techniques that you’ve learned in the program so far. These include, but are not limited to: <b>data cleaning, data imputation for missing values, full exploratory analysis along with relevant visualizations, statistical tests, data normalization, data splitting, addressing overfitting/underfitting, tuning hyperparameters, model assessment, etc. Feel free to apply the techniques you’ve learned in your time-series course here when relevant.</b>