<a href="https://colab.research.google.com/github/adamoah/LSTM-Network/blob/main/GithubRepoCode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#import libraries
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import phik


#RMSE loss function
def root_mean_squared_error(y_true, y_pred):
  """Return the root-mean-squared error between predictions and targets.

  Built from Keras backend ops so it can be handed to ``model.compile`` as a
  loss. No axis is given to the mean, so it is taken over every element.
  """
  squared_error = K.square(y_pred - y_true)
  return K.sqrt(K.mean(squared_error))


#Set seed for random number generator
# NOTE(review): this seeds NumPy's global generator only; presumably the Keras
# backend draws weight initialisations from its own RNG, so training runs may
# still differ between executions - confirm and seed the backend if needed.
np.random.seed(42)


#Data preparation
Chicago_df = pd.read_csv('Chicago-Network-Data.csv')

# Rows 8..486 are used: columns 0 and 1 are the model inputs, column 2 is the
# target. A positional slice replaces the per-row iloc loop; note that a slice
# stops quietly at the end of the frame if the file has fewer than 487 rows.
x = Chicago_df.iloc[8:487, [0, 1]].to_numpy()
y = Chicago_df.iloc[8:487, 2].to_numpy()

# Inputs and target each get their own scaler instead of refitting one object.
# `scaler` stays bound to the target scaler because the prediction code later
# in this cell calls scaler.inverse_transform on the model outputs.
# NOTE(review): both scalers are fit before the train/test split, so the
# min/max of the test rows influence the scaling of the training rows.
input_scaler = MinMaxScaler(feature_range=(0, 1))
input_x = input_scaler.fit_transform(x)

scaler = MinMaxScaler(feature_range=(0, 1))
output_y = scaler.fit_transform(y.reshape(-1, 1))

# shuffle=False keeps the row order, so the last 20% of rows form the test set.
trainX, testX, trainY, testY = train_test_split(input_x, output_y, test_size=0.2, shuffle=False)

# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features).
trainX = trainX.reshape(len(trainX), 1, trainX.shape[1])
testX = testX.reshape(len(testX), 1, testX.shape[1])


#initialize model
# Two stacked 4-unit LSTM layers followed by a single-unit Dense output. The
# first LSTM returns its full sequence so the second one receives 3-D input.
model = Sequential([
  LSTM(4, input_shape=trainX.shape[1:], return_sequences=True),
  LSTM(4),
  Dense(1),
])

model.compile(optimizer='adam', loss=root_mean_squared_error)
print('model is ready to use')

#run model training
# The test split is passed as validation data so that val_loss is recorded in
# the returned history for every epoch.
history = model.fit(
  trainX,
  trainY,
  epochs=100,
  batch_size=1,
  validation_data=(testX, testY),
  verbose=2,
)


#print RMSE of the model for train and test
# evaluate() reports the loss the model was compiled with; train is evaluated
# first, then test.
train_loss, test_loss = [
  model.evaluate(features, targets)
  for features, targets in ((trainX, trainY), (testX, testY))
]
for label, value in (('Train Loss: ', train_loss), ('Test Loss: ', test_loss)):
  print(label, value)

#graph model loss
# One line per recorded history series: training loss, then validation loss.
fig, ax = plt.subplots(figsize=(10, 5))
for curve in ('loss', 'val_loss'):
  ax.plot(history.history[curve])
ax.set(title='Chicago Model Loss', xlabel='epochs', ylabel='loss')
ax.legend(['train', 'test'], loc='upper right')


#get model predictions for train/test data
trainPredictions = model.predict(trainX)
testPredictions = model.predict(testX)

#invert predictions and observed data to be in original scale
# `scaler` is the object that was last fit on the target column, so it maps the
# [0, 1] values back to the target's original range.
output_y = scaler.inverse_transform(output_y)
trainPredictions = scaler.inverse_transform(trainPredictions)
testPredictions = scaler.inverse_transform(testPredictions)

#shift train predictions to match observed data
# NaN-filled arrays the size of the observed series; only the rows belonging to
# each split are filled, so each curve is drawn over its own rows only.
train_end = len(trainPredictions)
trainPredictionsPlot = np.full_like(output_y, np.nan)
trainPredictionsPlot[:train_end, :] = trainPredictions

#shift test predictions to match observed data
testPredictionsPlot = np.full_like(output_y, np.nan)
testPredictionsPlot[train_end:, :] = testPredictions

#plot predictions
fig2, ax2 = plt.subplots(figsize=(9, 8))
for series in (output_y, trainPredictionsPlot, testPredictionsPlot):
  ax2.plot(series)
ax2.set(title='Chicago Model Predictions', ylabel='kg/kg', xlabel='Months')
ax2.legend(['Observed', 'Train', 'Test'], loc='upper right')

#compile all data and convert into pandas df
# Train predictions followed by test predictions restore the original row
# order (the split was not shuffled). The raw inputs, the observed target and
# the predictions are then stacked column-wise in one vectorised call.
predictions = np.concatenate((trainPredictions, testPredictions))
data = np.column_stack((np.asarray(x), output_y.ravel(), predictions.ravel()))
df = pd.DataFrame(data, columns=['Temperature', 'Humidity', 'Observed Ozone', 'Predicted Ozone'])

#display phik correlation matrix of all columns
phik_overview = df.phik_matrix()
# A bare expression is only rendered when it is the last statement of a
# notebook cell; more code follows here, so print the matrix explicitly.
print(phik_overview)

#save model
# NOTE(review): no file extension is given; newer Keras releases reportedly
# require a '.keras' or '.h5' suffix - confirm against the installed version.
model.save('Chicago_Dataset')

#reset model
# Clear the Keras backend session state, then drop this cell's reference to the
# trained model.
K.clear_session()
del model

In [None]:
#import libraries
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

#set random seed
# NOTE(review): this seeds NumPy's global generator only; presumably the Keras
# backend and the hyperopt search use their own RNGs, so results may still
# differ between executions - confirm and seed those separately if needed.
np.random.seed(42)


#data parsing function
def data():
  """Load the Chicago dataset and build scaled train/test splits for hyperas.

  Rows 8-486 of 'Chicago-Network-Data.csv' are used, with columns 0 and 1 as
  inputs and column 2 as the target. Both are min-max scaled to [0, 1], split
  80/20 without shuffling, and the inputs are reshaped to
  (samples, 1, features) for the LSTM layers.

  The result is the tuple trainX, trainY, testX, testY, in the same order as
  the parameters of create_model.
  """
  Chicago_df = pd.read_csv('Chicago-Network-Data.csv')

  # A positional slice replaces the per-row iloc loop. A slice stops quietly at
  # the end of the frame if the file has fewer than 487 rows.
  rows = Chicago_df.iloc[8:487]
  input_x = rows.iloc[:, [0, 1]].to_numpy()
  output_y = rows.iloc[:, 2].to_numpy().reshape(-1, 1)

  # Inputs and target each get their own scaler instead of refitting one object.
  # NOTE(review): both are fit before the split, so the min/max of the test
  # rows influence the scaling of the training rows.
  input_x = MinMaxScaler(feature_range=(0, 1)).fit_transform(input_x)
  output_y = MinMaxScaler(feature_range=(0, 1)).fit_transform(output_y)

  # shuffle=False keeps the row order, so the last 20% of rows form the test set.
  trainX, testX, trainY, testY = train_test_split(input_x, output_y, test_size=0.2, shuffle=False)
  trainX = trainX.reshape(len(trainX), 1, trainX.shape[1])
  testX = testX.reshape(len(testX), 1, testX.shape[1])

  return trainX, trainY, testX, testY


#initialize model function and return necessary hyperas components
def create_model(trainX, trainY, testX, testY):
  """Hyperas model template that builds, trains and scores one LSTM network.

  The double-brace expressions are hyperas search-space placeholders rather
  than ordinary Python; hyperas substitutes a sampled value for each of them
  before the function body is executed. The search covers the number of
  stacked LSTM layers (1 to 3), the units of every layer, the optimizer, the
  batch size and the number of epochs.

  The result is a dict holding the test-set MSE under 'loss', the hyperopt
  status flag under 'status' and the trained network under 'model'.
  """
  layers = {{choice([1, 2, 3])}}
  model = Sequential()

  # Every LSTM that feeds another LSTM has to return its full sequence; only
  # the last one collapses to a single vector for the Dense output layer.
  if layers == 1:
    model.add(LSTM({{choice([4, 8, 16, 32, 64])}}, input_shape=(trainX.shape[1], trainX.shape[2])))
  elif layers == 2:
    model.add(LSTM({{choice([4, 8, 16, 32, 64])}}, input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
    model.add(LSTM({{choice([4, 8, 16, 32, 64])}}))
  else:
    model.add(LSTM({{choice([4, 8, 16, 32, 64])}}, input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
    model.add(LSTM({{choice([4, 8, 16, 32, 64])}}, return_sequences=True))
    model.add(LSTM({{choice([4, 8, 16, 32, 64])}}))

  model.add(Dense(1))
  model.compile(loss='mse', optimizer={{choice(['adam', 'sgd', 'rmsprop'])}})
  model.fit(trainX, trainY, batch_size={{choice([1, 2, 3])}}, epochs={{choice([50, 100, 150, 200])}}, verbose=0, validation_data=(testX, testY))
  # With no extra metrics compiled, evaluate() yields the scalar MSE that
  # hyperopt minimises.
  score = model.evaluate(testX, testY, verbose=0)

  return {'loss': score, 'status': STATUS_OK, 'model': model}


#install pydrive and authorization libraries
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Copy/download the file
fid = drive.ListFile({'q':"title='HyperasOptimization.ipynb'"}).GetList()[0]['id']
f = drive.CreateFile({'id': fid})
f.GetContentFile('HyperasOptimization.ipynb')


#run optimization
# 25 TPE evaluations over the search space declared in create_model, with the
# data supplied by data().
best_run, best_model = optim.minimize(
  model=create_model,
  data=data,
  algo=tpe.suggest,
  max_evals=25,
  trials=Trials(),
  notebook_name='HyperasOptimization',
)

#display best performing model
# Rebuild the splits so the winning model can be scored on the test set.
trainX, trainY, testX, testY = data()
print("Best performing model evaluation: ")
best_evaluation = best_model.evaluate(testX, testY)
print(best_evaluation)
print("Best model hyperparameters: ")
print(best_run)