In [1]:
import config
import os
import random
import utils

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

# For LSTM model
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping
from tqdm.keras import TqdmCallback

# For hyperopt (parameter optimization)
from hyperopt import Trials, STATUS_OK, tpe, fmin, hp
from hyperopt.pyll.base import scope  # quniform returns float, some parameters require int; use this to force int

# Evaluation metrics
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

%matplotlib inline

In [2]:
# Reproducibility: pin every random number generator the notebook relies on
# (TensorFlow, NumPy and the stdlib `random` module) to one shared seed.
seed_value = 42
tf.random.set_seed(seed_value)
np.random.seed(seed_value)
random.seed(seed_value)
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here presumably only affects child processes -- confirm whether
# the kernel should be launched with it already exported instead.
os.environ.update(PYTHONHASHSEED=str(seed_value))

In [3]:
def format_scale_data(data, weeks=1):
    """Scale a feature table and split it into windowed train/validation/test sets.

    Steps, in order:

    1. Drop the first ``53 - weeks`` rows of ``data``.
    2. Scale the remaining rows with ``utils.scale`` and reframe them with
       ``utils.series_to_supervised(n_in=weeks, n_out=1)``. The code below
       treats the first ``weeks * n_features`` columns of each reframed row as
       the lagged inputs and the last ``n_features`` columns as the current
       time step.
    3. Fit a second scaler on the target column alone (column index 1) so that
       predictions can be mapped back to original units.
    4. Split the reframed rows by position: rows 0-139 for training, 140-143
       and 144-147 for two validation sets, and everything from 148 on for
       testing.

    The number of features is taken from ``data`` itself rather than being
    hard-coded, so the function keeps working if columns are added or removed.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table; column index 1 is used as the prediction target.
    weeks : int, default 1
        Number of lagged time steps fed to the model for each sample.

    Returns
    -------
    tuple
        ``(trainX, trainY, validX1, validY1, validX2, validY2, testX, testY,
        scaler, y_scaler)``. Every ``*X`` array has shape
        ``(samples, weeks, n_features)``, every ``*Y`` array is the 1-D scaled
        target, ``scaler`` was fitted on all columns and ``y_scaler`` on the
        target column only.
    """
    target_col = 1  # position of the target inside each block of features

    raw = data.values
    n_features = raw.shape[1]

    # Start the series (53 - weeks) rows in, so the first supervised sample
    # lines up with the same row regardless of the window length.
    # NOTE(review): the meaning of the constant 53 is not visible here -- confirm.
    window = raw[(53 - weeks):, :]
    print(window.shape)

    scaler, scaled = utils.scale(window)
    supervised = utils.series_to_supervised(scaled, n_in=weeks, n_out=1, dropnan=True).values

    # Only the fitted scaler is needed; the scaled target itself is read back
    # out of `supervised` below.
    y_scaler, _ = utils.scale(window[:, target_col].reshape((len(window), 1)))

    n_inputs = weeks * n_features

    def _split_xy(rows):
        """Return (inputs shaped for the LSTM, scaled target) for a block of rows."""
        inputs = rows[:, :n_inputs].reshape((rows.shape[0], weeks, n_features))
        target = rows[:, -n_features:][:, target_col]
        return inputs, target

    trainX, trainY = _split_xy(supervised[:140, :])
    validX1, validY1 = _split_xy(supervised[140:144, :])
    validX2, validY2 = _split_xy(supervised[144:148, :])
    testX, testY = _split_xy(supervised[148:, :])

    print(trainX.shape, validX1.shape, validX2.shape, testX.shape)

    return trainX, trainY, validX1, validY1, validX2, validY2, testX, testY, scaler, y_scaler

In [4]:
def run():
    """Tune an 8-layer stacked LSTM with hyperopt, then save and evaluate the best model.

    Each of the 60 trials builds a model of eight LSTM layers (each followed by
    Dropout) and a single-unit Dense output, trains it with early stopping, and
    reports the lowest validation loss seen on the first validation split. The
    trial with the lowest loss is saved to ``'Model/LSTM-8'`` and then
    evaluated on the second validation split, both in scaled units
    (``model.evaluate``) and in original units (MSE, RMSE, MAPE). Predictions
    for the second validation split and for the training split are plotted
    against the true values.

    Returns
    -------
    The fitted Keras model from the best trial.
    """
    n_layers = 8
    feature_columns = ['Cases', 'VMT (Veh-Miles)', 'News Sentiment', 'Unemployment Rate', 'PRCP', 'SNWD',
                       'Percent_Fully_Vaccinated_5&Older', 'TAVG',
                       'Stay at Home', 'Mask', 'School Opening', 'Health Emergency', 'Holiday']

    # Read the CSV once instead of once per trial; the file does not change
    # during the search.
    features_df = pd.read_csv('../saltlake_week.csv')[feature_columns]

    def f_nn(params):
        """hyperopt objective: train one model and return its best validation loss."""
        # Generate data with the window length chosen for this trial. A copy is
        # passed so that a trial can never see changes made by a previous one,
        # in case the helpers in `utils` modify their input in place.
        trainX, trainY, validX1, validY1, *_ = format_scale_data(data=features_df.copy(),
                                                                 weeks=params['weeks'])

        # Keras LSTM model: every layer but the last returns full sequences so
        # that the next LSTM layer receives 3-D input.
        model = Sequential()
        for layer_idx in range(1, n_layers + 1):
            layer_kwargs = {'activation': 'relu',
                            'return_sequences': layer_idx < n_layers}
            if layer_idx == 1:
                layer_kwargs['input_shape'] = (trainX.shape[1], trainX.shape[2])
            model.add(LSTM(units=params['units_{}'.format(layer_idx)], **layer_kwargs))
            model.add(Dropout(rate=params['dropout']))
        model.add(Dense(1))
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=params['learning_rate']),
                      loss='mean_squared_error', metrics=['mse', 'mape'])

        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, restore_best_weights=True)

        result = model.fit(trainX, trainY, verbose=0, validation_data=(validX1, validY1),
                           batch_size=4,
                           epochs=200,
                           callbacks=[es, TqdmCallback(verbose=1)]
                           )

        # get the lowest validation loss of the training epochs
        # NOTE(review): if training diverges this can be NaN, and a NaN loss
        # would win the argmin below -- consider rejecting non-finite losses.
        validation_loss = np.amin(result.history['val_loss'])
        print('Best validation loss of epoch:', validation_loss)

        return {'loss': validation_loss, 'status': STATUS_OK, 'model': model, 'params': params}

    # hyperparameters to search over with hyperopt
    space = {'dropout': hp.uniform('dropout', 0.01, 0.5)}
    for layer_idx in range(1, n_layers + 1):
        units_name = 'units_{}'.format(layer_idx)
        space[units_name] = scope.int(hp.quniform(units_name, 8, 128, 4))
    space['weeks'] = scope.int(hp.quniform('weeks', 1, 10, 1))
    space['learning_rate'] = hp.uniform('learning_rate', 0.001, 0.1)

    trials = Trials()
    fmin(f_nn, space, algo=tpe.suggest, max_evals=60, trials=trials)

    # get best model (the trial results carry both the model and its params)
    best_result = trials.results[np.argmin([r['loss'] for r in trials.results])]
    best_model = best_result['model']
    best_params = best_result['params']

    # save best model
    print(best_params)
    best_model.summary()  # summary() prints by itself; wrapping it in print() adds a stray "None"
    # NOTE(review): newer Keras releases appear to require a '.keras' or '.h5'
    # suffix on the save path -- confirm against the installed version.
    best_model.save('Model/LSTM-8')

    # rebuild the splits with the window length of the winning trial
    trainX, trainY, _, _, validX2, validY2, _, _, _, y_scaler = format_scale_data(data=features_df.copy(),
                                                                                  weeks=best_params['weeks'])

    def _to_original_units(scaled):
        """Map scaled target values back to original units as a column vector."""
        return y_scaler.inverse_transform(scaled.reshape((-1, 1)))

    def _plot_fit(predicted, actual, title):
        """Plot predicted against true values in a new figure."""
        plt.figure()
        plt.plot(predicted, label='Predicted')
        plt.plot(actual, label='True')
        plt.legend()
        plt.title(title)
        plt.show()

    # evaluate model on second validation set
    best_model.evaluate(validX2, validY2)

    # invert predictions of model
    yhat_valid2 = best_model.predict(validX2)
    yhat_valid2_inv = _to_original_units(yhat_valid2)
    validY2_inv = _to_original_units(validY2)

    print(yhat_valid2.shape)
    print(yhat_valid2_inv.shape)
    print(validY2_inv.shape)

    yhat_train_inv = _to_original_units(best_model.predict(trainX))
    trainY_inv = _to_original_units(trainY)

    # evaluate model with inverted features; RMSE is derived from the MSE
    # because the `squared=False` argument is deprecated in recent scikit-learn.
    mse = mean_squared_error(validY2_inv, yhat_valid2_inv)
    print("Mean Squared Error: {}".format(mse))
    print("Root Mean Squared Error: {}".format(np.sqrt(mse)))
    print("Mean Absolute Percentage Error: {}".format(mean_absolute_percentage_error(validY2_inv, yhat_valid2_inv)))

    # plot model predictions
    _plot_fit(yhat_valid2_inv, validY2_inv, "Validation Data 2")
    _plot_fit(yhat_train_inv, trainY_inv, "Training Data")

    return best_model

In [None]:
# Launch the full hyperparameter search in run() (60 trials, each training a
# model) and keep the best fitted model it returns. Expect this cell to be slow.
model8 = run()