In [15]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import utils

# For LSTM model
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping
from tqdm.keras import TqdmCallback

# For hyperopt (parameter optimization)
from hyperopt import Trials, STATUS_OK, tpe, fmin, hp
from hyperopt.pyll.base import scope  # quniform returns float, some parameters require int; use this to force int

# Evaluation metrics
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

%matplotlib inline

In [3]:
def format_data(data, weeks=1):
    """Return the windowed, scaled train/validation/test arrays without the scalers.

    This used to be a line-for-line copy of ``format_scale_data`` that fitted a
    target scaler and then discarded it. It now delegates to that function so the
    slicing/splitting logic lives in exactly one place; the shape prints and the
    returned arrays are the ones ``format_scale_data`` produces.

    Parameters
    ----------
    data : pandas.DataFrame
        Weekly feature frame; forwarded unchanged to ``format_scale_data``.
    weeks : int, default 1
        Number of past weeks in each input window.

    Returns
    -------
    tuple
        ``(trainX, trainY, validX, validY, testX, testY)`` — the first six items
        returned by ``format_scale_data``.
    """
    # The trailing two items are the fitted scalers, which callers of this
    # function do not need.
    return format_scale_data(data, weeks=weeks)[:6]

In [1]:
def format_scale_data(data, weeks=1):
    """Window, scale and split the weekly frame, returning the fitted scalers too.

    Parameters
    ----------
    data : pandas.DataFrame
        Weekly observations, one column per feature. Column index 1 is used as
        the prediction target.
    weeks : int, default 1
        Number of past weeks fed to the model for each prediction.

    Returns
    -------
    tuple
        ``(trainX, trainY, validX, validY, testX, testY, scaler, y_scaler)``.
        The ``*X`` arrays have shape ``(samples, weeks, features)``; the ``*Y``
        arrays are 1-D. ``scaler`` is what ``utils.scale`` returned for the full
        feature matrix, ``y_scaler`` what it returned for the target column
        alone (used by callers to inverse-transform predictions).
    """
    # Only the first 209 rows of the frame are used.
    X_total = data.values[:209, :]

    # Start `weeks` rows before row 53 so the lag rows consumed by the window are
    # available. Presumably series_to_supervised(dropnan=True) drops those first
    # `weeks` rows, so the first target is row 53 for every window length —
    # TODO confirm against utils.
    windowed = X_total[(53 - weeks):, :]
    print(windowed.shape)

    # NOTE(review): both scalers are fitted on the whole windowed range, i.e.
    # before the train/valid/test split. If utils.scale derives statistics from
    # its input, this lets validation/test rows influence the scaling — confirm.
    scaler, values = utils.scale(windowed)
    values = utils.series_to_supervised(values, n_in=weeks, n_out=1, dropnan=True).values

    # Scaler fitted on the target column only; the scaled copy it returns is not needed.
    y_scaler, _ = utils.scale(windowed[:, 1].reshape((len(windowed), 1)))

    # Chronological split: 140 training rows, 8 validation rows, remainder for test.
    train = values[:140, :]
    valid = values[140:148]
    test = values[148:, :]
    print(train.shape, valid.shape, test.shape)

    # Derived from the input instead of being hard-coded to 13, so the reshape
    # below stays valid when columns are added or removed.
    features = windowed.shape[1]
    obs = weeks * features

    def _split_xy(block):
        # Inputs: the leading `weeks` groups of columns, reshaped to
        # (samples, weeks, features). Target: column 1 of the final group.
        inputs = block[:, :obs].reshape((block.shape[0], weeks, features))
        target = block[:, -features:][:, 1]
        return inputs, target

    trainX, trainY = _split_xy(train)
    validX, validY = _split_xy(valid)
    testX, testY = _split_xy(test)

    return trainX, trainY, validX, validY, testX, testY, scaler, y_scaler

In [18]:
def run(max_evals=50, csv_path='../saltlake_week.csv', model_path='Model/LSTM'):
    """Tune an LSTM with hyperopt, then save, evaluate and plot the best model.

    Each trial samples dropout, unit count, layer count, window length (weeks)
    and activation, trains with early stopping on the validation split, and
    reports the lowest validation loss. The best trial's model is saved, then
    scored on the test split in the original (inverse-scaled) units.

    Parameters
    ----------
    max_evals : int, default 50
        Number of hyperopt trials.
    csv_path : str, default '../saltlake_week.csv'
        Weekly dataset to read.
    model_path : str, default 'Model/LSTM'
        Where the best model is saved.

    Returns
    -------
    None
        Results are printed and plotted.
    """
    feature_columns = ['Cases', 'VMT (Veh-Miles)', 'News Sentiment', 'Unemployment Rate', 'PRCP', 'SNWD',
                       'Percent_Fully_Vaccinated_5&Older', 'TAVG',
                       'Stay at Home', 'Mask', 'School Opening', 'Health Emergency', 'Holiday']

    # Read the CSV once instead of once per trial; the file does not change
    # between trials.
    data = pd.read_csv(csv_path)[feature_columns]

    def f_nn(params):
        # Generate data with given window. A copy is passed so every trial
        # starts from the same untouched frame (the utils helpers are opaque here).
        trainX, trainY, validX, validY, testX, testY = format_data(data=data.copy(), weeks=params['weeks'])

        # Keras LSTM model: `layers` stacked LSTM+Dropout pairs and a Dense(1) head
        model = Sequential()
        n_layers = params['layers']
        for layer_idx in range(n_layers):
            layer_kwargs = {'units': params['units'], 'activation': params['activation']}
            if layer_idx == 0:
                # Only the first layer declares the input shape
                layer_kwargs['input_shape'] = (trainX.shape[1], trainX.shape[2])
            if layer_idx < n_layers - 1:
                # Every layer except the last feeds a full sequence to the next LSTM
                layer_kwargs['return_sequences'] = True
            model.add(LSTM(**layer_kwargs))
            model.add(Dropout(rate=params['dropout']))

        model.add(Dense(1))
        model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])

        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, restore_best_weights=True)

        result = model.fit(trainX, trainY, verbose=0, validation_data=(validX, validY),
                           batch_size=4,
                           epochs=200,
                           callbacks=[es, TqdmCallback(verbose=1)]
                           )

        # get the lowest validation loss of the training epochs
        validation_loss = np.amin(result.history['val_loss'])
        print('Best validation loss of epoch:', validation_loss)

        # The model and params ride along in the trial result so the best
        # trial can be retrieved after the search.
        return {'loss': validation_loss, 'status': STATUS_OK, 'model': model, 'params': params}

    # hyperparameters to search over with hyperopt
    space = {'dropout': hp.uniform('dropout', 0.01, 0.5),
             'units': scope.int(hp.quniform('units', 8, 128, 4)),
             'layers': scope.int(hp.quniform('layers', 1, 6, 1)),
             'weeks': scope.int(hp.quniform('weeks', 1, 10, 1)),
             'activation': hp.choice('activation', ['relu', 'sigmoid', 'tanh'])
             }

    trials = Trials()
    # fmin's return value is not used: the best trial is looked up in
    # `trials.results`, which also carries the trained model.
    fmin(f_nn, space, algo=tpe.suggest, max_evals=max_evals, trials=trials)

    # get best model (locate the best trial once, then read both fields from it)
    best_trial = trials.results[int(np.argmin([r['loss'] for r in trials.results]))]
    best_model = best_trial['model']
    best_params = best_trial['params']

    print(best_params)
    # summary() prints by itself and returns None; wrapping it in print() added a stray "None"
    best_model.summary()
    best_model.save(model_path)

    # Rebuild the splits for the winning window length, this time keeping the
    # target scaler so predictions can be mapped back to original units.
    trainX, trainY, validX, validY, testX, testY, scaler, y_scaler = format_scale_data(
        data=data.copy(), weeks=best_params['weeks'])

    best_model.evaluate(testX, testY)

    yhat_test = best_model.predict(testX)
    yhat_test_inv = y_scaler.inverse_transform(yhat_test).reshape((-1, 1))
    testY_inv = y_scaler.inverse_transform(testY.reshape((-1, 1)))

    yhat_train = best_model.predict(trainX).reshape((-1, 1))
    yhat_train_inv = y_scaler.inverse_transform(yhat_train)
    trainY_inv = y_scaler.inverse_transform(trainY.reshape((-1, 1)))

    # RMSE is taken as sqrt(MSE): the `squared=False` argument of
    # mean_squared_error is deprecated and removed in newer scikit-learn.
    test_mse = mean_squared_error(testY_inv, yhat_test_inv)
    print("Mean Squared Error: {}".format(test_mse))
    print("Root Mean Squared Error: {}".format(np.sqrt(test_mse)))
    print("Mean Absolute Percentage Error: {}".format(mean_absolute_percentage_error(testY_inv, yhat_test_inv)))

    for title, predicted, actual in (('Test set', yhat_test_inv, testY_inv),
                                     ('Training set', yhat_train_inv, trainY_inv)):
        fig, ax = plt.subplots()
        ax.plot(predicted, label='Predicted')
        ax.plot(actual, label='True')
        ax.set_title(title)
        ax.legend()
        plt.show()

In [None]:
# Runs the full hyperparameter search, then saves, evaluates and plots the best model.
# Long-running: one LSTM is trained per hyperopt trial.
run()

(164, 13)                                             
(140, 117)                                            
(8, 117)                                              
(8, 117)                                              
  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

Restoring model weights from the end of the best epoch: 5.
Epoch 00015: early stopping                           
Best validation loss of epoch:                        
0.004936600103974342                                  
(157, 13)                                                                         
(140, 26)                                                                         
(8, 26)                                                                           
(8, 26)                                                                           
  2%|▏         | 1/50 [00:28<23:06, 28.30s/trial, best loss: 0.004936600103974342]

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

Restoring model weights from the end of the best epoch: 1.                        
Epoch 00011: early stopping                                                       
Best validation loss of epoch:                                                    
0.023326240479946136                                                              
(158, 13)                                                                         
(140, 39)                                                                         
(8, 39)                                                                           
(8, 39)                                                                           
  4%|▍         | 2/50 [00:34<12:03, 15.08s/trial, best loss: 0.004936600103974342]

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

Restoring model weights from the end of the best epoch: 3.                        
Epoch 00013: early stopping                                                       
Best validation loss of epoch:                                                    
0.005484757944941521                                                              
(166, 13)                                                                         
(140, 143)                                                                        
(8, 143)                                                                          
(8, 143)                                                                          
  6%|▌         | 3/50 [00:42<09:29, 12.12s/trial, best loss: 0.004936600103974342]

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

Restoring model weights from the end of the best epoch: 2.                        
Epoch 00012: early stopping                                                       
Best validation loss of epoch:                                                    
0.0005819080979563296                                                             
(161, 13)                                                                          
(140, 78)                                                                          
(8, 78)                                                                            
(8, 78)                                                                            
  8%|▊         | 4/50 [01:01<11:18, 14.74s/trial, best loss: 0.0005819080979563296]

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

Restoring model weights from the end of the best epoch: 2.                         
Epoch 00012: early stopping                                                        
Best validation loss of epoch:                                                     
0.011005940847098827                                                               
(158, 13)                                                                          
(140, 39)                                                                          
(8, 39)                                                                            
(8, 39)                                                                            
 10%|█         | 5/50 [01:18<11:41, 15.58s/trial, best loss: 0.0005819080979563296]

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

Restoring model weights from the end of the best epoch: 3.                         
Epoch 00013: early stopping                                                        
Best validation loss of epoch:                                                     
0.0005510802147909999                                                              
(161, 13)                                                                          
(140, 78)                                                                          
(8, 78)                                                                            
(8, 78)                                                                            
 12%|█▏        | 6/50 [01:38<12:22, 16.88s/trial, best loss: 0.0005510802147909999]

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

Restoring model weights from the end of the best epoch: 10.                        
Epoch 00020: early stopping                                                        
Best validation loss of epoch:                                                     
0.00034942664206027985                                                             
(165, 13)                                                                           
(140, 130)                                                                          
(8, 130)                                                                            
(8, 130)                                                                            
 14%|█▍        | 7/50 [01:50<11:09, 15.58s/trial, best loss: 0.00034942664206027985]

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]