In [None]:
import pandas as pd
import numpy as np
import random
from statsmodels.tsa.stattools import pacf
from matplotlib.pyplot import figure
from statsmodels.tsa.arima.model import ARIMA
# !pip install scikit-learn==0.24
from sklearn.metrics import mean_absolute_percentage_error
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_lattice as tfl
import matplotlib.ticker as ticker
from matplotlib import font_manager as fm

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the case table and give it a clean 0..n-1 row index in one step.
df = pd.read_csv("CACases.csv", index_col = False).reset_index(drop=True)
df.head()

In [None]:
# Counties analysed in this notebook (must match column names in the case table).
counties = [
    "Los Angeles",
    "San Diego",
    "San Francisco",
    "Santa Barbara",
    "Fresno",
    "Sacramento",
    "Ventura",
    "Riverside",
]

In [None]:
# Build df1: for every county, each value becomes the mean of that day and the
# six days before it (7-day trailing window), which removes the day-of-week effect.
# The first six rows have no full window and are dropped.
# NOTE(review): an identical cell appears again under "Data Smoothing"; one of the
# two can be removed.
smoothed = df[counties].rolling(window=7, min_periods=1).mean()  # NaNs inside a window are skipped
df1 = df.iloc[6:].copy()  # explicit copy so the assignment below never writes into a view of df
df1[counties] = smoothed.iloc[6:]
df1 = df1.set_index("date")

# Data Smoothing
- Remove the day-of-week effect by replacing $y_t$ with the average of the 7 most recent days (inclusive)
    - Each value is then an average over all 7 weekdays, thus the day-of-week effect is removed

In [None]:
# Build df1: for every county, each value becomes the mean of that day and the
# six days before it (7-day trailing window), which removes the day-of-week effect.
# The first six rows have no full window and are dropped.
smoothed = df[counties].rolling(window=7, min_periods=1).mean()  # NaNs inside a window are skipped
df1 = df.iloc[6:].copy()  # explicit copy so the assignment below never writes into a view of df
df1[counties] = smoothed.iloc[6:]
df1 = df1.set_index("date")

In [None]:
# input: original dataset, county we want
# output: all data of that county, in the dataset's row order
def extract(dataset, county):
    """Return one county's column as a fresh, unnamed Series.

    Parameters
    ----------
    dataset : pandas.DataFrame with one column per county.
    county : column label to pull out.

    Returns
    -------
    pandas.Series keyed by the dataset's index labels. The Series is rebuilt
    from an {index label: value} mapping, so it carries no name.

    Raises
    ------
    KeyError if `county` is not a column of `dataset`.
    """
    # Only the requested column is converted; the other columns are left untouched.
    return pd.Series(dataset[county].to_dict())

In [None]:
# Los Angeles series: smoothed (from df1) and raw (from df).
# The bare last line displays the raw series.
la = extract(df1, "Los Angeles")
la_raw = extract(df, "Los Angeles")
la_raw

In [None]:
# Overlay the raw and smoothed Los Angeles series on one shared day axis.
# The smoothed series is shorter (its leading rows were dropped) and is keyed by
# date rather than by row number, so it is placed explicitly at the positions of
# the raw rows it was computed from instead of starting at x = 0.
offset = len(la_raw) - len(la)
plt.plot(np.arange(len(la_raw)), la_raw.values, label = 'raw')
plt.plot(np.arange(offset, len(la_raw)), la.values, linewidth = 2, label = "smoothed")
plt.xlabel("row in the case table")
plt.legend()
plt.show()

In [None]:
# Largest value in the smoothed Los Angeles series.
max(la)

# Data Preprocessing
## Differencing
- Differencing of time series in discrete time
- transformation of a series into a new time series where the values are the differences between consecutive values of the original series
- helps stabilize the mean of the time series by removing trend

In [None]:
# build the differenced series, returned as a pandas Series
def difference(dataset, interval=1):
    """Subtract from each element the element `interval` positions before it.

    `interval` is a lag: the result has len(dataset) - interval entries.
    With interval == 0 the elements are passed through unchanged.
    """
    if interval == 0:
        # nothing to subtract: keep every element as is
        values = [dataset[pos] for pos in range(len(dataset))]
    else:
        values = [dataset[pos] - dataset[pos - interval]
                  for pos in range(interval, len(dataset))]
    return pd.Series(values)
# undo differencing for one predicted value
def invert_difference(history, yhat, interval=1):
    """Add the earlier observation `history` back onto the predicted change `yhat`.

    When interval == 0 no differencing was applied, so `yhat` is returned as is.
    """
    return yhat if interval == 0 else yhat + history

## Scaling
- Scale the data to [-1,1]: subtract the training mean and divide by the training range (max − min)

In [None]:
# centre and rescale train and test data with statistics from the training set
# input: train(np array, train_size*1), test(np array, test_size*1)
def scale(train, test):
    """Return (scaler, train_scaled, test_scaled).

    scaler is [mean(train), max(train), min(train)]; both arrays are transformed
    as (x - mean) / (max - min) using the training statistics only.
    max and min are taken with the Python builtins, so they are rows of `train`.
    """
    center = np.mean(train)
    upper, lower = max(train), min(train)
    scaler = [center, upper, lower]
    span = upper - lower
    # transform train
    train_2d = train.reshape(train.shape[0], train.shape[1])
    train_scaled = (train_2d - center) / span
    # transform test
    test_2d = test.reshape(test.shape[0], test.shape[1])
    test_scaled = (test_2d - center) / span
    return scaler, train_scaled, test_scaled
# map one scaled forecast back to the original units
# input: scaler as returned by scale(), single prediction yhat
def invert_scale(scaler, yhat):
    """Undo scale(): yhat * (max - min) + mean, then unwrap the nested result to a number."""
    center, upper, lower = scaler[0], scaler[1], scaler[2]
    restored = yhat * (upper - lower) + center
    return restored[0][0]  # output: a number

## Reshaping

In [None]:
# reframe a series as a supervised-learning table
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Build a frame whose rows hold n_in lagged values followed by n_out targets.

    Columns run from the oldest lag (t-n_in) up to (t-1), then (t), (t+1), ...
    A plain list is treated as one variable; anything else must expose .shape[1].
    Rows made incomplete by the shifting are dropped when dropnan is true.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    frame = pd.DataFrame(data)
    var_ids = range(1, n_vars + 1)
    # lagged inputs, oldest first (t-n, ... t-1)
    offsets = list(range(n_in, 0, -1))
    cols = [frame.shift(k) for k in offsets]
    names = ['var%d(t-%d)' % (v, k) for k in offsets for v in var_ids]
    # targets (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(frame.shift(-step))
        if step == 0:
            names.extend('var%d(t)' % v for v in var_ids)
        else:
            names.extend('var%d(t+%d)' % (v, step) for v in var_ids)
    # assemble
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg  # output: a data frame
# shared preprocessing for prepare_AR and prepare_nn
def _difference_split_scale(series, diff, train_size):
    """Difference the series, split it into train/test and scale both parts.

    Returns (scaler, train_scaled, test_scaled) exactly as produced by scale().
    """
    # extract raw values as a column
    raw_values = series.values
    raw_values = raw_values.reshape(len(raw_values), 1)
    # differencing
    diff_values = difference(raw_values, diff).values
    diff_values = diff_values.reshape(len(diff_values), 1)
    # differencing shortens the series by `diff`, so the split point moves with it
    split = train_size - diff
    train_diff, test_diff = diff_values[:split], diff_values[split:]
    # rescaling with training statistics
    return scale(train_diff, test_diff)
# prepare data for AR
def prepare_AR(series, diff, train_size, test_size, n_lag, n_seq):
    """Preprocess `series` for the AR model.

    Returns (scaler, train, test): scaler is the list from scale(), train is a
    flat list of scaled training values, test is the supervised array built
    from the scaled test part (n_lag inputs then n_seq targets per row).
    test_size is accepted for signature compatibility and is not used.
    """
    scaler, train_scaled, test_scaled = _difference_split_scale(series, diff, train_size)
    # each scaled row wraps its value twice, hence the double index
    train = [row[0][0] for row in train_scaled]
    test = series_to_supervised(test_scaled, n_lag, n_seq).values
    return scaler, train, test  # scaler: list, train: list, test: np array
# prepare data for neural networks
def prepare_nn(series, diff, train_size, test_size, n_lag, n_seq):
    """Preprocess `series` for the LSTM and hybrid models.

    Returns (scaler, train, test): scaler is the list from scale(); train and
    test are supervised arrays (n_lag inputs then n_seq targets per row) built
    from the flattened scaled values.
    test_size is accepted for signature compatibility and is not used.
    """
    scaler, train_scaled, test_scaled = _difference_split_scale(series, diff, train_size)
    # flatten: each scaled row wraps its value twice, hence the double index
    train_scaled = [row[0][0] for row in train_scaled]
    test_scaled = [row[0][0] for row in test_scaled]
    # convert to supervised
    train = series_to_supervised(train_scaled, n_lag, n_seq).values
    test = series_to_supervised(test_scaled, n_lag, n_seq).values
    return scaler, train, test  # scaler: list of np array, train: np array, test: np array

# Model Training

In [None]:
# train a stateful LSTM on the supervised training array
def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons):
    """Fit and return an LSTM(n_neurons) -> Dense network.

    The first n_lag columns of `train` are the inputs, the remaining columns
    the targets. Training runs one epoch at a time, without shuffling, and the
    LSTM state is reset after every epoch. n_seq is not used here; the output
    width comes from the number of target columns.
    """
    features, targets = train[:, :n_lag], train[:, n_lag:]
    # [samples, timesteps, features] with a single timestep
    features = features.reshape(features.shape[0], 1, features.shape[1])
    n_timesteps, n_features = features.shape[1], features.shape[2]
    # design network
    network = keras.Sequential()
    network.add(layers.LSTM(n_neurons, batch_input_shape=(n_batch, n_timesteps, n_features), stateful=True))
    network.add(layers.Dense(targets.shape[1]))
    network.compile(loss='mean_squared_error', optimizer='adam')
    # fit network, clearing the carried-over state between epochs
    for _ in range(nb_epoch):
        network.fit(features, targets, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
        network.reset_states()
    return network

In [None]:
class alpha_layer(keras.layers.Layer):
    """Blend two inputs with a trainable weight alpha kept inside [0, 1].

    call(a, b) returns a @ alpha + b @ (1 - alpha), where alpha has shape
    (input_dim, units) and is initialised uniformly at random in [0, 1].
    """

    def __init__(self, input_dim=1, units=1):
        super().__init__()
        # the constraint clips alpha back into [0, 1] after each update
        self.alpha = self.add_weight(
            shape=(input_dim, units),
            initializer=tf.keras.initializers.RandomUniform(minval = 0, maxval = 1),
            constraint=lambda x: tf.clip_by_value(x, 0, 1),
            trainable=True,
        )

    def call(self, input1, input2):
        first_term = tf.matmul(input1, self.alpha)
        second_term = tf.matmul(input2, (1-self.alpha))
        return first_term + second_term
# fit the hybrid model
def fit_comb(train, n_lag, n_seq, n_batch, n_epoch, n_neurons):
    """Fit and return the hybrid AR + LSTM model.

    A linear (AR) branch and an LSTM -> Dense branch both map the n_lag inputs
    to n_seq outputs; alpha_layer blends the two branch outputs with a
    trainable weight. The first n_lag columns of `train` are the inputs, the
    rest the targets.

    n_neurons sets the LSTM width. It was previously ignored and the width was
    tied to n_seq; results are unchanged whenever n_neurons == n_seq.
    NOTE(review): alpha_layer() is created with its default (1, 1) weight, so
    this presumably only works for n_seq == 1 -- confirm before using n_seq > 1.
    """
    X, y = train[:, 0:n_lag], train[:, n_lag:]
    # same inputs shaped [samples, timesteps, features] for the LSTM branch
    Z = X.reshape(X.shape[0], 1, X.shape[1])
    # AR branch
    input1 = tf.keras.layers.Input(shape=(X.shape[1],))
    AR = tfl.layers.Linear(num_input_dims=X.shape[1], units=n_seq)(input1)

    # LSTM branch followed by a fully connected layer
    input2 = tf.keras.layers.Input(shape=(Z.shape[1], Z.shape[2]))
    lstm = layers.LSTM(n_neurons)(input2)
    lstm_connected = tf.keras.layers.Dense(n_seq)(lstm)

    # blend the two branches
    added = alpha_layer()(AR, lstm_connected)
    model = tf.keras.models.Model(inputs=[input1, input2], outputs=added)
    # compile before training or testing
    model.compile(loss='mean_squared_error', optimizer='adam')
    # fit network one epoch at a time, without shuffling
    for _ in range(n_epoch):
        model.fit([X, Z], y, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
        model.reset_states()
    return model

# Make Forecasts

In [None]:
# one-step-ahead AR forecasts for the test rows
def make_AR(model, raw_value, test, test_size, n_lag, n_seq, diff, scaler):
    """Return test_size forecasts in original units from a fitted AR model.

    model : fitted AR result; model.params is read as
        [const, coefficient of lag 1, ..., coefficient of lag n_lag, ...].
        NOTE(review): this matches statsmodels ARIMA(order=(n_lag, 0, 0),
        trend='c'), where const is the process mean -- confirm if another
        model class is passed in.
    raw_value : undifferenced series; its last test_size values are the targets.
    test : supervised array whose first n_lag columns are the lagged inputs,
        oldest lag first (as built by series_to_supervised).
    diff, scaler : differencing lag and scaler used during preprocessing.
    n_seq is not used.
    """
    forecasts = []
    # observation `diff` steps before each target, taken by position
    values = np.asarray(raw_value)
    prev = values[-(test_size+diff):(len(values)-diff)]
    const = model.params[0]
    for i in range(test_size):
        X = test[i, 0:n_lag]  # X_test, will not use y_test
        # Column 0 holds the oldest lag, so lag k lives in X[n_lag - k].
        # The model is mean-adjusted: y_hat = const + sum_k phi_k * (y_{t-k} - const).
        forecast = const
        for lag in range(1, n_lag + 1):
            forecast = forecast + model.params[lag] * (X[n_lag - lag] - const)
        # invert scaling
        forecast = invert_scale(scaler, forecast)
        # invert differencing
        forecast = invert_difference(prev[i], forecast, diff)
        # store the forecast
        forecasts.append(forecast)
    return forecasts

In [None]:
# single one-step forecast from the LSTM
def forecast_lstm(model, X, n_batch):
    """Feed one input window to `model` and return the first predicted value."""
    # one sample, one timestep, len(X) features
    window = X.reshape(1, 1, len(X))
    prediction = model.predict(window, batch_size=n_batch)
    # first output of the first (only) sample
    first_row = list(prediction[0, :])
    return first_row[0]
# one-step-ahead LSTM forecasts for the test rows
def make_lstm(model, n_batch, raw_value, test, test_size, n_lag, n_seq, diff, scaler):
    """Return one forecast in original units per row of `test`.

    raw_value : undifferenced series; supplies the observation `diff` steps
        before each target for undoing the differencing.
    test : supervised array whose first n_lag columns are the model inputs.
    diff, scaler : differencing lag and scaler used during preprocessing.
    n_seq is not used.
    """
    forecasts = []
    # observation `diff` steps before each target, taken by position
    values = np.asarray(raw_value)
    prev = values[-(test_size+diff):(len(values)-diff)]
    for i in range(len(test)):
        X = test[i, 0:n_lag]
        # make forecast
        forecast = forecast_lstm(model, X, n_batch)
        # invert scaling
        forecast = invert_scale(scaler, forecast)
        # invert differencing; diff is passed on so diff == 0 adds nothing back
        forecast = invert_difference(prev[i], forecast, diff)
        # store the forecast
        forecasts.append(forecast)
    return forecasts

In [None]:
# make one forecast with a hybrid model
def forecast_comb(model, X, n_batch):
    """Feed one input window to the two-input hybrid model and return the first predicted value.

    The window is passed twice: flat, shaped (1, len(X)), and shaped
    (1, 1, len(X)) as [samples, timesteps, features].
    """
    Z = X.reshape(1, 1, len(X))
    X = X.reshape(1, len(X))
    # make forecast
    forecast = model.predict([X, Z], batch_size=n_batch)
    # first output of the first (only) sample
    return [x for x in forecast[0, :]][0]
# one-step-ahead hybrid-model forecasts for the test rows
def make_comb(model, n_batch, raw_value, test, test_size, n_lag, n_seq, diff, scaler):
    """Return one forecast in original units per row of `test`.

    raw_value : undifferenced series; supplies the observation `diff` steps
        before each target for undoing the differencing.
    test : supervised array whose first n_lag columns are the model inputs.
    diff, scaler : differencing lag and scaler used during preprocessing.
    n_seq is not used.
    """
    forecasts = []
    # observation `diff` steps before each target, taken by position
    values = np.asarray(raw_value)
    prev = values[-(test_size+diff):(len(values)-diff)]
    for i in range(len(test)):
        X = test[i, 0:n_lag]
        # make forecast
        forecast = forecast_comb(model, X, n_batch)
        # invert scaling
        forecast = invert_scale(scaler, forecast)
        # invert differencing; diff is passed on so diff == 0 adds nothing back
        forecast = invert_difference(prev[i], forecast, diff)
        # store the forecast
        forecasts.append(forecast)
    return forecasts

## Evaluation with RMSE, MAE, MAPE

In [None]:
# score forecasts against the ground truth
def evaluate_forecasts(truth, forecasts):
    """Return the mean absolute percentage error, in percent.

    Only MAPE is computed; RMSE and MAE are not.
    """
    actual, predicted = np.array(truth), np.array(forecasts)
    return mean_absolute_percentage_error(actual, predicted)*100

# Find Standard Error

In [None]:
import scipy.stats

def mean_confidence_interval(data, confidence=0.95):
    """Return (mean, half_width) of the two-sided Student-t confidence interval for the mean.

    half_width is the standard error of the mean times the t quantile with
    len(data) - 1 degrees of freedom at the requested confidence level.
    """
    sample = np.array(data) * 1.0  # force a float array
    dof = len(sample) - 1
    center = np.mean(sample)
    t_crit = scipy.stats.t.ppf((1 + confidence) / 2., dof)
    half_width = scipy.stats.sem(sample) * t_crit
    return center, half_width

In [None]:
def uncertainty(data):
    """Summarise repeated forecast runs position by position.

    data : sequence of runs, each a sequence of forecasts of equal length.

    Returns (Mean, Std) as numpy arrays with one entry per forecast position.
    Mean is the average over runs. Std is NOT a standard deviation: it is the
    half-width of the 95% confidence interval for that mean, as returned by
    mean_confidence_interval().

    The number of positions is read from the runs themselves rather than from
    a notebook-level constant, so the function does not depend on cell order.
    """
    n_steps = len(data[0]) if len(data) else 0
    Mean = []
    Std = []
    for step in range(n_steps):
        # all predictions made for this position across the runs
        values = [run[step] for run in data]
        mean, half_width = mean_confidence_interval(values)
        Mean.append(mean)
        Std.append(half_width)
    return np.array(Mean), np.array(Std)

# Perform on Interesting Trials

In [None]:
# given historical observations (t, t-1, t-2, … t-n+1) forecast t+1(future 1 day)
n_lag = 7  # number of lag, use 7 past days
n_seq = 1  # predict 1 future days
diff = 1 # differencing lag applied before modelling (0 = no differencing)
train_size = 63  # train set size
test_size = 18 # test set size

n_step = 7  # NOTE(review): not referenced by any cell visible in this notebook section

n_batch = 1
nb_epoch = 100
n_neurons = 1

# Seed every random source used below so that a Restart & Run All reproduces the
# same numbers. Repeated fits inside one run still differ from each other.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Creates an empty 1x3 figure; `fig` is the handle that later cells call set_size_inches on.
fig, axs = plt.subplots(1, 3, sharex='col', sharey='row')

# Case 1
## Curved Training Data and Down Trend Testing: San Diego 2020-12-03 to 2021-02-28

In [None]:
# Case 1 window: train_size + test_size + n_lag consecutive smoothed values,
# starting at position 300 of the San Diego series.
county = "San Diego"
s = extract(df1, county)
raw_value = s.iloc[300:300+(train_size+test_size+n_lag)]
raw_value

In [None]:
#plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams.update({'font.size': 12})
# Create the figure here so that the requested size applies to the figure that
# is actually drawn and saved, not to a handle left over from an earlier cell.
fig, ax = plt.subplots(figsize=(18, 6))
ax.plot(raw_value.values, color = 'cornflowerblue', linewidth = 2)
# visualize the split of test/train
ax.axvline(x = train_size, color = 'orange', linewidth = 2, label = 'train ends')
ax.axvline(x = train_size+n_lag, color = 'green', linewidth = 2, label = 'test starts')
ax.legend(fontsize= "large")
fig.savefig("SD_raw_2020-12-03_2021-02-28.png", dpi=600)

In [None]:
# holders
mape_sd = []  # per repetition: [AR, LSTM, hybrid] test MAPE (%)
pred_sd = []  # per repetition: [AR, LSTM, hybrid] test forecasts
# prepare data
truth = raw_value[-test_size:].to_list()
ARscaler, ARtrain, ARtest = prepare_AR(raw_value, diff, train_size, test_size, n_lag, n_seq)
NNscaler, NNtrain, NNtest = prepare_nn(raw_value, diff, train_size, test_size, n_lag, n_seq)
# AR baseline: its inputs are identical on every repetition, so it is fitted and
# scored once and the result is recorded for each repetition.
model_AR = ARIMA(ARtrain, order = (n_lag,0,0), trend = 'c').fit()  # AR, I, MA
forecasts_AR = make_AR(model_AR, raw_value, ARtest, test_size, n_lag, n_seq, diff, ARscaler)
score_AR = evaluate_forecasts(raw_value[-test_size:], forecasts_AR)
for i in range(100):
    # lstm (re-initialised and re-trained every repetition)
    model_lstm = fit_lstm(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts_lstm = make_lstm(model_lstm, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_lstm = evaluate_forecasts(raw_value[-test_size:], forecasts_lstm)
    # hybrid (re-initialised and re-trained every repetition)
    model = fit_comb(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts = make_comb(model, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_hybrid = evaluate_forecasts(raw_value[-test_size:], forecasts)
    # record
    mape_sd.append([score_AR, score_lstm, score_hybrid])
    pred_sd.append([list(forecasts_AR), forecasts_lstm, forecasts])

In [None]:
# one list of forecast runs per model
AR = [run[0] for run in pred_sd]
lstm = [run[1] for run in pred_sd]
hybrid = [run[2] for run in pred_sd]

# per-day mean over runs; the *_std values are the confidence-interval
# half-widths returned by uncertainty(), not standard deviations
AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)

# prepare data
county = "San Diego"
s = extract(df1, county)
raw_value = s[300:300+(train_size+test_size+n_lag)]
truth = raw_value[-test_size:].to_list()
# plot
plt.rcParams.update({'font.size': 20})
fig, axs = plt.subplots(1, 3, sharex='col', sharey='row')
fig.set_size_inches(16, 10, forward=True)
x = np.linspace(0, test_size, test_size)  # follows test_size instead of a hard-coded 18
panels = [("AR", AR_mean, AR_std), ("LSTM", lstm_mean, lstm_std), ("Hybrid", hybrid_mean, hybrid_std)]
for ax, (title, mean, band) in zip(axs, panels):
    ax.plot(x, mean, color = 'orange', linewidth = 2, label = "prediction")
    # shaded band: mean +/- 2 * interval half-width
    ax.fill_between(x, mean - 2*band, mean + 2*band,
                    edgecolor='#CC4F1B', facecolor='#FEEACC', linestyle='dashdot', antialiased=True)
    ax.plot(x, truth, 'b', linewidth = 2, label = "ground truth")
    ax.set_title(title, fontsize='large')

plt.subplots_adjust(wspace=0.05)
plt.legend(fontsize= "large")
#plt.legend(prop={"family": "Times New Roman"}, fontsize= "x-large")
fig_name = "SD_2020-12-03_2021-02-28.png"
plt.savefig(fig_name, dpi=600)

In [None]:
# mean test MAPE (%) over the 100 repetitions: AR, LSTM, hybrid
mape_AR = [mape_sd[i][0] for i in range(100)]
mape_lstm = [mape_sd[i][1] for i in range(100)]
mape_hybrid = [mape_sd[i][2] for i in range(100)]
for scores in (mape_AR, mape_lstm, mape_hybrid):
    print(np.mean(scores))

In [None]:
# largest per-day interval half-width for the LSTM and the hybrid model
AR = [pred_sd[i][0] for i in range(100)]
lstm = [pred_sd[i][1] for i in range(100)]
hybrid = [pred_sd[i][2] for i in range(100)]

AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)
for widths in (lstm_std, hybrid_std):
    print(max(widths))

# Case 2
## Up Trend Training and Down Trend Testing: San Francisco 2020-02-17 to 2020-05-14

In [None]:
# Case 2 window: train_size + test_size + n_lag consecutive smoothed values,
# starting at position 10 of the San Francisco series.
county = "San Francisco"
s = extract(df1, county)
raw_value = s[10:10+(train_size+test_size+n_lag)]
plt.rcParams.update({'font.size': 12})
# Create the figure here so that the requested size applies to the figure that
# is actually drawn, not to a handle left over from an earlier cell.
fig, ax = plt.subplots(figsize=(18, 6))
ax.plot(raw_value.values, color = 'cornflowerblue', linewidth = 2)
# visualize the split of test/train
ax.axvline(x = train_size, color = 'orange', linewidth = 2, label = 'train ends')
ax.axvline(x = train_size+n_lag, color = 'green', linewidth = 2, label = 'test starts')
#plt.title("Training and Testing Set", fontname = "Times New Roman", fontsize='large', fontweight='bold')
ax.legend(fontsize= "large")
#plt.savefig("SF_raw_2020-02-17_2020-05-14.png", dpi=600)
plt.show()

In [None]:
# holders
mape_sf = []  # per repetition: [AR, LSTM, hybrid] test MAPE (%)
pred_sf = []  # per repetition: [AR, LSTM, hybrid] test forecasts
# prepare data
truth = raw_value[-test_size:].to_list()
ARscaler, ARtrain, ARtest = prepare_AR(raw_value, diff, train_size, test_size, n_lag, n_seq)
NNscaler, NNtrain, NNtest = prepare_nn(raw_value, diff, train_size, test_size, n_lag, n_seq)
# AR baseline: its inputs are identical on every repetition, so it is fitted and
# scored once and the result is recorded for each repetition.
model_AR = ARIMA(ARtrain, order = (n_lag,0,0), trend = 'c').fit()  # AR, I, MA
forecasts_AR = make_AR(model_AR, raw_value, ARtest, test_size, n_lag, n_seq, diff, ARscaler)
score_AR = evaluate_forecasts(raw_value[-test_size:], forecasts_AR)
for i in range(100):
    # lstm (re-initialised and re-trained every repetition)
    model_lstm = fit_lstm(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts_lstm = make_lstm(model_lstm, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_lstm = evaluate_forecasts(raw_value[-test_size:], forecasts_lstm)
    # hybrid (re-initialised and re-trained every repetition)
    model = fit_comb(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts = make_comb(model, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_hybrid = evaluate_forecasts(raw_value[-test_size:], forecasts)
    # record
    mape_sf.append([score_AR, score_lstm, score_hybrid])
    pred_sf.append([list(forecasts_AR), forecasts_lstm, forecasts])

In [None]:
# one list of forecast runs per model
AR = [run[0] for run in pred_sf]
lstm = [run[1] for run in pred_sf]
hybrid = [run[2] for run in pred_sf]

# per-day mean over runs; the *_std values are the confidence-interval
# half-widths returned by uncertainty(), not standard deviations
AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)

# prepare data
county = "San Francisco"
s = extract(df1, county)
raw_value = s[10:10+(train_size+test_size+n_lag)]
truth = raw_value[-test_size:].to_list()
# plot
plt.rcParams.update({'font.size': 20})
fig, axs = plt.subplots(1, 3, sharex='col', sharey='row')
fig.set_size_inches(16, 10, forward=True)
x = np.linspace(0, test_size, test_size)  # follows test_size instead of a hard-coded 18
panels = [("AR", AR_mean, AR_std), ("LSTM", lstm_mean, lstm_std), ("Hybrid", hybrid_mean, hybrid_std)]
for ax, (title, mean, band) in zip(axs, panels):
    ax.plot(x, mean, color = 'orange', linewidth = 2, label = "prediction")
    # shaded band: mean +/- 2 * interval half-width
    ax.fill_between(x, mean - 2*band, mean + 2*band,
                    edgecolor='#CC4F1B', facecolor='#FEEACC', linestyle='dashdot', antialiased=True)
    ax.plot(x, truth, 'b', linewidth = 2, label = "ground truth")
    ax.set_title(title, fontsize='large')

plt.subplots_adjust(wspace=0.05)
plt.legend(fontsize= "large")
#plt.legend(prop={"family": "Times New Roman"}, fontsize= "x-large")
fig_name = "SF_2020-02-17_2020-05-14.png"
plt.savefig(fig_name, dpi=600)

In [None]:
# split the recorded MAPE triples into one list per model
mape_AR = [mape_sf[i][0] for i in range(100)]
mape_lstm = [mape_sf[i][1] for i in range(100)]
mape_hybrid = [mape_sf[i][2] for i in range(100)]

In [None]:
# Mean test MAPE (%) over the repetitions, in the order AR, LSTM, hybrid.
print(np.mean(mape_AR))
print(np.mean(mape_lstm))
print(np.mean(mape_hybrid))

In [None]:
# largest per-day interval half-width for the LSTM and the hybrid model
AR = [pred_sf[i][0] for i in range(100)]
lstm = [pred_sf[i][1] for i in range(100)]
hybrid = [pred_sf[i][2] for i in range(100)]

AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)
for widths in (lstm_std, hybrid_std):
    print(max(widths))

# Case 3
## Up Trend Training and Up Trend Testing: Los Angeles 2020-09-24 to 2020-12-20

In [None]:
# Case 3 window: train_size + test_size + n_lag consecutive smoothed values,
# starting at position 230 of the Los Angeles series.
county = "Los Angeles"
s = extract(df1, county)
raw_value = s[230:230+(train_size+test_size+n_lag)]
# font size is set before the figure is created so that it applies to this plot
plt.rcParams.update({'font.size': 14})
# Create the figure here so that the requested size applies to the figure that
# is actually drawn and saved, not to a handle left over from an earlier cell.
fig, ax = plt.subplots(figsize=(18, 6))
ax.plot(raw_value.values, color = 'cornflowerblue', linewidth = 2)
# visualize the split of test/train
ax.axvline(x = train_size, color = 'orange', linewidth = 2, label = 'train ends')
ax.axvline(x = train_size+n_lag, color = 'green', linewidth = 2, label = 'test starts')
#plt.title("Training and Testing Set", fontname = "Times New Roman", fontsize='large', fontweight='bold')
ax.legend()
fig.savefig("LA_raw_2020-09-24_2020-12-20.png", dpi = 600)

In [None]:
# holders
mape_la = []  # per repetition: [AR, LSTM, hybrid] test MAPE (%)
pred_la = []  # per repetition: [AR, LSTM, hybrid] test forecasts
# prepare data
truth = raw_value[-test_size:].to_list()
ARscaler, ARtrain, ARtest = prepare_AR(raw_value, diff, train_size, test_size, n_lag, n_seq)
NNscaler, NNtrain, NNtest = prepare_nn(raw_value, diff, train_size, test_size, n_lag, n_seq)
# AR baseline: its inputs are identical on every repetition, so it is fitted and
# scored once and the result is recorded for each repetition.
model_AR = ARIMA(ARtrain, order = (n_lag,0,0), trend = 'c').fit()  # AR, I, MA
forecasts_AR = make_AR(model_AR, raw_value, ARtest, test_size, n_lag, n_seq, diff, ARscaler)
score_AR = evaluate_forecasts(raw_value[-test_size:], forecasts_AR)
for i in range(100):
    # lstm (re-initialised and re-trained every repetition)
    model_lstm = fit_lstm(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts_lstm = make_lstm(model_lstm, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_lstm = evaluate_forecasts(raw_value[-test_size:], forecasts_lstm)
    # hybrid (re-initialised and re-trained every repetition)
    model = fit_comb(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts = make_comb(model, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_hybrid = evaluate_forecasts(raw_value[-test_size:], forecasts)
    # record
    mape_la.append([score_AR, score_lstm, score_hybrid])
    pred_la.append([list(forecasts_AR), forecasts_lstm, forecasts])

In [None]:
# one list of forecast runs per model
AR = [run[0] for run in pred_la]
lstm = [run[1] for run in pred_la]
hybrid = [run[2] for run in pred_la]

# per-day mean over runs; the *_std values are the confidence-interval
# half-widths returned by uncertainty(), not standard deviations
AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)

# prepare data
county = "Los Angeles"
s = extract(df1, county)
raw_value = s[230:230+(train_size+test_size+n_lag)]
truth = raw_value[-test_size:].to_list()
# plot
plt.rcParams.update({'font.size': 20})
fig, axs = plt.subplots(1, 3, sharex='col', sharey='row')
fig.set_size_inches(16, 10, forward=True)
x = np.linspace(0, test_size, test_size)  # follows test_size instead of a hard-coded 18
panels = [("AR", AR_mean, AR_std), ("LSTM", lstm_mean, lstm_std), ("Hybrid", hybrid_mean, hybrid_std)]
for ax, (title, mean, band) in zip(axs, panels):
    ax.plot(x, mean, color = 'orange', linewidth = 2, label = "prediction")
    # shaded band: mean +/- 2 * interval half-width
    ax.fill_between(x, mean - 2*band, mean + 2*band,
                    edgecolor='#CC4F1B', facecolor='#FEEACC', linestyle='dashdot', antialiased=True)
    ax.plot(x, truth, 'b', linewidth = 2, label = "ground truth")
    ax.set_title(title, fontsize='large')

plt.subplots_adjust(wspace=0.05)
plt.legend(fontsize= "large")
#plt.legend(prop={"family": "Times New Roman"}, fontsize= "x-large")
fig_name = "LA_2020-09-24_2020-12-20.png"
plt.savefig(fig_name, dpi=600)

In [None]:
# mean test MAPE (%) over the 100 repetitions: AR, LSTM, hybrid
mape_AR = [mape_la[i][0] for i in range(100)]
mape_lstm = [mape_la[i][1] for i in range(100)]
mape_hybrid = [mape_la[i][2] for i in range(100)]
for scores in (mape_AR, mape_lstm, mape_hybrid):
    print(np.mean(scores))

In [None]:
# Average per-day interval half-width (as returned by uncertainty()) for LSTM, then hybrid.
print(np.mean(lstm_std))
print(np.mean(hybrid_std))

In [None]:
# largest per-day interval half-width for the LSTM and the hybrid model
AR = [pred_la[i][0] for i in range(100)]
lstm = [pred_la[i][1] for i in range(100)]
hybrid = [pred_la[i][2] for i in range(100)]

AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)
for widths in (lstm_std, hybrid_std):
    print(np.max(widths))

# Case 4
## Down Trend Training and Down Trend Testing: San Francisco 2022-06-10 to 2022-09-05

In [None]:
# Case 4 window: the last train_size + test_size + n_lag smoothed values of the
# San Francisco series.
county = "San Francisco"
s = extract(df1, county)
raw_value = s[-(train_size+test_size+n_lag):]  # last trial
plt.rcParams.update({'font.size': 12})
# Create the figure here so that the requested size applies to the figure that
# is actually drawn and saved, not to a handle left over from an earlier cell.
fig, ax = plt.subplots(figsize=(18, 6))
ax.plot(raw_value.values, color = 'cornflowerblue', linewidth = 2)
# visualize the split of test/train
ax.axvline(x = train_size, color = 'orange', linewidth = 2, label = 'train ends')
ax.axvline(x = train_size+n_lag, color = 'green', linewidth = 2, label = 'test starts')
#plt.title("Training and Testing Set", fontname = "Times New Roman", fontsize='large', fontweight='bold')
ax.legend(fontsize = "large")
fig.savefig("SF_raw_2022-06-10_2022-09-05.png", dpi=600)

In [None]:
# holders
mape_sf_2 = []  # per repetition: [AR, LSTM, hybrid] test MAPE (%)
pred_sf_2 = []  # per repetition: [AR, LSTM, hybrid] test forecasts
# prepare data
truth = raw_value[-test_size:].to_list()
ARscaler, ARtrain, ARtest = prepare_AR(raw_value, diff, train_size, test_size, n_lag, n_seq)
NNscaler, NNtrain, NNtest = prepare_nn(raw_value, diff, train_size, test_size, n_lag, n_seq)
# AR baseline: its inputs are identical on every repetition, so it is fitted and
# scored once and the result is recorded for each repetition.
model_AR = ARIMA(ARtrain, order = (n_lag,0,0), trend = 'c').fit()  # AR, I, MA
forecasts_AR = make_AR(model_AR, raw_value, ARtest, test_size, n_lag, n_seq, diff, ARscaler)
score_AR = evaluate_forecasts(raw_value[-test_size:], forecasts_AR)
for i in range(100):
    # lstm (re-initialised and re-trained every repetition)
    model_lstm = fit_lstm(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts_lstm = make_lstm(model_lstm, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_lstm = evaluate_forecasts(raw_value[-test_size:], forecasts_lstm)
    # hybrid (re-initialised and re-trained every repetition)
    model = fit_comb(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts = make_comb(model, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_hybrid = evaluate_forecasts(raw_value[-test_size:], forecasts)
    # record
    mape_sf_2.append([score_AR, score_lstm, score_hybrid])
    pred_sf_2.append([list(forecasts_AR), forecasts_lstm, forecasts])

In [None]:
# one list of forecast runs per model
AR = [run[0] for run in pred_sf_2]
lstm = [run[1] for run in pred_sf_2]
hybrid = [run[2] for run in pred_sf_2]

# per-day mean over runs; the *_std values are the confidence-interval
# half-widths returned by uncertainty(), not standard deviations
AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)

# prepare data
county = "San Francisco"
s = extract(df1, county)
raw_value = s[-(train_size+test_size+n_lag):]  # last trial
truth = raw_value[-test_size:].to_list()
# plot
plt.rcParams.update({'font.size': 20})
fig, axs = plt.subplots(1, 3, sharex='col', sharey='row')
fig.set_size_inches(16, 10, forward=True)
x = np.linspace(0, test_size, test_size)  # follows test_size instead of a hard-coded 18
panels = [("AR", AR_mean, AR_std), ("LSTM", lstm_mean, lstm_std), ("Hybrid", hybrid_mean, hybrid_std)]
for ax, (title, mean, band) in zip(axs, panels):
    ax.plot(x, mean, color = 'orange', linewidth = 2, label = "prediction")
    # shaded band: mean +/- 2 * interval half-width
    ax.fill_between(x, mean - 2*band, mean + 2*band,
                    edgecolor='#CC4F1B', facecolor='#FEEACC', linestyle='dashdot', antialiased=True)
    ax.plot(x, truth, 'b', linewidth = 2, label = "ground truth")
    ax.set_title(title, fontsize='large')

plt.subplots_adjust(wspace=0.05)
plt.legend(fontsize= "large")
#plt.legend(prop={"family": "Times New Roman"}, fontsize= "x-large")
fig_name = "SF_2022-06-10_2022-09-05.png"
plt.savefig(fig_name, dpi=600)

In [None]:
# mean test MAPE (%) over the 100 repetitions: AR, LSTM, hybrid
mape_AR = [mape_sf_2[i][0] for i in range(100)]
mape_lstm = [mape_sf_2[i][1] for i in range(100)]
mape_hybrid = [mape_sf_2[i][2] for i in range(100)]
for scores in (mape_AR, mape_lstm, mape_hybrid):
    print(np.mean(scores))

In [None]:
# largest per-day interval half-width for the LSTM and the hybrid model
AR = [pred_sf_2[i][0] for i in range(100)]
lstm = [pred_sf_2[i][1] for i in range(100)]
hybrid = [pred_sf_2[i][2] for i in range(100)]

AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)
for widths in (lstm_std, hybrid_std):
    print(max(widths))

# Case 5
## Down Trend Training and Up Trend Testing: Santa Barbara 2022-01-17 to 2022-04-14

In [None]:
# Case 5 window: train_size + test_size + n_lag consecutive smoothed values,
# starting at position 710 of the Santa Barbara series.
county = "Santa Barbara"
s = extract(df1, county)
raw_value = s[710:710+(train_size+test_size+n_lag)]
plt.rcParams.update({'font.size': 12})
# Create the figure here so that the requested size applies to the figure that
# is actually drawn, not to a handle left over from an earlier cell.
fig, ax = plt.subplots(figsize=(18, 6))
ax.plot(raw_value.values, color = 'cornflowerblue', linewidth = 2)
# visualize the split of test/train
ax.axvline(x = train_size, color = 'orange', linewidth = 2, label = 'train ends')
ax.axvline(x = train_size+n_lag, color = 'green', linewidth = 2, label = 'test starts')
#plt.title("Training and Testing Set", fontname = "Times New Roman", fontsize='large', fontweight='bold')
ax.legend(fontsize = "large")
# plt.savefig("SB_raw_2022-01-17_2022-04-14.png", dpi=600)
plt.show()

In [None]:
# Santa Barbara experiment: fit and score the three forecasters 100 times.
# mape_sb collects one [AR, LSTM, hybrid] score triple per run,
# pred_sb collects the matching [AR, LSTM, hybrid] forecasts.
mape_sb, pred_sb = [], []
# observed values of the test window
truth = raw_value[-test_size:].to_list()
# model inputs: one preparation for the AR model, one shared by both networks
ARscaler, ARtrain, ARtest = prepare_AR(raw_value, diff, train_size, test_size, n_lag, n_seq)
NNscaler, NNtrain, NNtest = prepare_nn(raw_value, diff, train_size, test_size, n_lag, n_seq)

for run in range(100):
    # AR(n_lag) with a constant term (no differencing, no MA part)
    model_AR = ARIMA(ARtrain, order=(n_lag, 0, 0), trend='c').fit()
    forecasts_AR = make_AR(model_AR, raw_value, ARtest, test_size, n_lag, n_seq, diff, ARscaler)
    score_AR = evaluate_forecasts(raw_value[-test_size:], forecasts_AR)

    # LSTM network
    model_lstm = fit_lstm(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts_lstm = make_lstm(model_lstm, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_lstm = evaluate_forecasts(raw_value[-test_size:], forecasts_lstm)

    # hybrid network
    model = fit_comb(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts = make_comb(model, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_hybrid = evaluate_forecasts(raw_value[-test_size:], forecasts)

    # record this run
    mape_sb.append([score_AR, score_lstm, score_hybrid])
    pred_sb.append([forecasts_AR, forecasts_lstm, forecasts])

In [None]:
# Regroup the stored Santa Barbara forecasts by model. Each entry of pred_sb
# is one run's [AR, LSTM, hybrid] forecasts; iterating over the recorded runs
# avoids the hard-coded count of 100.
AR = [run_preds[0] for run_preds in pred_sb]
lstm = [run_preds[1] for run_preds in pred_sb]
hybrid = [run_preds[2] for run_preds in pred_sb]

# uncertainty() is defined earlier in the notebook; its two return values are
# bound here as the mean and std of each model's forecasts.
AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)

# prepare data (repeated here so the cell does not depend on raw_value still
# pointing at this county)
county = "Santa Barbara"
s = extract(df1, county)
raw_value = s[710:710+(train_size+test_size+n_lag)]
truth = raw_value[-test_size:].to_list()

# plot: one panel per model, shared axes
plt.rcParams.update({'font.size': 20})
fig, axs = plt.subplots(1, 3, sharex='col', sharey='row')
fig.set_size_inches(16, 10, forward=True)
# One integer x position per test step. The previous np.linspace(0, 18, 18)
# hard-coded the test length and spaced the points 18/17 apart.
x = np.arange(len(truth))
panels = [
    ("AR", AR_mean, AR_std),
    ("LSTM", lstm_mean, lstm_std),
    ("Hybrid", hybrid_mean, hybrid_std),
]
for panel_ax, (panel_title, panel_mean, panel_std) in zip(axs, panels):
    panel_ax.plot(x, panel_mean, color='orange', linewidth=2, label="prediction")
    # shaded band: mean +/- 2 std
    panel_ax.fill_between(x, panel_mean - 2*panel_std, panel_mean + 2*panel_std,
                          edgecolor='#CC4F1B', facecolor='#FEEACC', linestyle='dashdot', antialiased=True)
    panel_ax.plot(x, truth, 'b', linewidth=2, label="ground truth")
    panel_ax.set_title(panel_title, fontsize='large')

plt.subplots_adjust(wspace=0.05)
# pyplot-level legend: drawn on the current axes only
plt.legend(fontsize="large")
#plt.legend(prop={"family": "Times New Roman"}, fontsize= "x-large")
fig_name = "SB_2022-01-17_2022-04-14.png"
plt.savefig(fig_name, dpi=600)

In [None]:
# Average score per model for the Santa Barbara experiment.
# Each entry of mape_sb is one run's [AR, LSTM, hybrid] result from
# evaluate_forecasts. Iterating over the recorded runs, rather than a
# hard-coded range(100), keeps this cell in step with the training loop.
mape_AR = [run_scores[0] for run_scores in mape_sb]
mape_lstm = [run_scores[1] for run_scores in mape_sb]
mape_hybrid = [run_scores[2] for run_scores in mape_sb]
# printed in the order AR, LSTM, hybrid
print(np.mean(mape_AR))
print(np.mean(mape_lstm))
print(np.mean(mape_hybrid))

In [None]:
# Regroup the stored Santa Barbara forecasts by model. Each entry of pred_sb
# is one run's [AR, LSTM, hybrid] forecasts; iterating over the recorded runs
# avoids the hard-coded count of 100.
AR = [run_preds[0] for run_preds in pred_sb]
lstm = [run_preds[1] for run_preds in pred_sb]
hybrid = [run_preds[2] for run_preds in pred_sb]

# uncertainty() is defined earlier in the notebook; its two return values are
# bound here as the mean and std of each model's forecasts.
AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)
# largest std value for the LSTM, then for the hybrid model
print(max(lstm_std))
print(max(hybrid_std))

## Riverside 2022-02-16 to 2022-05-14

In [None]:
# Riverside window: train_size + test_size + n_lag consecutive values of the
# county series, starting at offset 740.
county = "Riverside"
s = extract(df1, county)
window_start = 740
window_stop = window_start + (train_size + test_size + n_lag)
raw_value = s[window_start:window_stop]
# display the selected window
raw_value

In [None]:
# Plot the selected Riverside window and mark where the train/test split falls.
plt.rcParams.update({'font.size': 12})
# Create a dedicated figure for this plot. The previous code called
# set_size_inches on `fig`, a handle left over from an earlier cell, so the
# requested size was never applied to the figure drawn here.
fig, ax = plt.subplots(figsize=(18, 6))
ax.plot(raw_value.values, color='cornflowerblue', linewidth=2)
# visualize the split of test/train
ax.axvline(x=train_size, color='orange', linewidth=2, label='train ends')
ax.axvline(x=train_size+n_lag, color='green', linewidth=2, label='test starts')
#ax.set_title("Training and Testing Set", fontname = "Times New Roman", fontsize='large', fontweight='bold')
ax.legend(fontsize="large")
# fig.savefig("Riv_raw_2022-02-16_2022-05-14.png", dpi=600)
plt.show()

In [None]:
# Riverside experiment: fit and score the three forecasters 100 times.
# mape_riv collects one [AR, LSTM, hybrid] score triple per run,
# pred_riv collects the matching [AR, LSTM, hybrid] forecasts.
mape_riv, pred_riv = [], []
# observed values of the test window
truth = raw_value[-test_size:].to_list()
# model inputs: one preparation for the AR model, one shared by both networks
ARscaler, ARtrain, ARtest = prepare_AR(raw_value, diff, train_size, test_size, n_lag, n_seq)
NNscaler, NNtrain, NNtest = prepare_nn(raw_value, diff, train_size, test_size, n_lag, n_seq)

for run in range(100):
    # AR(n_lag) with a constant term (no differencing, no MA part)
    model_AR = ARIMA(ARtrain, order=(n_lag, 0, 0), trend='c').fit()
    forecasts_AR = make_AR(model_AR, raw_value, ARtest, test_size, n_lag, n_seq, diff, ARscaler)
    score_AR = evaluate_forecasts(raw_value[-test_size:], forecasts_AR)

    # LSTM network
    model_lstm = fit_lstm(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts_lstm = make_lstm(model_lstm, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_lstm = evaluate_forecasts(raw_value[-test_size:], forecasts_lstm)

    # hybrid network
    model = fit_comb(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts = make_comb(model, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_hybrid = evaluate_forecasts(raw_value[-test_size:], forecasts)

    # record this run
    mape_riv.append([score_AR, score_lstm, score_hybrid])
    pred_riv.append([forecasts_AR, forecasts_lstm, forecasts])

In [None]:
# Regroup the stored Riverside forecasts by model. Each entry of pred_riv is
# one run's [AR, LSTM, hybrid] forecasts; iterating over the recorded runs
# avoids the hard-coded count of 100.
AR = [run_preds[0] for run_preds in pred_riv]
lstm = [run_preds[1] for run_preds in pred_riv]
hybrid = [run_preds[2] for run_preds in pred_riv]

# uncertainty() is defined earlier in the notebook; its two return values are
# bound here as the mean and std of each model's forecasts.
AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)

# prepare data (repeated here so the cell does not depend on raw_value still
# pointing at this county)
county = "Riverside"
s = extract(df1, county)
raw_value = s[740:740+(train_size+test_size+n_lag)]
truth = raw_value[-test_size:].to_list()

# plot: one panel per model, shared axes
plt.rcParams.update({'font.size': 20})
fig, axs = plt.subplots(1, 3, sharex='col', sharey='row')
fig.set_size_inches(16, 10, forward=True)
# One integer x position per test step. The previous np.linspace(0, 18, 18)
# hard-coded the test length and spaced the points 18/17 apart.
x = np.arange(len(truth))
panels = [
    ("AR", AR_mean, AR_std),
    ("LSTM", lstm_mean, lstm_std),
    ("Hybrid", hybrid_mean, hybrid_std),
]
for panel_ax, (panel_title, panel_mean, panel_std) in zip(axs, panels):
    panel_ax.plot(x, panel_mean, color='orange', linewidth=2, label="prediction")
    # shaded band: mean +/- 2 std
    panel_ax.fill_between(x, panel_mean - 2*panel_std, panel_mean + 2*panel_std,
                          edgecolor='#CC4F1B', facecolor='#FEEACC', linestyle='dashdot', antialiased=True)
    panel_ax.plot(x, truth, 'b', linewidth=2, label="ground truth")
    panel_ax.set_title(panel_title, fontsize='large')

plt.subplots_adjust(wspace=0.05)
# pyplot-level legend: drawn on the current axes only
plt.legend(fontsize="large")
#plt.legend(prop={"family": "Times New Roman"}, fontsize= "x-large")
fig_name = "Riv_2022-02-16_2022-05-14.png"
plt.savefig(fig_name, dpi=600)

In [None]:
# Average score per model for the Riverside experiment.
# Each entry of mape_riv is one run's [AR, LSTM, hybrid] result from
# evaluate_forecasts. Iterating over the recorded runs, rather than a
# hard-coded range(100), keeps this cell in step with the training loop.
mape_AR = [run_scores[0] for run_scores in mape_riv]
mape_lstm = [run_scores[1] for run_scores in mape_riv]
mape_hybrid = [run_scores[2] for run_scores in mape_riv]
# printed in the order AR, LSTM, hybrid
print(np.mean(mape_AR))
print(np.mean(mape_lstm))
print(np.mean(mape_hybrid))

In [None]:
# Regroup the stored Riverside forecasts by model. Each entry of pred_riv is
# one run's [AR, LSTM, hybrid] forecasts; iterating over the recorded runs
# avoids the hard-coded count of 100.
AR = [run_preds[0] for run_preds in pred_riv]
lstm = [run_preds[1] for run_preds in pred_riv]
hybrid = [run_preds[2] for run_preds in pred_riv]

# uncertainty() is defined earlier in the notebook; its two return values are
# bound here as the mean and std of each model's forecasts.
AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)
# largest std value for the LSTM, then for the hybrid model
print(max(lstm_std))
print(max(hybrid_std))

# Case 6
## Jagged Testing: Fresno 2021-02-11 to 2021-05-09

In [None]:
# Fresno window: train_size + test_size + n_lag consecutive values of the
# county series, starting at offset 370.
county = "Fresno"
s = extract(df1, county)
window_start = 370
window_stop = window_start + (train_size + test_size + n_lag)
raw_value = s[window_start:window_stop]
# display the selected window
raw_value

In [None]:
# Plot the selected Fresno window and mark where the train/test split falls.
plt.rcParams.update({'font.size': 12})
# Create a dedicated figure for this plot. The previous code called
# set_size_inches on `fig`, a handle left over from an earlier cell, so the
# requested size was never applied to the figure drawn here.
fig, ax = plt.subplots(figsize=(18, 6))
ax.plot(raw_value.values, color='cornflowerblue', linewidth=2)
# visualize the split of test/train
ax.axvline(x=train_size, color='orange', linewidth=2, label='train ends')
ax.axvline(x=train_size+n_lag, color='green', linewidth=2, label='test starts')
#ax.set_title("Training and Testing Set", fontname = "Times New Roman", fontsize='large', fontweight='bold')
ax.legend(fontsize='large')
# fig.savefig("Fres_raw_2021-02-11_2021-05-09.png", dpi=600)
plt.show()

In [None]:
# Fresno experiment: fit and score the three forecasters 100 times.
# mape_fres collects one [AR, LSTM, hybrid] score triple per run,
# pred_fres collects the matching [AR, LSTM, hybrid] forecasts.
mape_fres, pred_fres = [], []
# observed values of the test window
truth = raw_value[-test_size:].to_list()
# model inputs: one preparation for the AR model, one shared by both networks
ARscaler, ARtrain, ARtest = prepare_AR(raw_value, diff, train_size, test_size, n_lag, n_seq)
NNscaler, NNtrain, NNtest = prepare_nn(raw_value, diff, train_size, test_size, n_lag, n_seq)

for run in range(100):
    # AR(n_lag) with a constant term (no differencing, no MA part)
    model_AR = ARIMA(ARtrain, order=(n_lag, 0, 0), trend='c').fit()
    forecasts_AR = make_AR(model_AR, raw_value, ARtest, test_size, n_lag, n_seq, diff, ARscaler)
    score_AR = evaluate_forecasts(raw_value[-test_size:], forecasts_AR)

    # LSTM network
    model_lstm = fit_lstm(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts_lstm = make_lstm(model_lstm, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_lstm = evaluate_forecasts(raw_value[-test_size:], forecasts_lstm)

    # hybrid network
    model = fit_comb(NNtrain, n_lag, n_seq, n_batch, nb_epoch, n_neurons)
    forecasts = make_comb(model, n_batch, raw_value, NNtest, test_size, n_lag, n_seq, diff, NNscaler)
    score_hybrid = evaluate_forecasts(raw_value[-test_size:], forecasts)

    # record this run
    mape_fres.append([score_AR, score_lstm, score_hybrid])
    pred_fres.append([forecasts_AR, forecasts_lstm, forecasts])

In [None]:
# Regroup the stored Fresno forecasts by model. Each entry of pred_fres is
# one run's [AR, LSTM, hybrid] forecasts; iterating over the recorded runs
# avoids the hard-coded count of 100.
AR = [run_preds[0] for run_preds in pred_fres]
lstm = [run_preds[1] for run_preds in pred_fres]
hybrid = [run_preds[2] for run_preds in pred_fres]

# uncertainty() is defined earlier in the notebook; its two return values are
# bound here as the mean and std of each model's forecasts.
AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)

# prepare data (repeated here so the cell does not depend on raw_value still
# pointing at this county)
county = "Fresno"
s = extract(df1, county)
raw_value = s[370:370+(train_size+test_size+n_lag)]
truth = raw_value[-test_size:].to_list()

# plot: one panel per model, shared axes
plt.rcParams.update({'font.size': 20})
fig, axs = plt.subplots(1, 3, sharex='col', sharey='row')
fig.set_size_inches(16, 10, forward=True)
# One integer x position per test step. The previous np.linspace(0, 18, 18)
# hard-coded the test length and spaced the points 18/17 apart.
x = np.arange(len(truth))
panels = [
    ("AR", AR_mean, AR_std),
    ("LSTM", lstm_mean, lstm_std),
    ("Hybrid", hybrid_mean, hybrid_std),
]
for panel_ax, (panel_title, panel_mean, panel_std) in zip(axs, panels):
    panel_ax.plot(x, panel_mean, color='orange', linewidth=2, label="prediction")
    # shaded band: mean +/- 2 std
    panel_ax.fill_between(x, panel_mean - 2*panel_std, panel_mean + 2*panel_std,
                          edgecolor='#CC4F1B', facecolor='#FEEACC', linestyle='dashdot', antialiased=True)
    panel_ax.plot(x, truth, 'b', linewidth=2, label="ground truth")
    panel_ax.set_title(panel_title, fontsize='large')

plt.subplots_adjust(wspace=0.05)
# pyplot-level legend: drawn on the current axes only
plt.legend(fontsize="large")
#plt.legend(prop={"family": "Times New Roman"}, fontsize= "x-large")
fig_name = "Fres_2021-02-11_2021-05-09.png"
plt.savefig(fig_name, dpi=600)

In [None]:
# Average score per model for the Fresno experiment.
# Each entry of mape_fres is one run's [AR, LSTM, hybrid] result from
# evaluate_forecasts. Iterating over the recorded runs, rather than a
# hard-coded range(100), keeps this cell in step with the training loop.
mape_AR = [run_scores[0] for run_scores in mape_fres]
mape_lstm = [run_scores[1] for run_scores in mape_fres]
mape_hybrid = [run_scores[2] for run_scores in mape_fres]
# printed in the order AR, LSTM, hybrid
print(np.mean(mape_AR))
print(np.mean(mape_lstm))
print(np.mean(mape_hybrid))

In [None]:
# Regroup the stored Fresno forecasts by model. Each entry of pred_fres is
# one run's [AR, LSTM, hybrid] forecasts; iterating over the recorded runs
# avoids the hard-coded count of 100.
AR = [run_preds[0] for run_preds in pred_fres]
lstm = [run_preds[1] for run_preds in pred_fres]
hybrid = [run_preds[2] for run_preds in pred_fres]

# uncertainty() is defined earlier in the notebook; its two return values are
# bound here as the mean and std of each model's forecasts.
AR_mean, AR_std = uncertainty(AR)
lstm_mean, lstm_std = uncertainty(lstm)
hybrid_mean, hybrid_std = uncertainty(hybrid)
# largest std value for the LSTM, then for the hybrid model
print(max(lstm_std))
print(max(hybrid_std))

# Save Data

In [None]:
# Persist the per-run scores and forecasts of every experiment.
# Scores go to mape_<tag>.csv and forecasts to pred_<tag>.csv. Previously both
# frames were written to the same mape_<tag>.csv path, so the forecasts
# overwrote the scores and the scores were never saved.
results_by_tag = {
    "sd": (mape_sd, pred_sd),
    "sf": (mape_sf, pred_sf),
    "la": (mape_la, pred_la),
    "sb": (mape_sb, pred_sb),
    "sf_2": (mape_sf_2, pred_sf_2),
    "riv": (mape_riv, pred_riv),
    "fres": (mape_fres, pred_fres),
}
for tag, (mape_runs, pred_runs) in results_by_tag.items():
    pd.DataFrame(mape_runs).to_csv("mape_" + tag + ".csv")
    pd.DataFrame(pred_runs).to_csv("pred_" + tag + ".csv")