In [224]:
import pandas as pd
import numpy as np
from IPython.display import display
import plotly.express as px

In [225]:
def run_sequence_plot(x, y, title, xtitle, ytitle):
    figure = px.line(x=x, y=y)
    figure.update_layout(title=title, width=800, xaxis_title=xtitle, yaxis_title=ytitle)
    return figure


# Read parquet dataset

In [226]:
df = pd.read_parquet("data/household.parquet")

# Exclude data before July 2007
df = df.loc["2007-07":]
df_monthly = df.resample("M").quantile(0.99)
var = "Global_active_power"
fig = run_sequence_plot(
    df_monthly.index, df_monthly[var], f"Monthly 99% percentile {var}", "Time", f"{var}"
)
fig.show()

# Naive forecast

In [267]:
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

months_predict = 10
y_true = df_monthly[var].iloc[-months_predict:]
y_predict = df_monthly[var].iloc[-months_predict - 1 : -1]

print(
    f"MAE: {mean_squared_error(y_true, y_predict):.4f}, MAPE: {mean_absolute_percentage_error(y_true, y_predict):.4f}"
)
# print(f"y: {y_true}\ny_shift: {y_predict}")

MAE: 0.2142, MAPE: 0.0968


In [228]:
figure = px.line(x=df_monthly.index, y=df_monthly[var])
figure.add_scatter(x=y_true.index, y=y_predict, mode="lines", name="Naive forecast")
figure.update_layout(
    title=f"Monthly 99% percentile {var}",
    width=1000,
    xaxis_title="Time",
    yaxis_title=f"{var}",
)
figure.show()

# Simple RNN

In [229]:
import keras
from keras.models import Sequential
from keras.layers import (
    Dense,
    SimpleRNN,
    LSTM,
    Activation,
    Dropout,
    TimeDistributed,
    GRU,
    Flatten,
)

In [230]:
def get_keras_format_series(series):
    """
    Convert a series to a numpy array of shape
    [n_samples (how many time series),
     time_steps (how many time steps in my series),
      features (how many time series referring to different features)]
    """

    series = np.array(series.values)
    return series.reshape(series.shape[0], series.shape[1], 1)

In [231]:
import os
from enum import Enum


class KerasCoreModel(Enum):
    simpleRNN = 1
    lstm = 2
    gru = 3


def neural_net_time_series(
    train_X,
    train_y,
    val_X,
    val_y,
    model_name: str,
    cell_units: int,
    epochs: int,
    core_model: KerasCoreModel,
    n_layers: int,
):

    # initialize model
    model = Sequential()

    shape_in = [None, 1]

    # construct a layer with specified number of hidden units
    # per cell and desired sequence input format

    print(f"Model loaded: {core_model}")
    if core_model.value == KerasCoreModel.simpleRNN.value:
        print("RNN")
        for i in range(n_layers):
            if i == n_layers - 1:
                print(
                    "model.add(SimpleRNN(cell_units, input_shape=(train_X.shape[1],1)))"
                )
                model.add(SimpleRNN(cell_units, input_shape=shape_in))
            else:
                print(
                    "model.add(SimpleRNN(cell_units, input_shape=(train_X.shape[1],1), return_sequences=True))"
                )
                model.add(
                    SimpleRNN(cell_units, input_shape=shape_in, return_sequences=True)
                )

    elif core_model.value == KerasCoreModel.lstm.value:
        print("LSTM")
        for i in range(n_layers):
            if i == n_layers - 1:
                print("model.add(LSTM(cell_units, input_shape=(train_X.shape[1],1)))")
                model.add(LSTM(cell_units, input_shape=shape_in))
            else:
                print(
                    "model.add(LSTM(cell_units, input_shape=(train_X.shape[1],1), return_sequences=True))"
                )
                model.add(LSTM(cell_units, input_shape=shape_in, return_sequences=True))
    else:
        print("GRU")
        for i in range(n_layers):
            if i == n_layers - 1:
                print("model.add(GRU(cell_units, input_shape=(train_X.shape[1],1)))")
                model.add(GRU(cell_units, input_shape=shape_in))
            else:
                print(
                    "model.add(GRU(cell_units, input_shape=(train_X.shape[1],1), return_sequences=True))"
                )
                model.add(GRU(cell_units, input_shape=shape_in, return_sequences=True))

    # add an output layer to make final predictions
    model.add(Dense(1))

    # define the loss function / optimization strategy, and fit
    # the model with the desired number of passes over the data (epochs)
    model.compile(loss="mean_squared_error", optimizer="adam")

    filename = os.path.join("models", model_name + ".h5")

    chk_save = [keras.callbacks.ModelCheckpoint(filename, save_best_only=True)]

    history = model.fit(
        train_X,
        train_y,
        epochs=epochs,
        batch_size=64,
        verbose=1,
        validation_data=(val_X, val_y),
        callbacks=chk_save,
    )
    model.summary()
    return model, history

In [262]:
# Train: use to train the model
# Test: use for final prediction test
look_back_steps = 10
tseries = df_monthly[var].reset_index(drop=True)

train = tseries[: -look_back_steps - 1]
test = tseries[-look_back_steps - 1 :]

print(f"Train X:\n{train}\n\nTest X:\n{test}")

Train X:
0     4.67122
1     4.68922
2     5.04406
3     5.34600
4     5.50606
5     5.84922
6     5.32800
7     4.54000
8     4.84722
9     4.75006
10    4.97244
11    4.79000
12    4.20000
13    4.39966
14    4.58802
15    4.98722
16    5.18200
17    5.04322
18    5.52400
19    4.46200
20    4.56600
21    4.42004
22    4.19122
23    3.75602
24    4.10000
25    4.41800
26    4.23400
27    4.82722
28    5.44400
29    5.09044
30    4.87406
Name: Global_active_power, dtype: float64

Test X:
31    4.84084
32    4.51406
33    4.01282
34    4.11602
35    4.03282
36    3.41000
37    3.97004
38    3.87928
39    4.70402
40    4.43200
41    3.88200
Name: Global_active_power, dtype: float64


In [263]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        tr = dataset[i : (i + look_back), np.newaxis]
        dataX.append(tr)
        dataY.append(dataset[i + look_back, np.newaxis])
    return np.array(dataX, dtype=np.float32), np.array(dataY, dtype=np.float32)


def create_dataset_test(dataset, train, look_back=1, train_lb=12):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):

        if -train_lb + 1 + i != 0:
            copy_train = train[-train_lb + 1 + i :]
        else:
            copy_train = pd.Series([])

        series_list = [copy_train, dataset[:i], dataset[i : i + look_back]]

        dataX.append(pd.concat(series_list)[:, np.newaxis])
        dataY.append(dataset[i + look_back, np.newaxis])

    return np.array(dataX, dtype=np.float32), np.array(dataY, dtype=np.float32)

In [264]:
train_X, train_y = create_dataset(train, look_back=look_back_steps)
test_X, test_y = create_dataset_test(test, train, look_back=1, train_lb=look_back_steps)


print(f"Shape train X: {train_X.shape}")
print(f"Shape train y: {train_y.shape}")
print(f"Shape test X: {test_X.shape}")
print(f"Shape test y: {test_y.shape}")

Shape train X: (20, 10, 1)
Shape train y: (20, 1)
Shape test X: (10, 10, 1)
Shape test y: (10, 1)



Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.





In [235]:
# X_train = get_keras_format_series(train)
# y_train = np.array(df_monthly.loc[train, var])
#
# X_test = get_keras_format_series(df_monthly.loc[test])
# y_test = np.array(df_monthly.loc[test, var])

In [271]:
model_rnn, train_history = neural_net_time_series(
    train_X,
    train_y,
    test_X,
    test_y,
    model_name="simple_rnn",
    cell_units=20,
    epochs=600,
    core_model=KerasCoreModel.simpleRNN,
    n_layers=1,
)

Model loaded: KerasCoreModel.simpleRNN
RNN
model.add(SimpleRNN(cell_units, input_shape=(train_X.shape[1],1)))
Epoch 1/600
Epoch 2/600
Epoch 3/600
Epoch 4/600
Epoch 5/600
Epoch 6/600
Epoch 7/600
Epoch 8/600
Epoch 9/600
Epoch 10/600
Epoch 11/600
Epoch 12/600
Epoch 13/600
Epoch 14/600
Epoch 15/600
Epoch 16/600
Epoch 17/600
Epoch 18/600
Epoch 19/600
Epoch 20/600
Epoch 21/600
Epoch 22/600
Epoch 23/600
Epoch 24/600
Epoch 25/600
Epoch 26/600
Epoch 27/600
Epoch 28/600
Epoch 29/600
Epoch 30/600
Epoch 31/600
Epoch 32/600
Epoch 33/600
Epoch 34/600
Epoch 35/600
Epoch 36/600
Epoch 37/600
Epoch 38/600
Epoch 39/600
Epoch 40/600
Epoch 41/600
Epoch 42/600
Epoch 43/600
Epoch 44/600
Epoch 45/600
Epoch 46/600
Epoch 47/600
Epoch 48/600
Epoch 49/600
Epoch 50/600
Epoch 51/600
Epoch 52/600
Epoch 53/600
Epoch 54/600
Epoch 55/600
Epoch 56/600
Epoch 57/600
Epoch 58/600
Epoch 59/600
Epoch 60/600
Epoch 61/600
Epoch 62/600
Epoch 63/600
Epoch 64/600
Epoch 65/600
Epoch 66/600
Epoch 67/600
Epoch 68/600
Epoch 69/600
Ep

In [272]:
model_rnn = keras.models.load_model("models/simple_rnn.h5")
mse_test = model_rnn.evaluate(test_X, test_y)
print(f"MSE Test: {mse_test:.4f}")

y_pred = model_rnn.predict(test_X).flatten()
print(
    f"MAE: {mean_squared_error(y_true, y_pred):.4f}, MAPE: {mean_absolute_percentage_error(y_true, y_pred):.4f}"
)

MSE Test: 0.1418
MAE: 0.1418, MAPE: 0.0748


# Training curves

In [273]:
epochs_df = pd.DataFrame(train_history.history)
display(epochs_df)
fig = px.line(epochs_df)
fig.show()

Unnamed: 0,loss,val_loss
0,20.602638,14.960551
1,19.939259,14.381579
2,19.286793,13.815409
3,18.644602,13.261894
4,18.012592,12.721014
...,...,...
595,0.134960,0.230827
596,0.134894,0.230769
597,0.134828,0.230711
598,0.134762,0.230653


# Plot estimation

In [274]:
figure = px.line(x=df_monthly.index, y=df_monthly[var])
figure.add_scatter(
    x=y_true.index,
    y=model_rnn.predict(test_X).flatten(),
    mode="lines",
    name="Simple RNN",
)
figure.update_layout(
    title=f"Monthly 99% percentile {var}",
    width=1000,
    xaxis_title="Time",
    yaxis_title=f"{var}",
)
figure.show()

# Long-Short term memory (LSTM)

In [279]:
model_lstm, train_history_ltsm = neural_net_time_series(
    train_X,
    train_y,
    test_X,
    test_y,
    cell_units=30,
    epochs=500,
    core_model=KerasCoreModel.lstm,
    n_layers=1,
    model_name="simple_ltsm",
)

Model loaded: KerasCoreModel.lstm
LSTM
model.add(LSTM(cell_units, input_shape=(train_X.shape[1],1)))
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/50

In [280]:
model_lstm = keras.models.load_model("models/simple_ltsm.h5")
y_pred_lstm = model_lstm.predict(test_X).flatten()
print(
    f"MAE: {mean_squared_error(y_true, y_pred_lstm):.4f}, MAPE: {mean_absolute_percentage_error(y_true, y_pred_lstm):.4f}"
)

MAE: 0.1278, MAPE: 0.0698


In [281]:
epochs_lstm_df = pd.DataFrame(train_history_ltsm.history)
display(epochs_lstm_df)
fig = px.line(epochs_lstm_df)
fig.show()

Unnamed: 0,loss,val_loss
0,30.759192,24.024384
1,30.105957,23.443239
2,29.466761,22.875681
3,28.841726,22.321728
4,28.230862,21.781206
...,...,...
495,0.208259,0.438407
496,0.208238,0.438349
497,0.208217,0.438291
498,0.208196,0.438233


In [282]:
figure = px.line(x=df_monthly.index, y=df_monthly[var])
figure.add_scatter(x=y_true.index, y=y_pred_lstm, mode="lines", name="Simple LTSM")
figure.update_layout(
    title=f"Monthly 99% percentile {var}",
    width=1000,
    xaxis_title="Time",
    yaxis_title=f"{var}",
)
figure.show()

# Stack two LSTM layers


In [244]:
model_double_lstm, train_history_double_ltsm = neural_net_time_series(
    train_X,
    train_y,
    test_X,
    test_y,
    cell_units=10,
    epochs=400,
    core_model=KerasCoreModel.lstm,
    n_layers=2,
    model_name="double_ltsm",
)

Model loaded: KerasCoreModel.lstm
LSTM
model.add(LSTM(cell_units, input_shape=(train_X.shape[1],1), return_sequences=True))
model.add(LSTM(cell_units, input_shape=(train_X.shape[1],1)))
Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoc

In [245]:
model_double_lstm = keras.models.load_model("models/double_ltsm.h5")
y_pred_2lstm = model_double_lstm.predict(test_X).flatten()
print(
    f"MAE: {mean_squared_error(y_true, y_pred_2lstm):.4f}, MAPE: {mean_absolute_percentage_error(y_true, y_pred_2lstm):.4f}"
)

MAE: 0.1773, MAPE: 0.0876


In [246]:
epochs_2lstm_df = pd.DataFrame(train_history_double_ltsm.history)
display(epochs_2lstm_df)
fig = px.line(epochs_2lstm_df)
fig.show()

Unnamed: 0,loss,val_loss
0,20.397760,14.969345
1,20.160313,14.767198
2,19.921362,14.564003
3,19.680882,14.359749
4,19.438869,14.154431
...,...,...
395,0.186689,0.562713
396,0.186689,0.562730
397,0.186688,0.562744
398,0.186688,0.562759


In [247]:
figure = px.line(x=df_monthly.index, y=df_monthly[var])
figure.add_scatter(x=y_true.index, y=y_pred_2lstm, mode="lines", name="Double LSTM")
figure.update_layout(
    title=f"Monthly 99% percentile {var}",
    width=1000,
    xaxis_title="Time",
    yaxis_title=f"{var}",
)
figure.show()

# GRU

In [248]:
model_gru, train_history_double_gru = neural_net_time_series(
    train_X,
    train_y,
    test_X,
    test_y,
    cell_units=25,
    epochs=500,
    core_model=KerasCoreModel.gru,
    n_layers=2,
    model_name="double_gru",
)

Model loaded: KerasCoreModel.gru
GRU
model.add(GRU(cell_units, input_shape=(train_X.shape[1],1), return_sequences=True))
model.add(GRU(cell_units, input_shape=(train_X.shape[1],1)))
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64

In [249]:
model_double_gru = keras.models.load_model("models/double_gru.h5")
y_pred_2gru = model_double_gru.predict(test_X).flatten()
print(
    f"MAE: {mean_squared_error(y_true, y_pred_2gru):.4f}, MAPE: {mean_absolute_percentage_error(y_true, y_pred_2gru):.4f}"
)

MAE: 0.1866, MAPE: 0.0862


In [250]:
epochs_2gru_df = pd.DataFrame(train_history_double_gru.history)
display(epochs_2gru_df)
fig = px.line(epochs_2gru_df)
fig.show()

Unnamed: 0,loss,val_loss
0,24.099360,16.916092
1,22.689432,15.785130
2,21.322645,14.696640
3,20.001431,13.652027
4,18.727936,12.652133
...,...,...
495,0.161251,0.437554
496,0.161184,0.437384
497,0.161117,0.437213
498,0.161050,0.437042


In [251]:
figure = px.line(x=df_monthly.index, y=df_monthly[var])
figure.add_scatter(x=y_true.index, y=y_pred_2gru, mode="lines", name="Double GRU")
figure.update_layout(
    title=f"Monthly 99% percentile {var}",
    width=1000,
    xaxis_title="Time",
    yaxis_title=f"{var}",
)
figure.show()