In [1]:
import os
import numpy as np
import pandas as pd 
from utils import plot_series, split_dataset, windowed_dataset, tune_learning_rate
import plotly.graph_objs as go

from dateutil.relativedelta import relativedelta

import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Lambda

In [2]:
# Fit Model
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

def fit_model(model,trainset, lr, init_weights, epochs):
    """
    Restore a model's starting weights, train it and plot the training curves.

    Inputs
        - model : Keras model; it is re-weighted, compiled and fitted in place
        - trainset : training data handed unchanged to `model.fit`
        - lr : learning rate of the SGD optimizer (float)
        - init_weights : weights restored before training, in the form accepted
          by `model.set_weights`
        - epochs : number of epochs for training (int)
    Outputs
        - history : object returned by `model.fit`; the per-epoch "loss" and
          "mae" values are read from `history.history`
        - fig : plotly figure with one line for loss and one for mae
    """
    # Drop Keras' global state, then put the model back on its starting weights
    tf.keras.backend.clear_session()
    model.set_weights(init_weights)

    # Huber loss optimised with momentum SGD; MAE is tracked as a metric
    model.compile(
        loss = tf.keras.losses.Huber(),
        optimizer = tf.keras.optimizers.SGD(learning_rate = lr, momentum = 0.9),
        metrics = ["mae"],
    )

    history = model.fit(trainset, epochs = epochs)

    # One trace per tracked quantity, both drawn against the epoch index
    epoch_axis = np.arange(0, epochs)
    fig = go.Figure()
    for curve in ("loss", "mae"):
        fig.add_trace(go.Scatter(x = epoch_axis, y = history.history[curve], name = curve))

    fig.update_layout(
        xaxis_title = "epochs",
        yaxis_title = "loss/mae",
        width = 1500,
        height = 800,
        template = 'plotly_white',
    )

    return history, fig

# Forecast 
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

def model_forecast(model, series, window_size, batch_size):
    """
    Run a model over every sliding window of a series and return its predictions.

    Args :
        - model (TF Keras Model) - model that accepts data windows
        - series (array of floats) - contains the values of the time series
        - window_size (int) - the number of time steps in each window
        - batch_size (int) - number of windows per prediction batch

    Returns:
        forecast (np.array) - whatever `model.predict` returns for the windows
    """
    windows = (
        tf.data.Dataset.from_tensor_slices(series)
        # consecutive windows one step apart; incomplete trailing windows are dropped
        .window(window_size, shift = 1, drop_remainder = True)
        # each window is a nested dataset: collapse it into a single tensor
        .flat_map(lambda window: window.batch(window_size))
        # group the windows into batches and keep one batch prefetched
        .batch(batch_size)
        .prefetch(1)
    )

    return model.predict(windows)

In [3]:
def load_data(path, last_date = "2022-05-15 23:00:00", endpoint=31):
    """
    Read a header-less Excel sheet and attach hourly timestamps to its rows.

    Inputs
        - path : location of the Excel file, passed to `pd.read_excel`
        - last_date : timestamp given to the last retained row,
          formatted as "%Y-%m-%d %H:%M:%S" (str)
        - endpoint : number of trailing rows to discard before the rows are
          dated (int); 0 keeps every row
    Outputs
        - df : DataFrame in which column 0 of the sheet is renamed to "y" and a
          "date" column holds ascending timestamps, one hour apart, ending at
          `last_date`
    """
    raw = pd.read_excel(path, header = None)

    # A plain `raw[:-endpoint]` yields an empty frame when endpoint is 0,
    # so only slice when there is something to drop.
    trimmed = raw.iloc[:-endpoint] if endpoint else raw

    last_ts = pd.to_datetime(last_date, format = "%Y-%m-%d %H:%M:%S")
    # Ascending hourly stamps whose final value is last_ts, one per retained row
    dates = pd.date_range(end = last_ts, periods = len(trimmed), freq = pd.Timedelta(hours = 1))

    # rename/assign return new frames, so the slice above is never mutated in place
    return trimmed.rename(columns = {0 : "y"}).assign(date = dates)


In [4]:
# Load Dataset
# --------------------------------------------------------------------------------------------------
# Notes
#   - `file_path` and the commented-out read below belong to the Sunspots CSV;
#     `index_col = 0` has to be removed when reading other datasets that way.
print(os.getcwd())
file_path = "../tutorials/tf_and_keras/data/Sunspots.csv"
#df = pd.read_csv(file_path, index_col=0) 
#df.rename(columns = {"Date" : "date", "Monthly Mean Total Sunspot Number" : "sunspots"}, inplace = True)

# Series used in this notebook: columns "y" and "date" built by load_data
df = load_data("data/load.xlsx")

# Plot original TS on a fresh figure
p1 = plot_series(df["date"], df["y"], go.Figure(), "energy load")
p1.update_layout(width = 800, height = 500)
p1.show()

/Users/imantha/workspace/cryo-polygen/ts-forecasting


In [5]:
# Test - Train Split
# -------------------------------------------------------------------------------------------------
# Notes
#   -split_dataset(seq = np.array , ...)

# Hyper-parameters of the split and of the sliding window
split_time = 3000
window_size = 30
batch_size = 32
shuffle_buffer_size = 1000

# Split the raw values into a training part and a validation part
X_train, X_valid = split_dataset(seq = df["y"].values, split_time = split_time)

# Sliding window over the training part (shuffling switched off)
trainset = windowed_dataset(
    series = X_train,
    window_size = window_size,
    batch_size = batch_size,
    shuffle_buffer = shuffle_buffer_size,
    shuffle = False,
)

In [6]:
# LSTM Model - Tensorflow
# --------------------------------------------------------------------------------------------------
# Stack: causal 1-D convolution -> two LSTM layers -> two ReLU dense layers -> single-unit output.
# Each input sample has shape [window_size, 1].
# The final Lambda multiplies the output by 400
# (presumably to bring predictions closer to the scale of the series - TODO confirm).
model =  Sequential([
    Conv1D(filters = 64, kernel_size = 3, strides = 1, activation = "relu", padding = "causal", input_shape = [window_size, 1]),
    LSTM(64, return_sequences = True),
    LSTM(64),
    Dense(30, activation = "relu"),
    Dense(30, activation = "relu"),
    Dense(1),
    Lambda(lambda x: x * 400)
])

# --- Tune learning rate ---
# Store the untrained weights so that training can later restart from them
init_weights = model.get_weights()

# Search for a learning rate starting from lr = 1e-8; the helper returns the
# training history, a plotly figure and the selected learning rate
history, p2, best_lr = tune_learning_rate(model = model, trainset = trainset, lr = 1e-8)
p2.update_layout(width = 800, height = 500)
p2.show()

print(f"Best learning-rate : {best_lr}")
# Value noted from an earlier run (the printed value can differ between runs):
#best lr = 5.011872336272723e-07

2022-06-02 16:16:27.753786: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Best learning-rate : 1.778279410038923e-07


In [7]:
# Train Model
# -------------------------------------------------------------------------------------------------
# fit_model clears the Keras session and restores `init_weights` before fitting,
# so training starts from the weights stored ahead of the learning-rate search.

history, p3 = fit_model(
    model = model,
    trainset = trainset,
    lr = best_lr,
    init_weights = init_weights,
    epochs = 100,
)
p3.update_layout(width = 800, height = 500)
p3.show()

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [8]:
# Prediction 
# ------------------------------------------------------------------------------------------------
# Take last window (from training set) + rest of data (validation set).
# The final observation is left out: a window ending on it would forecast a
# step that lies beyond the end of the data.
forecasted_series = df.y.values[split_time - window_size : -1]
# prediction: one forecast per window, i.e. len(df) - split_time values that
# correspond to the positions split_time ... len(df) - 1
forecast = model_forecast(model = model, series = forecasted_series, window_size = window_size, batch_size = batch_size)
# drop single dimension axis
results = forecast.squeeze()

# The timestamps must run to the end of the series: slicing with `: -1` here
# would supply one x value fewer than there are forecasts.
# NOTE: this appends a trace to the figure created in the loading cell, so
# re-running this cell adds the trace again.
p1.add_trace(go.Scatter(
    x = df.date.values[split_time:],
    y = results,
    mode = "lines",
    name = "forecasted Ts"
))

p1.show()

In [12]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [14]:
# Score the forecasts against the held-out validation values.
# `results` and `X_valid` must have the same number of samples for these calls.
print(f"MSE : {mean_squared_error(y_true=X_valid, y_pred = results)}")
print(f"MAE : {mean_absolute_error(y_true=X_valid, y_pred = results)}")

MSE : 11587.884935568069
MAE : 72.41096710363435


In [15]:
# Display the model's weight variables (the output is the full repr of every tensor)
model.weights

[<tf.Variable 'conv1d/kernel:0' shape=(3, 1, 64) dtype=float32, numpy=
 array([[[ 0.17423865,  0.20285042, -0.11946086,  0.01420899,
          -0.1223103 ,  0.12867047,  0.09017225,  0.01635855,
           0.14564891, -0.06328992, -0.07256056, -0.05550368,
           0.07540175,  0.05885328,  0.01824968, -0.14660992,
           0.11173006,  0.01819397,  0.04289448, -0.11221873,
           0.12707356, -0.14087065, -0.1336939 ,  0.06957962,
          -0.14264505, -0.07854366, -0.01941684, -0.1366665 ,
          -0.13356122, -0.1621502 ,  0.05293549,  0.05186799,
          -0.00391741, -0.15430994,  0.09613304, -0.12224622,
           0.08616988,  0.10148938,  0.19745986, -0.01992902,
           0.00439858, -0.12345627, -0.06456272, -0.0074877 ,
          -0.0902556 ,  0.08630838, -0.16442713,  0.13545828,
           0.15052308,  0.04767253,  0.24661244, -0.02449835,
           0.02969101, -0.00709416, -0.04812169,  0.08666363,
           0.17538904,  0.12067331, -0.10522272,  0.01817448,