In [63]:
import os
import math
import random
import seaborn
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from keras.callbacks import EarlyStopping
from collections import namedtuple


In [24]:
df = pd.read_excel("M3C.xls", usecols="A:Z")

df_micro = df.iloc[0:146,]
df_micro = df_micro.iloc[:,6:27]

In [None]:
#Detrend
#for i in range(len(df_train)):
#    data = df_train.iloc[i]
#    poly_fit = np.polyfit(np.arange(14), data, 2)
#    trend = np.polyval(poly_fit, np.arange(14))
#    df_train.iloc[i] = df_train.iloc[i] - trend

In [25]:
df_train = df_micro.iloc[:,:-6]
df_test = df_micro.iloc[:, -6:]

##Standardising
scaler = StandardScaler()
df_train = scaler.fit_transform(df_train.to_numpy().reshape(-1,1))
df_train = pd.DataFrame(df_train)
MEAN = scaler.mean_
STD = scaler.scale_

def exponential_smoothing(data, alpha):
    return data.ewm(alpha=alpha, adjust=False).mean()

# Assuming 'data' is a pandas Series or DataFrame column
alpha = 0.825 
df_train = exponential_smoothing(df_train, alpha)
df_train = df_train.to_numpy().reshape(-1,14)
df_test = df_test.to_numpy().reshape(-1,6)



In [26]:
def get_labelled_window(x, horizon=1):
  return x[:, :-horizon], x[:, -horizon]

def make_windows(x, window_size=4, horizon=1):
  window_step = np.expand_dims(np.arange(window_size+horizon), axis=0)
  window_indexes = window_step + np.expand_dims(np.arange(len(x)-(window_size+horizon-1)), axis=0).T # create 2D array of windows of window size
  windowed_array = x[window_indexes]
  windows, labels = get_labelled_window(windowed_array, horizon=horizon)
  return windows.reshape(-1,4), labels.reshape(-1,1)

In [80]:
train_x = []
train_y = []
test_x = []
test_y = []

for i in range(len(df_train)):
    windows_train, labels_train = make_windows(df_train[i], window_size=4, horizon=1)
    windows_test, labels_test = make_windows(df_test[i], window_size=4, horizon=1)
    train_x = np.concatenate((np.array(train_x).reshape(-1,4), windows_train.reshape(-1,4)))
    train_y = np.concatenate((np.array(train_y).reshape(-1,1), labels_train.reshape(-1,1)))
    test_x = np.concatenate((np.array(test_x).reshape(-1,4), windows_test.reshape(-1,4)))
    test_y = np.concatenate((np.array(test_y).reshape(-1,1), labels_test.reshape(-1,1)))

train_x

array([[-1.28740269, -1.22921771, -1.1544266 , -1.06062162],
       [-1.22921771, -1.1544266 , -1.06062162, -0.94811176],
       [-1.1544266 , -1.06062162, -0.94811176, -0.7851874 ],
       ...,
       [-0.61448562, -0.66323109, -0.34216097, -0.33915526],
       [-0.66323109, -0.34216097, -0.33915526, -0.4754366 ],
       [-0.34216097, -0.33915526, -0.4754366 , -0.3073803 ]])

In [68]:
# SMAPE
def evaluate_smape(y_true, y_pred):
    numerator = 2 * tf.abs(y_pred - y_true)
    denominator = tf.abs(y_pred) + tf.abs(y_true)
    smape = tf.reduce_mean(numerator / denominator) * 100
    return smape

def smape_loss(y_true, y_pred):
    epsilon = 0.1  # to avoid division by zero
    denominator = tf.abs(y_true) + tf.abs(y_pred) + epsilon
    diff = tf.abs(y_true - y_pred) / denominator
    smape = 2.0 * tf.reduce_mean(diff)
    return smape

def metric_mdape(y_true, y_pred):
    return tfp.stats.percentile((tf.abs(tf.math.subtract(y_true, y_pred)/ y_true)), 50.0, interpolation='midpoint')
      
def evaluate_mdape(y_true, y_pred):
    return np.median((np.abs(np.subtract(y_true, y_pred)/ y_true))) * 100

def evaluate_pred(y_true, y_pred):
    # Symmetric mean absolute percentage error
    smape = evaluate_smape(y_true, y_pred)
    # Median symmetric absolute percentage error
    mdape = evaluate_mdape(y_true, y_pred)
    return smape, mdape

In [31]:
# Destandardise
def de_standardise(value):
    return value * STD + MEAN

def standardise(value):
    return (value - MEAN) / STD

In [32]:
# Hyperparameters
Combination = namedtuple("Combination", "learning_rate batch_size regularization hidden_layers hidden_neurons")

learning_rates = np.array([0.001, 0.01, 0.1])
batch_sizes = np.array([16, 32, 64, 128])
regularizations = np.array([0.001, 0.01, 0.001])
hidden_layers = np.array([2, 3, 4, 6, 8])
hidden_neurons = np.array([2, 3, 4, 5])

combinations = list(itertools.starmap(Combination, itertools.product(learning_rates, batch_sizes, regularizations, hidden_layers, hidden_neurons)))

In [91]:
# Time-series expanding window validation
#with tf.device('/cpu:0'):
    
tf.random.set_seed(42)
eval_scores = []
tscv = TimeSeriesSplit(n_splits=5)
def cross_validation(combination, train_x=train_x, train_y=train_y, tscv=tscv):
    smape_scores = []
    mdape_scores = []

    # Cross-Validation
    for train_index, test_index in tscv.split(train_x):
        train_x_cv, val_x_cv = train_x[train_index], train_x[test_index]
        train_y_cv, val_y_cv = train_y[train_index], train_y[test_index]
        # Create model with selected hyperparameters
        model_cv = tf.keras.Sequential(name="model")

        #chosen_hidden_neurons = []

        for i in range(combination.hidden_layers):
            #random_neuron = random.choice(hidden_neurons)
            #chosen_hidden_neurons.append(random_neuron)
            model_cv.add(tf.keras.layers.Dense(combination.hidden_neurons, 
                                            activation="relu", 
                                            kernel_initializer=tf.initializers.HeNormal(), 
                                            kernel_regularizer=tf.keras.regularizers.l2(combination.regularization)))
        model_cv.add(tf.keras.layers.Dense(1, activation="linear", 
                                        kernel_initializer=tf.initializers.HeNormal(), 
                                        kernel_regularizer=tf.keras.regularizers.l2(combination.regularization)))


        model_cv.compile(loss=smape_loss,
                        optimizer=tf.keras.optimizers.Adam(learning_rate=combination.learning_rate),
                        metrics=[metric_mdape, "mae", "mse"]) # Backpropagation
        
        model_cv.fit(train_x_cv, train_y_cv, epochs=50, batch_size=combination.batch_size, verbose=1)

        predictions = model_cv.predict(val_x_cv)
        smape_score, mdape_score = evaluate_pred(de_standardise(val_y_cv), de_standardise(predictions))
        
        smape_scores.append(smape_score)
        mdape_scores.append(mdape_score)
        
    mean_smape = np.mean(smape_scores)
    mean_mdape = np.mean(mdape_scores)
    hyperparameters = {
        'learning_rate': combination.learning_rate,
        'batch_size': combination.batch_size,
        'regularization': combination.regularization,
        'hidden_neurons': combination.hidden_neurons,
        'hidden_layers': combination.hidden_layers
    }
    print(f"Current mean SMAPE: {mean_smape}, Current hyperparameters: {hyperparameters}")
    return mean_smape, mean_mdape, hyperparameters

random_combinations = random.sample(combinations, 3)
results = map(cross_validation, random_combinations)

optimal_smape = float('inf')
optimal_mdape = float('inf')
optimal_hyperparameters = {}
for result in results:
    smape, mdape, hyperparameters = result
    if smape < optimal_smape:
        optimal_smape = smape
        optimal_mdape = mdape
        optimal_hyperparameters = hyperparameters
print("Best Hyperparameters:", optimal_hyperparameters)
print("Best SMAPE Score:", optimal_smape)
print("Best MDAPE Score:", optimal_mdape)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

KeyboardInterrupt: 

In [70]:
print(f"Regularization: {optimal_hyperparameters['regularization']}")
print(f"Learning Rate: {optimal_hyperparameters['learning_rate']}")
print(f"Batch Size: {optimal_hyperparameters['batch_size']}")

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(4, 1)),
], name="model")

for i in range(optimal_hyperparameters["hidden_layers"]):
    model.add(tf.keras.layers.Dense(optimal_hyperparameters["hidden_neurons"], 
                                    activation="relu", 
                                    kernel_initializer=tf.initializers.HeNormal(), 
                                    kernel_regularizer=tf.keras.regularizers.l2(optimal_hyperparameters["regularization"])))
model.add(tf.keras.layers.Dense(1, activation="linear", 
                                kernel_initializer=tf.initializers.HeNormal(), 
                                kernel_regularizer=tf.keras.regularizers.l2(optimal_hyperparameters["regularization"])))

print()
model.compile(loss=smape_loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=optimal_hyperparameters["learning_rate"]), 
                metrics=[metric_mdape, "mae", "mse"]) # Backpropagation

early_stopping = EarlyStopping(monitor='loss', mode='min', verbose=1, patience=20)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint('best_model.hdf5', monitor='loss', mode='min', verbose=1, save_best_only=True)
# Train the model on the full training dataset
model.fit(train_x, train_y, epochs=100, batch_size=optimal_hyperparameters["batch_size"], verbose=1, callbacks=[early_stopping, model_checkpoint])

Regularization: 0.001
Learning Rate: 0.01
Batch Size: 16

Epoch 1/100
Epoch 1: loss improved from inf to 1.22661, saving model to best_model.hdf5
Epoch 2/100
Epoch 2: loss improved from 1.22661 to 0.58647, saving model to best_model.hdf5
Epoch 3/100
Epoch 3: loss improved from 0.58647 to 0.46168, saving model to best_model.hdf5
Epoch 4/100
Epoch 4: loss improved from 0.46168 to 0.42782, saving model to best_model.hdf5
Epoch 5/100
Epoch 5: loss improved from 0.42782 to 0.40254, saving model to best_model.hdf5
Epoch 6/100
Epoch 6: loss improved from 0.40254 to 0.39870, saving model to best_model.hdf5
Epoch 7/100
Epoch 7: loss improved from 0.39870 to 0.39787, saving model to best_model.hdf5
Epoch 8/100
Epoch 8: loss did not improve from 0.39787
Epoch 9/100
Epoch 9: loss did not improve from 0.39787
Epoch 10/100
Epoch 10: loss did not improve from 0.39787
Epoch 11/100
Epoch 11: loss improved from 0.39787 to 0.39331, saving model to best_model.hdf5
Epoch 12/100
Epoch 12: loss improved from

<keras.callbacks.History at 0x13a6d8190>

In [92]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_99 (Flatten)        (None, 4)                 0         
                                                                 
 dense_534 (Dense)           (None, 2)                 10        
                                                                 
 dense_535 (Dense)           (None, 2)                 6         
                                                                 
 dense_536 (Dense)           (None, 1)                 3         
                                                                 
Total params: 19
Trainable params: 19
Non-trainable params: 0
_________________________________________________________________


In [49]:
def autoregression(model, x, horizon=6):
    standardised_x = standardise(x)
    for i in range(horizon):
        forecast = model.predict(np.array([standardised_x[i:i+4]]))
        pred = np.array([tf.squeeze(forecast).numpy()])
        standardised_x = np.concatenate((standardised_x, pred))
    return standardised_x[-horizon:]


In [50]:
def evaluate_model_on_test(model, df_train=df_train, df_test=df_test, horizon=6):
    smape_scores = []
    mdape_scores = []
    
    for i in range(len(df_train)):
        window = de_standardise(df_train[i][10:14])
        labels = df_test[i][0:horizon]
        test_preds = autoregression(model, window, 1)
        print(f"Destandardised test pred: {de_standardise(test_preds)}")
        print(f"Labels: {labels}")
        print(f"Full Labels: {df_test[i]}")
        print(f"Window: {window}")
        smape_score, mdape_score = evaluate_pred(labels, de_standardise(test_preds))
        smape_scores.append(smape_score)
        mdape_scores.append(mdape_score)
        print(f"Current mean SMAPE: {smape_score}, Current mean MDAPE: {mdape_score}")

    mean_smape_score = np.mean(smape_scores)
    mean_mdape_score = np.mean(mdape_scores)
    return mean_smape_score, mean_mdape_score


In [72]:
loaded_model = tf.keras.models.load_model("best_model.hdf5", custom_objects={"smape_loss": smape_loss, "metric_mdape": metric_mdape})
test1, test2 = evaluate_model_on_test(loaded_model)

Destandardised test pred: [5119.34595957]
Labels: [5379.75 6158.68 6876.58 7851.91 8407.84 9156.01]
Full Labels: [5379.75 6158.68 6876.58 7851.91 8407.84 9156.01]
Window: [3307.9736059  3720.19013103 4271.03427293 4820.44774776]
Current mean SMAPE: 33.332707749615956, Current mean MDAPE: 30.177580270552845
Destandardised test pred: [4526.48378177]
Labels: [4793.2 5602.  5065.  5056.  5067.2 5209.6]
Full Labels: [4793.2 5602.  5065.  5056.  5067.2 5209.6]
Window: [5390.19542625 3959.64919959 4021.64860993 4193.53850674]
Current mean SMAPE: 12.424497480243168, Current mean MDAPE: 10.651507171698801
Destandardised test pred: [3850.68141368]
Labels: [3070.2 3601.6 3407.4 3500.6 3437.8 3007. ]
Full Labels: [3070.2 3601.6 3407.4 3500.6 3437.8 3007. ]
Window: [4426.10100329 3164.92267558 3240.39146823 3475.35850694]
Current mean SMAPE: 14.485572660909618, Current mean MDAPE: 12.509710819179498
Destandardised test pred: [4927.01810769]
Labels: [4656.   5228.52 5656.72 5077.02 5403.4  5009.52]


In [93]:
loaded_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_99 (Flatten)        (None, 4)                 0         
                                                                 
 dense_534 (Dense)           (None, 2)                 10        
                                                                 
 dense_535 (Dense)           (None, 2)                 6         
                                                                 
 dense_536 (Dense)           (None, 1)                 3         
                                                                 
Total params: 19
Trainable params: 19
Non-trainable params: 0
_________________________________________________________________


In [74]:
test1

24.607080591140555

In [75]:
# data = (np.array([[940.66, 1084.86, 1244.98, 1445.02]]) - scaler.mean_) / scaler.scale_
# print(data.shape)
#def make_preds(model, input_data):
#  forecast = model.predict(input_data)
#  preds = tf.squeeze(forecast)
#  return preds

#pred = make_preds(model, data)
# inversed = de_standardise(np.array(pred))
# inversed