In [1]:
import os
import math
import random
import seaborn
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from keras.callbacks import EarlyStopping
from collections import namedtuple


In [None]:
df = pd.read_excel("M3C.xls", usecols="A:Z")

df_micro = df.iloc[0:146,]
df_micro = df_micro.iloc[:,6:27]

In [None]:
#Detrend
#for i in range(len(df_train)):
#    data = df_train.iloc[i]
#    poly_fit = np.polyfit(np.arange(14), data, 2)
#    trend = np.polyval(poly_fit, np.arange(14))
#    df_train.iloc[i] = df_train.iloc[i] - trend

In [119]:
df_train = df_micro.iloc[:,:-6]
df_test = df_micro.iloc[:, -6:]

##Standardising
scaler = StandardScaler()
df_train = scaler.fit_transform(df_train.to_numpy().reshape(-1,1))
df_train = pd.DataFrame(df_train)
MEAN = scaler.mean_
STD = scaler.scale_

def exponential_smoothing(data, alpha):
    return data.ewm(alpha=alpha, adjust=False).mean()

# Assuming 'data' is a pandas Series or DataFrame column
alpha = 0.825  # Smoothing factor (0 < alpha < 1)
df_train = exponential_smoothing(df_train, alpha)

df_train = df_train.to_numpy().reshape(-1,14)
df_test = df_test.to_numpy().reshape(-1,6)

In [111]:
def get_labelled_window(x, horizon=1):
  return x[:, :-horizon], x[:, -horizon]

def make_windows(x, window_size=4, horizon=1):
  window_step = np.expand_dims(np.arange(window_size+horizon), axis=0)
  window_indexes = window_step + np.expand_dims(np.arange(len(x)-(window_size+horizon-1)), axis=0).T # create 2D array of windows of window size
  windowed_array = x[window_indexes]
  windows, labels = get_labelled_window(windowed_array, horizon=horizon)
  return windows.reshape(-1,4), labels.reshape(-1,1)

In [120]:
train_x = []
train_y = []
test_x = []
test_y = []

for i in range(len(df_train)):
    windows_train, labels_train = make_windows(df_train[i], window_size=4, horizon=1)
    windows_test, labels_test = make_windows(df_test[i], window_size=4, horizon=1)
    train_x = np.concatenate((np.array(train_x).reshape(-1,4), windows_train.reshape(-1,4)))
    train_y = np.concatenate((np.array(train_y).reshape(-1,1), labels_train.reshape(-1,1)))
    test_x = np.concatenate((np.array(test_x).reshape(-1,4), windows_test.reshape(-1,4)))
    test_y = np.concatenate((np.array(test_y).reshape(-1,1), labels_test.reshape(-1,1)))

In [105]:
# SMAPE
def evaluate_smape(y_true, y_pred):
    numerator = 2 * np.abs(y_pred - y_true)
    denominator = np.abs(y_pred) + np.abs(y_true)
    smape = np.mean(numerator / denominator) * 100
    return smape

def evaluate_mdape(y_true, y_pred):
 return np.median((np.abs(np.subtract(y_true, y_pred)/ y_true))) * 100

def calculate_average_rankings(y_true, y_pred):
    num_series = len(y_pred)
    num_methods = len(y_pred[0])

    ranks = []  # to store ranks for each series

    for series_index in range(num_series):
        sape_values = [
            abs((y_true[series_index] - forecast) / y_true[series_index]) * 100
            for forecast in y_pred[series_index]
        ]
        sorted_sape = sorted(sape_values)  # sort SAPE values in ascending order
        series_ranks = [sorted_sape.index(sape) + 1 for sape in sape_values]  # assign ranks to SAPE values
        ranks.append(series_ranks)

    mean_ranks = []  # to store mean ranks for each forecasting method

    for method_index in range(num_methods):
        total_rank = sum(ranks[series_index][method_index] for series_index in range(num_series))
        mean_rank = total_rank / num_series
        mean_ranks.append(mean_rank)

    return mean_ranks

In [59]:
def evaluate_pred(y_true, y_pred):
    # Symmetric mean absolute percentage error
    smape = evaluate_smape(y_true, y_pred)
    # Median symmetric absolute percentage error
    mdape = evaluate_mdape(y_true, y_pred)
    return smape, mdape

In [60]:
def evaluate_model(y_true_set, y_pred_set):
    # Average Ranking
    avg_ranking = None
    # Percentage Better
    percentage_better = None

In [61]:
# Destandardise
def de_standardise(value):
    return value * STD + MEAN

def standardise(value):
    return (value - MEAN) / STD

In [88]:
# Hyperparameters
Combination = namedtuple("Combination", "learning_rate batch_size regularization hidden_layers hidden_neurons")

learning_rates = np.array([0.001, 0.01, 0.1])
batch_sizes = np.array([16, 32, 64, 128])
regularizations = np.array([0.001, 0.01, 0.001])
hidden_layers = np.array([2, 3, 4, 6, 8])
hidden_neurons = np.array([2, 3, 4, 5])

combinations = list(itertools.starmap(Combination, itertools.product(learning_rates, batch_sizes, regularizations, hidden_layers, hidden_neurons)))

In [121]:
# Time-series expanding window validation
#with tf.device('/cpu:0'):
    
tf.random.set_seed(42)
eval_scores = []
tscv = TimeSeriesSplit(n_splits=5)
def cross_validation(combination, train_x=train_x, train_y=train_y, tscv=tscv):
    hidden_neurons = np.arange(2, 9)
    smape_scores = []
    mdape_scores = []

    # Cross-Validation
    for train_index, test_index in tscv.split(train_x):
        train_x_cv, val_x_cv = train_x[train_index], train_x[test_index]
        train_y_cv, val_y_cv = train_y[train_index], train_y[test_index]

        # Create model with selected hyperparameters
        model_cv = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(4, 1)),
        ], name="model")

        #chosen_hidden_neurons = []

        for i in range(combination.hidden_layers):
            #random_neuron = random.choice(hidden_neurons)
            #chosen_hidden_neurons.append(random_neuron)
            model_cv.add(tf.keras.layers.Dense(combination.hidden_neurons, 
                                            activation="relu", 
                                            kernel_initializer=tf.initializers.HeNormal(), 
                                            kernel_regularizer=tf.keras.regularizers.l2(combination.regularization)))
        model_cv.add(tf.keras.layers.Dense(1, activation="linear", 
                                        kernel_initializer=tf.initializers.HeNormal(), 
                                        kernel_regularizer=tf.keras.regularizers.l2(combination.regularization)))


        model_cv.compile(loss="mae",
                        optimizer=tf.keras.optimizers.Adam(learning_rate=combination.learning_rate),
                        metrics=["mae", "mse"]) # Backpropagation
        
        model_cv.fit(train_x_cv, train_y_cv, epochs=50, batch_size=combination.batch_size, verbose=0)

        predictions = model_cv.predict(val_x_cv)
        smape_score, mdape_score = evaluate_pred(de_standardise(val_y_cv), de_standardise(predictions))
        
        smape_scores.append(smape_score)
        mdape_scores.append(mdape_score)
        
    mean_smape = np.mean(smape_scores)
    mean_mdape = np.mean(mdape_scores)
    hyperparameters = {
        'learning_rate': combination.learning_rate,
        'batch_size': combination.batch_size,
        'regularization': combination.regularization,
        'hidden_neurons': combination.hidden_neurons,
        'hidden_layers': combination.hidden_layers
    }
    print(f"Current mean SMAPE: {mean_smape}, Current hyperparameters: {hyperparameters}")
    return mean_smape, mean_mdape, hyperparameters

random_combinations = random.sample(combinations, 3)
results = map(cross_validation, random_combinations)

optimal_smape = float('inf')
optimal_mdape = float('inf')
optimal_hyperparameters = {}
for result in results:
    smape, mdape, hyperparameters = result
    if smape < optimal_smape:
        optimal_smape = smape
        optimal_mdape = mdape
        optimal_hyperparameters = hyperparameters
print("Best Hyperparameters:", optimal_hyperparameters)
print("Best SMAPE Score:", optimal_smape)
print("Best MDAPE Score:", optimal_mdape)

Current mean SMAPE: 20.439698341565823, Current hyperparameters: {'learning_rate': 0.1, 'batch_size': 64, 'regularization': 0.001, 'hidden_neurons': 3, 'hidden_layers': 4}
Current mean SMAPE: 37.879043995656325, Current hyperparameters: {'learning_rate': 0.001, 'batch_size': 32, 'regularization': 0.001, 'hidden_neurons': 2, 'hidden_layers': 6}
Current mean SMAPE: 17.352771843666545, Current hyperparameters: {'learning_rate': 0.01, 'batch_size': 64, 'regularization': 0.01, 'hidden_neurons': 5, 'hidden_layers': 6}
Best Hyperparameters: {'learning_rate': 0.01, 'batch_size': 64, 'regularization': 0.01, 'hidden_neurons': 5, 'hidden_layers': 6}
Best SMAPE Score: 17.352771843666545
Best MDAPE Score: 12.920408131712577


In [123]:
print(f"Regularization: {optimal_hyperparameters['regularization']}")
print(f"Learning Rate: {optimal_hyperparameters['learning_rate']}")
print(f"Batch Size: {optimal_hyperparameters['batch_size']}")

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(4, 1)),
], name="model")

for i in range(optimal_hyperparameters["hidden_layers"]):
    model.add(tf.keras.layers.Dense(optimal_hyperparameters["hidden_neurons"], 
                                    activation="relu", 
                                    kernel_initializer=tf.initializers.HeNormal(), 
                                    kernel_regularizer=tf.keras.regularizers.l2(optimal_hyperparameters["regularization"])))
model.add(tf.keras.layers.Dense(1, activation="linear", 
                                kernel_initializer=tf.initializers.HeNormal(), 
                                kernel_regularizer=tf.keras.regularizers.l2(optimal_hyperparameters["regularization"])))

print()
model.compile(loss="mse",
                optimizer=tf.keras.optimizers.Adam(learning_rate=optimal_hyperparameters["learning_rate"]), 
                metrics=["mse", "mae"]) # Backpropagation

early_stopping = EarlyStopping(monitor='loss', mode='min', verbose=1, patience=10)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint('best_model.hdf5', monitor='loss', mode='min', verbose=1, save_best_only=True)
# Train the model on the full training dataset
model.fit(train_x, train_y, epochs=100, batch_size=optimal_hyperparameters["batch_size"], verbose=1, callbacks=[early_stopping, model_checkpoint])

Regularization: 0.01
Learning Rate: 0.01
Batch Size: 64

Epoch 1/100
 1/23 [>.............................] - ETA: 32s - loss: 1.4625 - mse: 0.8094 - mae: 0.7107
Epoch 1: loss improved from inf to 1.11994, saving model to best_model.hdf5
Epoch 2/100
Epoch 2: loss improved from 1.11994 to 0.70305, saving model to best_model.hdf5
Epoch 3/100
 1/23 [>.............................] - ETA: 0s - loss: 0.5997 - mse: 0.2415 - mae: 0.3293
Epoch 3: loss improved from 0.70305 to 0.51525, saving model to best_model.hdf5
Epoch 4/100
 1/23 [>.............................] - ETA: 0s - loss: 0.4721 - mse: 0.1805 - mae: 0.2818
Epoch 4: loss improved from 0.51525 to 0.41928, saving model to best_model.hdf5
Epoch 5/100
 1/23 [>.............................] - ETA: 0s - loss: 0.3125 - mse: 0.0648 - mae: 0.1924
Epoch 5: loss improved from 0.41928 to 0.37082, saving model to best_model.hdf5
Epoch 6/100
 1/23 [>.............................] - ETA: 0s - loss: 0.3310 - mse: 0.1124 - mae: 0.2199
Epoch 6: loss 

<keras.callbacks.History at 0x146034340>

In [124]:
def autoregression(model, x, horizon=6):
    standardised_x = standardise(x)
    for i in range(horizon):
        forecast = model.predict(np.array([standardised_x[i:i+4]]))
        pred = np.array([tf.squeeze(forecast).numpy()])
        standardised_x = np.concatenate((standardised_x, pred))
    return standardised_x[-horizon:]


In [125]:
def evaluate_model_on_test(model, df_train=df_train, df_test=df_test):
    smape_scores = []
    mdape_scores = []
    
    for i in range(len(df_train)):
        window = de_standardise(df_train[i][10:14])
        labels = df_test[i]
        test_preds = autoregression(model, window, 6)
        print(f"Destandardised test pred: {de_standardise(test_preds)}")
        print(f"Labels: {labels}")
        print(f"Window: {window}")
        smape_score, mdape_score = evaluate_pred(labels, de_standardise(test_preds))
        smape_scores.append(smape_score)
        mdape_scores.append(mdape_score)
        print(f"Current mean SMAPE: {smape_score}, Current mean MDAPE: {mdape_score}")

    mean_smape_score = np.mean(smape_scores)
    mean_mdape_score = np.mean(mdape_scores)
    return mean_smape_score, mean_mdape_score


In [126]:
loaded_model = tf.keras.models.load_model("best_model.hdf5")
test1, test2 = evaluate_model_on_test(loaded_model)

Destandardised test pred: [5160.84530295 5417.11369274 5629.8105018  5807.67461481 5958.77123212
 6083.73918388]
Labels: [5379.75 6158.68 6876.58 7851.91 8407.84 9156.01]
Window: [3307.9736059  3720.19013103 4271.03427293 4820.44774776]
Current mean SMAPE: 23.541376717714073, Current mean MDAPE: 22.082771402630872
Destandardised test pred: [4445.01355894 4731.37009385 5004.15422981 5250.89476384 5469.83777688
 5661.88655186]
Labels: [4793.2 5602.  5065.  5056.  5067.2 5209.6]
Window: [5390.19542625 3959.64919959 4021.64860993 4193.53850674]
Current mean SMAPE: 7.55702126000362, Current mean MDAPE: 7.605068457182707
Destandardised test pred: [3811.0292181  4175.78418014 4519.49101665 4829.45428818 5103.89728644
 5344.35724983]
Labels: [3070.2 3601.6 3407.4 3500.6 3437.8 3007. ]
Window: [4426.10100329 3164.92267558 3240.39146823 3475.35850694]
Current mean SMAPE: 31.87459075044967, Current mean MDAPE: 35.29914125932959
Destandardised test pred: [4875.2782266  5169.8128113  5418.62972617 

In [127]:
test1

22.526504753692652

In [510]:
# data = (np.array([[940.66, 1084.86, 1244.98, 1445.02]]) - scaler.mean_) / scaler.scale_
# print(data.shape)
#def make_preds(model, input_data):
#  forecast = model.predict(input_data)
#  preds = tf.squeeze(forecast)
#  return preds

#pred = make_preds(model, data)
# inversed = de_standardise(np.array(pred))
# inversed

[[-1.21687544 -1.13856182 -1.0407236  -0.92424603]]
