In [2]:
import os
import math
import random
import seaborn
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from keras.callbacks import EarlyStopping
from collections import namedtuple


In [3]:
df = pd.read_excel("M3C.xls", usecols="A:Z")

df_micro = df.iloc[0:146,]
df_micro = df_micro.iloc[:,6:27]

In [None]:
#Detrend
#for i in range(len(df_train)):
#    data = df_train.iloc[i]
#    poly_fit = np.polyfit(np.arange(14), data, 2)
#    trend = np.polyval(poly_fit, np.arange(14))
#    df_train.iloc[i] = df_train.iloc[i] - trend

In [4]:
df_train = df_micro.iloc[:,:-6]
df_test = df_micro.iloc[:, -6:]

#Standardising
scaler = StandardScaler()
df_train = scaler.fit_transform(df_train.to_numpy().reshape(-1,1))
df_train = pd.DataFrame(df_train)
MEAN = scaler.mean_
STD = scaler.scale_

df_train = df_train.to_numpy().reshape(-1,14)
df_test = df_test.to_numpy().reshape(-1,6)

In [5]:
def get_labelled_window(x, horizon=1):
  return x[:, :-horizon], x[:, -horizon]

def make_windows(x, window_size=4, horizon=1):
  window_step = np.expand_dims(np.arange(window_size+horizon), axis=0)
  window_indexes = window_step + np.expand_dims(np.arange(len(x)-(window_size+horizon-1)), axis=0).T # create 2D array of windows of window size
  windowed_array = x[window_indexes]
  windows, labels = get_labelled_window(windowed_array, horizon=horizon)
  return windows.reshape(-1,4), labels.reshape(-1,1)

In [6]:
train_x = []
train_y = []
test_x = []
test_y = []

for i in range(len(df_train)):
    windows_train, labels_train = make_windows(df_train[i], window_size=4, horizon=1)
    windows_test, labels_test = make_windows(df_test[i], window_size=4, horizon=1)
    train_x = np.concatenate((np.array(train_x).reshape(-1,4), windows_train.reshape(-1,4)))
    train_y = np.concatenate((np.array(train_y).reshape(-1,1), labels_train.reshape(-1,1)))
    test_x = np.concatenate((np.array(test_x).reshape(-1,4), windows_test.reshape(-1,4)))
    test_y = np.concatenate((np.array(test_y).reshape(-1,1), labels_test.reshape(-1,1)))

In [7]:
# SMAPE
def evaluate_smape(y_true, y_pred):
    return 200 * np.mean(np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true)))

def evaluate_mdape(y_true, y_pred):
 return np.median((np.abs(np.subtract(y_true, y_pred)/ y_true))) * 100

def calculate_average_rankings(y_true, y_pred):
    num_series = len(y_pred)
    num_methods = len(y_pred[0])

    ranks = []  # to store ranks for each series

    for series_index in range(num_series):
        sape_values = [
            abs((y_true[series_index] - forecast) / y_true[series_index]) * 100
            for forecast in y_pred[series_index]
        ]
        sorted_sape = sorted(sape_values)  # sort SAPE values in ascending order
        series_ranks = [sorted_sape.index(sape) + 1 for sape in sape_values]  # assign ranks to SAPE values
        ranks.append(series_ranks)

    mean_ranks = []  # to store mean ranks for each forecasting method

    for method_index in range(num_methods):
        total_rank = sum(ranks[series_index][method_index] for series_index in range(num_series))
        mean_rank = total_rank / num_series
        mean_ranks.append(mean_rank)

    return mean_ranks

In [11]:
def evaluate_pred(y_true, y_pred):
    # Symmetric mean absolute percentage error
    smape = evaluate_smape(y_true, y_pred)
    # Median symmetric absolute percentage error
    mdape = evaluate_mdape(y_true, y_pred)
    return smape, mdape

In [10]:
def evaluate_model(y_true_set, y_pred_set):
    # Average Ranking
    avg_ranking = None
    # Percentage Better
    percentage_better = None

In [12]:
# Destandardise
def de_standardise(value):
    return value * STD + MEAN

def standardise(value):
    return (value - MEAN) / STD

In [13]:
# Hyperparameters
Combination = namedtuple("Combination", "learning_rate batch_size regularization hidden_layers hidden_neurons")

learning_rates = np.array([0.001, 0.01, 0.1])
batch_sizes = np.array([16, 32, 64, 128])
regularizations = np.array([0.001, 0.01, 0.001])
hidden_layers = np.array([2, 3, 4, 6])
hidden_neurons = np.array([2, 3, 4, 5, 6, 7, 8])

combinations = list(itertools.starmap(Combination, itertools.product(learning_rates, batch_sizes, regularizations, hidden_layers, hidden_neurons)))

In [14]:
# Time-series expanding window validation

tf.random.set_seed(42)
eval_scores = []
tscv = TimeSeriesSplit(n_splits=5)
def cross_validation(combination, train_x=train_x, train_y=train_y, tscv=tscv):
    hidden_neurons = np.arange(2, 9)
    smape_scores = []
    mdape_scores = []

    # Cross-Validation
    for train_index, test_index in tscv.split(train_x):
        train_x_cv, val_x_cv = train_x[train_index], train_x[test_index]
        train_y_cv, val_y_cv = train_y[train_index], train_y[test_index]

        # Create model with selected hyperparameters
        model_cv = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(4, 1)),
        ], name="model")

        #chosen_hidden_neurons = []

        for i in range(combination.hidden_layers):
            #random_neuron = random.choice(hidden_neurons)
            #chosen_hidden_neurons.append(random_neuron)
            model_cv.add(tf.keras.layers.Dense(combination.hidden_neurons, 
                                            activation="relu", 
                                            kernel_initializer=tf.initializers.HeNormal(), 
                                            kernel_regularizer=tf.keras.regularizers.l2(combination.regularization)))
        model_cv.add(tf.keras.layers.Dense(1, activation="linear", 
                                        kernel_initializer=tf.initializers.HeNormal(), 
                                        kernel_regularizer=tf.keras.regularizers.l2(combination.regularization)))


        model_cv.compile(loss="mae",
                        optimizer=tf.keras.optimizers.Adam(learning_rate=combination.learning_rate),
                        metrics=["mae", "mse"]) # Backpropagation
        
        model_cv.fit(train_x_cv, train_y_cv, epochs=50, batch_size=combination.batch_size, verbose=0)

        predictions = model_cv.predict(val_x_cv)
        smape_score, mdape_score = evaluate_pred(de_standardise(val_y_cv), de_standardise(predictions))
        
        smape_scores.append(smape_score)
        mdape_scores.append(mdape_score)
        
    mean_smape = np.mean(smape_scores)
    mean_mdape = np.mean(mdape_scores)
    hyperparameters = {
        'learning_rate': combination.learning_rate,
        'batch_size': combination.batch_size,
        'regularization': combination.regularization,
        'hidden_neurons': combination.hidden_neurons,
        'hidden_layers': combination.hidden_layers
    }
    print(f"Current mean SMAPE: {mean_smape}, Current hyperparameters: {hyperparameters}")
    return mean_smape, mean_mdape, hyperparameters

random_combinations = random.sample(combinations, 1)
results = map(cross_validation, random_combinations)

optimal_smape = float('inf')
optimal_mdape = float('inf')
optimal_hyperparameters = {}
for result in results:
    smape, mdape, hyperparameters = result
    if smape < optimal_smape:
        optimal_smape = smape
        optimal_mdape = mdape
        optimal_hyperparameters = hyperparameters
print("Best Hyperparameters:", optimal_hyperparameters)
print("Best SMAPE Score:", optimal_smape)
print("Best MDAPE Score:", optimal_mdape)

Current mean SMAPE: 34.16183184337592, Current hyperparameters: {'learning_rate': 0.1, 'batch_size': 16, 'regularization': 0.001, 'hidden_neurons': 2, 'hidden_layers': 6}
Best Hyperparameters: {'learning_rate': 0.1, 'batch_size': 16, 'regularization': 0.001, 'hidden_neurons': 2, 'hidden_layers': 6}
Best SMAPE Score: 34.16183184337592
Best MDAPE Score: 25.08078977482512


In [15]:
print(f"Regularization: {optimal_hyperparameters['regularization']}")
print(f"Learning Rate: {optimal_hyperparameters['learning_rate']}")
print(f"Batch Size: {optimal_hyperparameters['batch_size']}")

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(4, 1)),
], name="model")

for i in range(optimal_hyperparameters["hidden_layers"]):
    model.add(tf.keras.layers.Dense(optimal_hyperparameters["hidden_neurons"], 
                                    activation="relu", 
                                    kernel_initializer=tf.initializers.HeNormal(), 
                                    kernel_regularizer=tf.keras.regularizers.l2(optimal_hyperparameters["regularization"])))
model.add(tf.keras.layers.Dense(1, activation="linear", 
                                kernel_initializer=tf.initializers.HeNormal(), 
                                kernel_regularizer=tf.keras.regularizers.l2(optimal_hyperparameters["regularization"])))

print()
model.compile(loss="mse",
                optimizer=tf.keras.optimizers.Adam(learning_rate=optimal_hyperparameters["learning_rate"]), 
                metrics=["mse", "mae"]) # Backpropagation

early_stopping = EarlyStopping(monitor='loss', mode='min', verbose=1, patience=50)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint('best_model.hdf5', monitor='loss', mode='min', verbose=1, save_best_only=True)
# Train the model on the full training dataset
model.fit(train_x, train_y, epochs=1000, batch_size=optimal_hyperparameters["batch_size"], verbose=1, callbacks=[early_stopping, model_checkpoint])

Regularization: 0.001
Learning Rate: 0.1
Batch Size: 16

Epoch 1/1000
Epoch 1: loss improved from inf to 0.98813, saving model to best_model.hdf5
Epoch 2/1000
Epoch 2: loss improved from 0.98813 to 0.98689, saving model to best_model.hdf5
Epoch 3/1000
Epoch 3: loss improved from 0.98689 to 0.98660, saving model to best_model.hdf5
Epoch 4/1000
Epoch 4: loss improved from 0.98660 to 0.98130, saving model to best_model.hdf5
Epoch 5/1000
Epoch 5: loss did not improve from 0.98130
Epoch 6/1000
Epoch 6: loss did not improve from 0.98130
Epoch 7/1000
Epoch 7: loss did not improve from 0.98130
Epoch 8/1000
Epoch 8: loss improved from 0.98130 to 0.98105, saving model to best_model.hdf5
Epoch 9/1000
Epoch 9: loss did not improve from 0.98105
Epoch 10/1000
Epoch 10: loss did not improve from 0.98105
Epoch 11/1000
Epoch 11: loss did not improve from 0.98105
Epoch 12/1000
Epoch 12: loss did not improve from 0.98105
Epoch 13/1000
Epoch 13: loss did not improve from 0.98105
Epoch 14/1000
Epoch 14: lo

<keras.callbacks.History at 0x13caf77c0>

In [322]:
def autoregression(model, x, horizon=6):
    standardised_x = standardise(x)
    for i in range(horizon):
        forecast = model.predict(np.array([standardised_x[i:i+4]]))
        pred = np.array([tf.squeeze(forecast).numpy()])
        standardised_x = np.concatenate((standardised_x, pred))
    return de_standardise(standardised_x[-horizon:])

#autoregression(model, np.array([4793.2, 5602, 5065, 5056]), 1)




array([5205.8997691])

In [None]:
def evaluate_model_on_test(model, df_train, test_x, test_y):
    for i in range(len(df_train)):
        window = df_train[-1:-4]

In [1]:
df_test.shape

NameError: name 'df_test' is not defined

In [89]:
for i in range(len(df_train)):
    train_x[]
    predicted_values = autoregression(model, , 6)

array([[5379.75, 6158.68, 6876.58, 7851.91],
       [6158.68, 6876.58, 7851.91, 8407.84],
       [6876.58, 7851.91, 8407.84, 9156.01],
       ...,
       [8147.7 , 3330.55, 3326.2 , 3975.25],
       [3330.55, 3326.2 , 3975.25, 4276.15],
       [3326.2 , 3975.25, 4276.15, 6718.35]])

In [510]:
# data = (np.array([[940.66, 1084.86, 1244.98, 1445.02]]) - scaler.mean_) / scaler.scale_
# print(data.shape)
#def make_preds(model, input_data):
#  forecast = model.predict(input_data)
#  preds = tf.squeeze(forecast)
#  return preds

#pred = make_preds(model, data)
# inversed = de_standardise(np.array(pred))
# inversed

[[-1.21687544 -1.13856182 -1.0407236  -0.92424603]]
