In [None]:
from copyreg import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.lines import lineStyles
from networkx.algorithms.bipartite.basic import color

from model import model
from DataExtraction import utils
import os
import ujson

In [None]:
# Load, from each model, the results, the model configuration and the evaluation metrics
os.chdir(utils.get_root_dir() + '/model/final_models')
# Discover the saved models from the directory listing itself: only `model_<number>`
# directories are taken, and they are visited in numeric order. Counting every entry whose
# name contains 'model_' and then assuming a contiguous 1..N numbering breaks as soon as
# one model is missing or an unrelated entry with 'model_' in its name is present.
model_dirs = sorted(
    (
        name for name in os.listdir('.')
        if name.startswith('model_') and name[len('model_'):].isdigit() and os.path.isdir(name)
    ),
    key=lambda name: int(name[len('model_'):]),
)
n_models = len(model_dirs)
training_stats, testing_stats, model_results, model_info = {}, {}, {}, {}
for model_dir in model_dirs:
    m = model.load_model(model_dir)
    name = m.model_config.model_name
    src = m.model_config.model_src
    model_info[name] = m.model_config.__dict__
    # Print name and num of parameters for each model
    print(name, sum(p.numel() for p in m.parameters()))
    with open(src + '/train/training_stats.json', 'r') as f:
        training_stats[name] = ujson.load(f)
    with open(src + '/test/losses.json', 'r') as f:
        testing_stats[name] = ujson.load(f)
    # Stored as a (y_pred, y_test) tuple: index 0 = predictions, index 1 = ground truth
    model_results[name] = (
        np.load(src + '/test/y_pred.npy'),
        np.load(src + '/test/y_test.npy'),
    )
    # Release the loaded model before loading the next one
    del m

In [None]:
def get_model_information(model_name, training_stats, testing_stats, model_results, model_info, results_df):
    """Summarise one trained model as a new column of ``results_df``.

    Parameters
    ----------
    model_name : str
        Label of the column that receives the summary.
    training_stats : dict
        Training log of this model; ``'epoch_times'`` (summed) and ``'epoch_num'``
        (maximum taken) are read.
    testing_stats : dict
        Test metrics of this model; ``testing_stats['total']`` must provide
        ``'mse'``, ``'mae'``, ``'rmse'`` and ``'r2'``.
    model_results : object
        Not used; kept so that existing callers keep working.
    model_info : dict
        Model configuration. Reads ``'model_type'``, ``'window_size'``, ``'batch_size'``,
        ``'dropout'`` and ``'lr'``; additionally ``'num_layers'`` for ``'LSTM'`` models, or
        ``'subwindow_size'`` and ``'xLSTM_config'`` (its ``num_blocks`` attribute) otherwise.
    results_df : pandas.DataFrame
        Summary table, modified in place. Values are matched to its row labels.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If one of the keys listed above is missing.
    """
    model_type = model_info['model_type']
    is_lstm = model_type == 'LSTM'
    if is_lstm:
        temporal_window = model_info['window_size']
        layers_blocks = model_info['num_layers']
    else:
        # Non-LSTM models are described as "<window>/<subwindow>" and count blocks instead of layers
        temporal_window = str(model_info['window_size']) + '/' + str(model_info['subwindow_size'])
        layers_blocks = model_info['xLSTM_config'].num_blocks
    total_metrics = testing_stats['total']
    summary = {
        'Type': model_type,
        'Temporal Window': temporal_window,
        'Training Time': sum(training_stats['epoch_times']),
        'Number of Epochs': max(training_stats['epoch_num']),
        'Layers/Blocks': layers_blocks,
        'Batch Size': model_info['batch_size'],
        'Dropout': model_info['dropout'],
        'Learning Rate': model_info['lr'],
        'MSE': total_metrics['mse'],
        'MAE': total_metrics['mae'],
        'RMSE': total_metrics['rmse'],
        'R2': total_metrics['r2'],
    }
    # Assign a Series rather than the raw dict: a Series is aligned on the row labels of
    # results_df, so every value lands on its own row and the result does not depend on
    # how the installed pandas version treats a dict assigned to a column.
    results_df[model_name] = pd.Series(summary, dtype=object)

In [None]:
# Empty summary table: one row per reported attribute; model columns are added afterwards.
final_df = pd.DataFrame(
    index=[
        'Type',
        'Temporal Window',
        'Training Time',
        'Number of Epochs',
        'Layers/Blocks',
        'Batch Size',
        'Dropout',
        'Learning Rate',
        'MSE',
        'MAE',
        'RMSE',
        'R2',
    ]
)

In [None]:
# Add one summary column per loaded model to final_df.
for model_name, config in model_info.items():
    get_model_information(
        model_name,
        training_stats[model_name],
        testing_stats[model_name],
        model_results[model_name],
        config,
        final_df,
    )

In [None]:
# Display only the summary columns from position 24 onwards (the first 24 columns are skipped).
final_df.iloc[:,24:]

In [None]:
# MODEL CLASSIFICATION
# By Model Number: every group maps a saved model name to its number of steps (days) forward
m1 = dict(model_1=30, model_2=90, model_3=180, model_4=365)
m2 = dict(model_5=30, model_6=90, model_7=180, model_8=365)
m3 = dict(model_9=30, model_10=90, model_11=180, model_12=365)
m4 = dict(model_13=30, model_14=90, model_15=180, model_16=365)
m5 = dict(model_29=30, model_30=90, model_31=180, model_32=365)
m6 = dict(model_21=30, model_22=90, model_23=180, model_24=365)
m7 = dict(model_25=30, model_26=90, model_27=180, model_28=365)
# 'Model 1' .. 'Model 7', numbered by position in the list below
all_models_by_number = {
    f'Model {number}': group
    for number, group in enumerate([m1, m2, m3, m4, m5, m6, m7], start=1)
}
# By Steps Forward: for each horizon, the matching saved model of every group (in group order)
sfw_30, sfw_90, sfw_180, sfw_365 = (
    [name for group in all_models_by_number.values() for name, steps in group.items() if steps == horizon]
    for horizon in (30, 90, 180, 365)
)
all_models_by_sfw = [sfw_30, sfw_90, sfw_180, sfw_365]
possible_models = list(all_models_by_number.keys())

# For every horizon keep the second and the last entry (the Model 2 and Model 7 groups)
best_models_30 = [sfw_30[pos] for pos in (1, -1)]
best_models_90 = [sfw_90[pos] for pos in (1, -1)]
best_models_180 = [sfw_180[pos] for pos in (1, -1)]
best_models_365 = [sfw_365[pos] for pos in (1, -1)]

In [None]:
def get_pred(model_name, model_results, sensor):
    """Return the predictions of ``model_name`` for a single sensor.

    The predictions are element 0 of ``model_results[model_name]``; they are transposed and
    then indexed with ``sensor`` (presumably the array is samples x sensors - TODO confirm).
    """
    predictions = model_results[model_name][0]
    return predictions.T[sensor]
def get_test(model_results, sensor, model_name='model_1'):
    """Return the ground-truth series of a single sensor.

    Parameters
    ----------
    model_results : dict
        Maps a model name to a pair whose element 1 holds the test targets.
    sensor : int
        Index applied to the transposed target array.
    model_name : str, optional
        Model whose stored targets are read. Defaults to ``'model_1'``, which is what this
        function always used before the parameter existed, so existing calls are unaffected.

    Raises
    ------
    KeyError
        If ``model_name`` is not present in ``model_results``.
    """
    return model_results[model_name][1].T[sensor]

In [None]:
def plot_same_sfw_predictions(sensors, model_results, models, days):
    """Plot, per sensor, the true series against the predictions of several models.

    One panel of a fixed 3x3 grid is used per entry of ``sensors`` (so at most nine sensors
    fit). All series are cut to the length of the shortest prediction, keeping their last
    values, before being drawn.

    Parameters
    ----------
    sensors : sequence of int
        Sensor indices to plot (0-based); panel titles show ``index + 1``.
    model_results : dict
        Maps a model name to its ``(y_pred, y_test)`` pair.
    models : sequence of str
        Saved model names to compare. Relies on the notebook-level ``possible_models`` and
        ``all_models_by_number`` to translate them into 'Model N' legend labels.
    days : int
        Prediction horizon, only used in the figure title.
    """
    fig, ax = plt.subplots(3, 3, figsize=(20, 20))
    cmap = plt.get_cmap('Paired')
    # Figure-level title: plt.title would only write onto the last subplot, where the
    # per-sensor title set below replaces it again.
    fig.suptitle(f'Prediction within {days} days')
    # Get numbers to match result table (does not depend on the sensor, so computed once)
    model_corrected_number = [i for model in models for i in possible_models if model in all_models_by_number[i]]
    for s, sensor in enumerate(sensors):
        # Get predictions and test data
        y_pred = [get_pred(model, model_results, sensor) for model in models]
        y_test = get_test(model_results, sensor)
        # Min prediction length
        min_len = min(len(y) for y in y_pred)
        y_pred = [y[-min_len:] for y in y_pred]
        y_test = y_test[-min_len:]
        # Plot results for each sensor
        panel = ax.flat[s]
        panel.plot(y_test, label='True', color='black', linestyle='dashed')
        for colour_idx, (model_num, y) in enumerate(zip(model_corrected_number, y_pred)):
            panel.plot(y, label=model_num, color=cmap(colour_idx))
        # Title and legend are set once per panel; the title names the sensor itself rather
        # than its position in `sensors`, so a subset of sensors is labelled correctly.
        panel.set_title(f'Sensor {sensor + 1}')
        panel.legend()
    plt.show()

In [None]:
# Prediction 30 days ahead: sensors 0-8, models listed in best_models_30
plot_same_sfw_predictions(list(range(9)), model_results, best_models_30, days=30)

In [None]:
# Prediction 90 days ahead: sensors 0-8, models listed in best_models_90
plot_same_sfw_predictions(list(range(9)), model_results, best_models_90, days=90)

In [None]:
# Prediction 180 days ahead: sensors 0-8, models listed in best_models_180
plot_same_sfw_predictions(list(range(9)), model_results, best_models_180, days=180)

In [None]:
# Prediction 365 days ahead: sensors 0-8, models listed in best_models_365
plot_same_sfw_predictions(list(range(9)), model_results, best_models_365, days=365)

In [None]:
# Error metrics of a chosen set of models (used below for the best 30-days-ahead models)
def get_errors(testing_stats, models):
    """Return a new dict mapping each name in ``models`` to ``testing_stats[name]``.

    Raises ``KeyError`` when a requested model has no entry in ``testing_stats``.
    """
    return {name: testing_stats[name] for name in models}

# Test metrics of the models listed in best_models_30
errors_30 = get_errors(testing_stats=testing_stats, models=best_models_30)

In [None]:
# Metric table of 'model_5' (reported as Model 2), indexed by (Model, Metric)
errors_m2 = (
    pd.DataFrame(errors_30['model_5'])
    .assign(Model='Model 2')
    .reset_index()
    .set_index(['Model', 'index'])
)
errors_m2.index.names = ['Model', 'Metric']

In [None]:
# Display the Model 2 metric table.
errors_m2

In [None]:
# Metric table of 'model_25' (reported as Model 7), indexed by (Model, Metric)
errors_m7 = (
    pd.DataFrame(errors_30['model_25'])
    .assign(Model='Model 7')
    .reset_index()
    .set_index(['Model', 'index'])
)
errors_m7.index.names = ['Model', 'Metric']

In [None]:
# Display the Model 7 metric table.
errors_m7

In [None]:
# Stack the Model 2 and Model 7 metric tables and transpose the result, so the
# (Model, Metric) pairs become the columns; then show the Model 7 columns only.
results_30_per_sensor = pd.concat((errors_m2, errors_m7), axis=0).transpose()
results_30_per_sensor[['Model 7']]

In [None]:
import pickle
# Get scalers from Model 2 and Model 7
def get_scaler(model_name):
    """Load the ``(x_scaler, y_scaler)`` pair stored in ``scalers.pkl`` of a saved model.

    NOTE(review): ``pickle.load`` can execute arbitrary code contained in the file, so this
    must only be pointed at scaler files produced by this project.
    """
    scaler_path = utils.get_root_dir() + f'/model/final_models/{model_name}/scalers.pkl'
    with open(scaler_path, 'rb') as scaler_file:
        stored = pickle.load(scaler_file)
    # The file is expected to hold exactly two objects: input scaler, then target scaler
    x_scaler, y_scaler = stored
    return x_scaler, y_scaler
scalers_m2 = [get_scaler(saved_name) for saved_name in m2]
scalers_m7 = [get_scaler(saved_name) for saved_name in m7]

In [None]:
# Convert y_pred for each model to the real scale (Model 2)
# For each of the four Model 2 variants: stack the nine per-sensor predictions, transpose
# the stack and undo the scaling with that variant's second scaler (index 1, the y scaler).
model_2_names = list(m2.keys())
y_pred_m2 = []
for variant in range(4):
    scaled_pred = np.array(
        [get_pred(model_2_names[variant], model_results, sensor_idx) for sensor_idx in range(9)]
    )
    y_pred_m2.append(scalers_m2[variant][1].inverse_transform(scaled_pred.T))

In [None]:
# Convert y_pred for each model to the real scale (Model 7)
# For each of the four Model 7 variants: stack the nine per-sensor predictions, transpose
# the stack and undo the scaling with that variant's second scaler (index 1, the y scaler).
model_7_names = list(m7.keys())
y_pred_m7 = []
for variant in range(4):
    scaled_pred = np.array(
        [get_pred(model_7_names[variant], model_results, sensor_idx) for sensor_idx in range(9)]
    )
    y_pred_m7.append(scalers_m7[variant][1].inverse_transform(scaled_pred.T))

In [None]:
# Convert y_test for each model to the real scale
# NOTE(review): get_test does not depend on the loop index, so the four stacked matrices
# are identical before un-scaling, and they are un-scaled with the Model 7 y scalers.
# Confirm that every model shares the same target scaling and test targets.
y_test = []
for variant in range(4):
    scaled_truth = np.array([get_test(model_results, sensor_idx) for sensor_idx in range(9)])
    y_test.append(scalers_m7[variant][1].inverse_transform(scaled_truth.T))

In [None]:
# Plot 30 days predictions for sensor 2 and sensor 9
cmap = plt.get_cmap('Paired')
fig, ax = plt.subplots(1, 2, figsize=(16, 8))
# Columns are 0-based while sensor numbers are 1-based, so columns 1 and 8 are sensors 2
# and 9. The title is derived from the column so that it cannot disagree with the data
# (the left panel used to be titled "Sensor 1" while showing column 1).
for panel, column in zip(ax, (1, 8)):
    panel.plot(y_test[0][:, column], label='Real', linestyle='--', color='black')
    panel.plot(y_pred_m2[0][:, column], label='Model 2', color=cmap(0))
    panel.plot(y_pred_m7[0][:, column], label='Model 7', color=cmap(1))
    panel.set_title(f'Sensor {column + 1} - 30 days ahead (real scale)')
    panel.legend()
plt.show()

In [None]:
# Inspect the first entry of y_test (the un-scaled ground truth built above).
y_test[0]