In [49]:
import pickle
from glob import glob
import numpy as np
from functools import reduce
import operator
import pandas as pd

# Load Metrics

In [50]:
# Collect the path of every metrics file stored under the trained-model directories.
trained_mods_path = './models/trained/*'
mods = glob(trained_mods_path)
# Flatten the per-directory glob results with a nested comprehension instead of
# reduce(operator.concat, ...): reduce() was called without an initial value, so
# it raised TypeError whenever no model directory matched. This form simply
# yields an empty list in that case and keeps the same ordering otherwise.
metric_paths = [path for mod in mods for path in glob(f'{mod}/metrics*')]
metric_paths

['./models/trained/ARIMA/metrics_ARIMA(1,0,5).dat',
 './models/trained/baseline/metrics_baseline_mean.dat',
 './models/trained/baseline/metrics_baseline_tod.dat',
 './models/trained/CNN/metrics_CNN.dat',
 './models/trained/prophet/metrics_prophet.dat',
 './models/trained/STGCN/metrics_STGCN.dat',
 './models/trained/LSTM/metrics_LSTM.dat']

In [51]:
# Build a {model name: unpickled metrics} mapping, one entry per metrics file.
metrics = {}
for metric_path in metric_paths:
    # Model name = file name cut at its first '.', then everything after the
    # last '_' (e.g. 'metrics_baseline_mean.dat' -> 'mean').
    file_name = metric_path.split('/')[-1]
    mod = file_name.partition('.')[0].rpartition('_')[-1]
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(metric_path, 'rb') as handle:
        metrics[mod] = pickle.load(handle)

# One column per model; the former index is moved into a 'Type' column.
metrics = pd.DataFrame(metrics)
metrics = metrics.reset_index()
metrics = metrics.rename(columns={'index': 'Type'})
metrics

Unnamed: 0,Type,"ARIMA(1,0,5)",mean,tod,CNN,prophet,STGCN,LSTM
0,cv,"{1: {'train_time': 1.0791349411010742, 'mse': ...","{1: {'train_time': 8.106231689453125e-06, 'mse...","{1: {'train_time': 6.9141387939453125e-06, 'ms...","{1: {'train_time': 1.2046880722045898, 'mse': ...","{1: {'train_time': 1.7118539810180664, 'mse': ...","{1: {'train_time': 14.103374946945149, 'mse': ...","{1: {'train_time': 1.4748117923736572, 'mse': ..."
1,test,"{'mse': 55.524369654566, 'mae': 20.77337119887...","{'mse': 7.3442733509200675, 'mae': 2.036776656...","{'mse': 4.347679951586775, 'mae': 1.3925114026...","{'mse': 0.0090606455, 'mae': 0.059377793, 'rms...","{'mse': 4.359167659768827, 'mae': 1.4214996141...","{'mse': 1.0057943694489999e-06, 'rmse': 0.0010...","{'mse': 0.008303482, 'mae': 0.063231826, 'rmse..."


# Transform

In [52]:
# Reshape wide -> long: one row per (Type, Model) pair, with that pair's
# metrics dict kept in the default 'value' column.
metrics = metrics.melt(id_vars=['Type'], var_name='Model')
metrics

Unnamed: 0,Type,Model,value
0,cv,"ARIMA(1,0,5)","{1: {'train_time': 1.0791349411010742, 'mse': ..."
1,test,"ARIMA(1,0,5)","{'mse': 55.524369654566, 'mae': 20.77337119887..."
2,cv,mean,"{1: {'train_time': 8.106231689453125e-06, 'mse..."
3,test,mean,"{'mse': 7.3442733509200675, 'mae': 2.036776656..."
4,cv,tod,"{1: {'train_time': 6.9141387939453125e-06, 'ms..."
5,test,tod,"{'mse': 4.347679951586775, 'mae': 1.3925114026..."
6,cv,CNN,"{1: {'train_time': 1.2046880722045898, 'mse': ..."
7,test,CNN,"{'mse': 0.0090606455, 'mae': 0.059377793, 'rms..."
8,cv,prophet,"{1: {'train_time': 1.7118539810180664, 'mse': ..."
9,test,prophet,"{'mse': 4.359167659768827, 'mae': 1.4214996141..."


In [53]:
def _expand_value_column(frame):
    """Replace the dict-valued 'value' column with one column per dict key."""
    expanded = frame['value'].apply(pd.Series)
    return pd.concat([frame.drop(columns=['value']), expanded], axis=1)


# CV rows: the first expansion spreads the outer dict into columns, melt turns
# those columns back into rows labelled by 'Fold', and the second expansion
# spreads each fold's inner dict into one column per metric.
cv_metrics = _expand_value_column(metrics.loc[metrics['Type'] == 'cv'])
cv_metrics = cv_metrics.melt(id_vars=['Type', 'Model'], var_name='Fold')
cv_metrics = _expand_value_column(cv_metrics)

# Test rows hold a flat metrics dict, so a single expansion is enough.
test_metrics = _expand_value_column(metrics.loc[metrics['Type'] == 'test'])

In [54]:
# Stack the CV and test rows into a single frame with a fresh 0..n-1 index.
metrics = pd.concat([cv_metrics, test_metrics], ignore_index=True)
metrics

Unnamed: 0,Type,Model,Fold,train_time,mse,mae,rmse,r2
0,cv,"ARIMA(1,0,5)",1.0,1.079135,9.812994,2.277368,3.13257,0.547846
1,cv,mean,1.0,8e-06,8.729189,2.149273,2.95452,0.458948
2,cv,tod,1.0,7e-06,5.435395,1.452499,2.331393,0.367159
3,cv,CNN,1.0,1.204688,0.002003,0.028232,0.04476,0.997852
4,cv,prophet,1.0,1.711854,4.45023,1.396939,2.109557,0.402251
5,cv,STGCN,1.0,14.103375,0.0001,0.005,0.01,0.975959
6,cv,LSTM,1.0,1.474812,0.002232,0.029077,0.047241,0.997607
7,cv,"ARIMA(1,0,5)",2.0,2.081563,7.99896,2.149618,2.828243,0.573287
8,cv,mean,2.0,7e-06,5.451031,1.936104,2.334744,0.328933
9,cv,tod,2.0,6e-06,2.511742,1.24812,1.584848,0.533692


# Benchmark

**CV Metrics**

In [55]:
# Keep only the cross-validation rows; 'Type' is constant for them, so drop it.
cv_metrics = metrics[metrics['Type'] == 'cv'].drop('Type', axis=1)
# 'Fold' identifies a split and is not a metric, so remove it explicitly before
# averaging. The original relied on groupby().mean() silently discarding that
# column, which newer pandas versions no longer do (the call either raises or
# averages the fold ids into a meaningless extra "metric").
avg_cv_metrics = (
    cv_metrics
    .drop(columns=['Fold'])
    .groupby('Model')
    .mean()
    .reset_index()
)
avg_cv_metrics

Unnamed: 0,Model,train_time,mse,mae,rmse,r2
0,"ARIMA(1,0,5)",3.6717,8.650417,2.224526,2.933191,0.569745
1,CNN,3.41936,0.002046,0.028655,0.045219,0.997948
2,LSTM,4.315858,0.002352,0.031236,0.04849,0.99764
3,STGCN,14.688084,2.3e-05,0.0017,0.0034,0.98138
4,mean,6e-06,7.118832,2.056072,2.650853,0.354317
5,prophet,11.4287,3.97194,1.375251,1.981937,0.444081
6,tod,6.3e-05,3.877352,1.355714,1.935624,0.462216


In [56]:
import plotly.express as px

In [61]:
# Long format (Model, Metric, Value) so every metric gets its own coloured bar
# within each model's group.
avg_cv_metrics_plot_data = avg_cv_metrics.melt(
    id_vars='Model',
    var_name='Metric',
    value_name='Value',
)
fig = px.bar(
    avg_cv_metrics_plot_data,
    x='Model',
    y='Value',
    color='Metric',
    barmode='group',
    title='CV Metrics Averaged Across 5 Folds',
)
fig

In [62]:
# fig.write_html('./plots/benchmark_cv.html')

**Test Metrics**

In [59]:
# Select the held-out test rows and drop the columns that are not reported in
# the test table.
is_test_row = metrics['Type'] == 'test'
test_metrics = metrics.loc[is_test_row].drop(columns=['Type', 'Fold', 'train_time'])
test_metrics

Unnamed: 0,Model,mse,mae,rmse,r2
35,"ARIMA(1,0,5)",55.52437,20.773371,22.801079,0.557439
36,mean,7.344273,2.036777,2.710032,0.390239
37,tod,4.34768,1.392511,2.085109,0.402642
38,CNN,0.009061,0.059378,0.095187,0.996885
39,prophet,4.359168,1.4215,2.087862,0.401064
40,STGCN,1e-06,0.000501,0.001003,0.989383
41,LSTM,0.008303,0.063232,0.091123,0.997146


In [63]:
# Long format (Model, Metric, Value) feeding a grouped bar chart of test scores.
test_metrics_plot_data = test_metrics.melt(
    id_vars='Model',
    var_name='Metric',
    value_name='Value',
)
fig = px.bar(
    test_metrics_plot_data,
    x='Model',
    y='Value',
    color='Metric',
    barmode='group',
    title='Test Metrics',
)
fig

In [64]:
# fig.write_html('./plots/benchmark_test.html')