In [34]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import pickle as pkl
import tensorflow as tf

import sys
sys.path.append('..')

import metrics

import warnings
warnings.filterwarnings('once')

In [24]:
# Directory-name prefixes of the selected 18-step and 12-step input
# configurations; a run index (0-9) is appended to form each run directory.
best_18_model_params = 'inp_18__out_12__loss_mae__bksize_200__bkact_leaky__dir_bi__'
best_12_model_params = 'inp_12__out_12__loss_mae__bksize_100__bkact_leaky__dir_bi__'

# Absolute path of the results root, resolved against the current working directory.
p = Path('../results').absolute()

# One run directory (as a plain string) per run index, for each configuration.
models_dirs_12 = [f'{p / best_12_model_params}{i}' for i in range(10)]
models_dirs_18 = [f'{p / best_18_model_params}{i}' for i in range(10)]

# Index of the run whose weights are evaluated below.
ind = 2

best_18 = f'{models_dirs_18[ind]}/best_weights.h5'
best_12 = f'{models_dirs_12[ind]}/best_weights.h5'

In [3]:
# Locations of the yearly train/test CSV files, given relative to the
# current working directory.
train_path, test_path = (
    Path('..', 'data', csv_name)
    for csv_name in ('Yearly-train.csv', 'Yearly-test.csv')
)

In [4]:
# Load both splits and discard the 'V1' column so that only the remaining
# columns are kept for modelling.
train = pd.read_csv(train_path).drop(columns='V1')
test = pd.read_csv(test_path).drop(columns='V1')

In [5]:
# Silence every NumPy floating-point warning (divide, over, under, invalid)
# for the rest of the session. NOTE(review): this also hides genuine
# divide-by-zero / invalid-value problems in later cells.
np.seterr(all='ignore')

def get_last_N_valid(series, N=18):
    """Return the last ``N`` non-NaN values of ``series`` as a NumPy array.

    Parameters
    ----------
    series : pandas.Series
        Observations, possibly containing NaN entries.
    N : int, default 18
        Number of trailing valid observations to keep.

    Returns
    -------
    numpy.ndarray
        The last ``N`` valid values in their original order, or an all-NaN
        array of length ``N`` when fewer than ``N`` valid values exist.
    """
    valid = series.dropna().values
    if len(valid) < N:
        # Return an ndarray here as well (previously a pd.Series) so callers
        # always receive the same type regardless of the history length.
        return np.full(N, np.nan)
    return valid[-N:]

In [6]:
# Model inputs: for every row of the training frame, the last 18 (resp. 12)
# valid observations. Rows with too few valid values come back as all-NaN.
X_test_18 = np.array([get_last_N_valid(row, N=18) for _, row in train.iterrows()])
X_test_12 = np.array([get_last_N_valid(row, N=12) for _, row in train.iterrows()])

# Ground-truth values taken from the test frame.
y_test = test.values

# True for rows whose 18-step window contains NaN, i.e. rows that must fall
# back to the 12-step window.
use_12 = np.any(np.isnan(X_test_18), axis=1)

In [32]:
def get_predictions(model_18, model_12, X_18, X_12, use_12):
    """Forecast every series with the model matching its available history.

    Each input window is min-max scaled to [0, 1], passed through the chosen
    model as a single-sample batch of shape ``(1, T, 1)``, and the model
    output is mapped back to the original scale.

    Parameters
    ----------
    model_18, model_12 : callable
        Models invoked as ``model(x)``; ``model_12`` is used where
        ``use_12[i]`` is true, ``model_18`` otherwise.
    X_18, X_12 : array-like
        Indexable collections whose ``i``-th element is the 1-D input window
        for series ``i`` (one collection per model).
    use_12 : sequence of bool
        Per-series flag selecting the 12-step model and window.

    Returns
    -------
    numpy.ndarray
        Row-wise stack (``np.vstack``) of the rescaled predictions, in input
        order.

    Raises
    ------
    ValueError
        From ``np.vstack`` when ``use_12`` is empty.
    """
    preds = []

    # NOTE(review): the model is called once per series; batching the calls
    # would likely be much faster but is left as-is to keep results unchanged.
    for i in tqdm(range(len(use_12))):
        if use_12[i]:
            model, window = model_12, X_12[i]
        else:
            model, window = model_18, X_18[i]

        x = window[np.newaxis, :, np.newaxis]

        mn, mx = x.min(), x.max()
        scale = mx - mn
        if scale == 0:
            # Constant window: dividing by a zero range would turn the whole
            # input (and therefore the forecast) into NaN. Use a unit scale so
            # the scaled input is all zeros and the offset is restored below.
            scale = 1.0

        x_sc = (x - mn) / scale
        pred = model(x_sc)
        # Drop the trailing feature axis and undo the min-max scaling.
        preds.append(pred[..., 0] * scale + mn)

    return np.vstack(preds)

In [33]:
results = {}

# `overlap` value handed to every metric builder; the same number of leading
# prediction steps is dropped before scoring (presumably those steps overlap
# the input window - confirm in `metrics`).
OVERLAP = 6

mape = metrics.build_mape(overlap=OVERLAP)
smape = metrics.build_smape(overlap=OVERLAP)
mase_estimate = metrics.build_mase(overlap=OVERLAP)
owa_estimate = metrics.build_owa(overlap=OVERLAP)
reconstruction_loss = metrics.build_reconstruction_loss(overlap=OVERLAP)

# Custom losses/metrics the saved models reference by name; shared by both
# load calls instead of being spelled out twice.
custom_objects = {
    'SMAPE': smape,
    'MASE_estimate': mase_estimate,
    'OWA_estimate': owa_estimate,
    'reconstruction_loss': reconstruction_loss,
}

model_18 = tf.keras.models.load_model(best_18, custom_objects=custom_objects)
model_12 = tf.keras.models.load_model(best_12, custom_objects=custom_objects)

preds = get_predictions(model_18, model_12, X_test_18, X_test_12, use_12)

# Reset Keras' global state once the predictions have been computed.
tf.keras.backend.clear_session()

# Predictions with the leading OVERLAP steps removed, aligned with y_test.
forecast = preds[:, OVERLAP:]

results['smape'] = np.nanmean(metrics.SMAPE(y_test, forecast))
# NOTE(review): X_test_18 is all-NaN for every series flagged in `use_12`, so
# nanmean may silently exclude those series from 'mase*' - confirm how
# metrics.MASE treats NaN history.
results['mase*'] = np.nanmean(metrics.MASE(X_test_18, y_test, forecast))

100%|██████████| 23000/23000 [38:34<00:00,  9.94it/s]


NameError: name 'metircs' is not defined

In [61]:
# Mean sMAPE restricted to the series forecast by the 18-step model
# (rows where `use_12` is False).
smape_per_series = metrics.SMAPE(y_test, preds[:, 6:])
np.mean(smape_per_series[~use_12])

12.392613585030816

In [64]:
# Mean sMAPE restricted to the series forecast by the 12-step model
# (rows where `use_12` is True).
smape_per_series = metrics.SMAPE(y_test, preds[:, 6:])
np.mean(smape_per_series[use_12])

15.991765854227102