In [12]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import pickle as pkl
import tensorflow as tf
import itertools

import os
os.environ['CUDA_VISIBLE_DEVICES'] = ""

import sys
sys.path.append('..')

import metrics
from data import decomposition

import warnings
warnings.filterwarnings('once')

In [37]:
def strip_run_suffix(name):
    """Return `name` without a trailing ``__<run index>`` suffix.

    Names that do not end in ``__`` followed only by digits are returned
    unchanged. Unlike ``name.rstrip('1234567890_')``, which strips a *set of
    characters*, this never removes digits or underscores that belong to the
    family name itself (for example a name ending in ``0.75``).
    """
    stem, sep, run = name.rpartition('__')
    return stem if sep and run.isdigit() else name


# Names of every entry under ../results whose name contains "dual".
logs = [x.name for x in Path('../results').glob('*dual*')]
# One entry per model family. Sorted so that the family order (and therefore
# the order of the reported scores) is the same on every kernel restart.
families = sorted({strip_run_suffix(x) for x in logs})
# For each family, the weight files of runs 0..29 (30 runs are assumed to exist).
family_dirs = [['../results/' + family + '__{}/best_weights.h5'.format(i) for i in range(30)]
               for family in families]
# Display label built from the 8th underscore-separated token of the family name.
# NOTE(review): presumably that token is the family's hyper-parameter value - confirm.
family_names = ['dual_{}'.format(f.split('_')[7]) for f in families]

In [14]:
# Locations of the train and test CSV files, relative to the notebook directory.
train_path, test_path = (
    Path('../data/Yearly-train.csv'),
    Path('../data/Yearly-test.csv'),
)

In [15]:
# Read both splits and discard the 'V1' column from each.
# NOTE(review): 'V1' is presumably the series identifier column - confirm.
train = pd.read_csv(train_path).drop(columns='V1')
test = pd.read_csv(test_path).drop(columns='V1')

In [16]:
# Tell NumPy to ignore all floating-point error conditions from here on
# (this is a global setting, so it also affects every later cell).
np.seterr(all='ignore')

def get_last_N(series, N=18):
    """Return the last `N` non-NaN values of `series` as an array of length `N`.

    NaN entries are dropped first. If fewer than `N` observations remain, the
    result is left-padded by repeating the earliest remaining observation.

    Parameters
    ----------
    series : pandas.Series
        Series to take the window from.
    N : int, default 18
        Window length; must be at least 1.

    Returns
    -------
    numpy.ndarray
        The `N` trailing observations (padded on the left if necessary).

    Raises
    ------
    ValueError
        If `N` is smaller than 1, or if `series` has no non-NaN values.
    """
    if N < 1:
        # ``iloc[-0:]`` is ``iloc[0:]`` and would silently return the whole
        # series; a negative N would slice from the wrong end.
        raise ValueError('N must be a positive integer, got {!r}'.format(N))

    ser_N = series.dropna().iloc[-N:].values
    if len(ser_N) == 0:
        # Nothing to pad with; fail with a clear message instead of the
        # IndexError that ``ser_N[0]`` would raise.
        raise ValueError('series contains no non-NaN observations')

    if len(ser_N) < N:
        pad = [ser_N[0]] * (N - len(ser_N))
        ser_N = np.r_[pad, ser_N]
    return ser_N

In [17]:
# Model inputs: one 18-value window per row of the training frame.
X_test = np.array([get_last_N(row, N=18) for _, row in train.iterrows()])
# Targets: the test frame as a plain array.
y_test = test.values

In [24]:
# Load a single checkpoint: the first run of the first family.
model_dir = family_dirs[0][0]

# Metric / loss callables, all built with overlap=6.
# `mape` is built here but is not passed to load_model below.
mape = metrics.build_mape(overlap=6)
smape = metrics.build_smape(overlap=6)
mase_estimate = metrics.build_mase(overlap=6)
owa_estimate = metrics.build_owa(overlap=6)
reconstruction_loss = metrics.build_reconstruction_loss(overlap=6)

# Names that Keras has to resolve while deserialising the saved model.
custom_objects = {
    'SMAPE': smape,
    'MASE_estimate': mase_estimate,
    'OWA_estimate': owa_estimate,
    'reconstruction_loss': reconstruction_loss,
}

model = tf.keras.models.load_model(model_dir, custom_objects=custom_objects)

In [26]:
def get_predictions(model, data):
    """Run `model` on min-max scaled windows and map its output back to data units.

    Every row of `data` is scaled with its own minimum and maximum, split into
    two components by ``decomposition.decompose``, and both components are fed
    to the model as a pair of inputs. The first output channel is then
    rescaled with the inverse of the per-row scaling.

    Rows whose values are all equal have a zero range. Their range is replaced
    by 1 so that they scale to all zeros instead of 0/0 = NaN (which, with
    NumPy warnings silenced, would go unnoticed).

    Parameters
    ----------
    model : callable
        Called as ``model((x1, x2))``; its result is indexed with ``[..., 0]``.
    data : numpy.ndarray
        One window per row (axis 1 is the axis that min/max are taken over).

    Returns
    -------
    The model output's first channel in the original units of `data`.
    NOTE(review): presumably a TensorFlow tensor when `model` is a Keras
    model - confirm.
    """
    x = data[..., np.newaxis]

    # Per-row extrema, kept 2-D so they broadcast against the rows.
    mn, mx = x.min(axis=1), x.max(axis=1)
    span = mx - mn
    # Constant rows: avoid division by zero; (x - mn) is 0 there anyway.
    span[span == 0] = 1
    x_sc = (x[..., 0] - mn) / span

    lines, remainders = [], []
    for window in x_sc:
        l, r = decomposition.decompose(window)
        lines.append(l)
        remainders.append(r)

    # Add the trailing channel axis for both model inputs.
    x1 = np.array(lines)[..., np.newaxis]
    x2 = np.array(remainders)[..., np.newaxis]

    pred = model((x1, x2))

    # Undo the min-max scaling.
    return pred[..., 0] * span + mn

In [27]:
def ensemble_preds(model_family, data):
    """Collect the predictions of every saved model in `model_family`.

    Each entry of `model_family` is passed to ``tf.keras.models.load_model``;
    the loaded model is applied to `data` via ``get_predictions`` and the
    Keras session is cleared before the next model is loaded.

    Returns the per-model predictions stacked along a new leading axis.
    """
    member_outputs = []

    for weights_path in tqdm(model_family):
        # Built on every iteration, but not passed to load_model.
        mape = metrics.build_mape(overlap=6)

        # Names that Keras has to resolve while deserialising the saved model.
        custom_objects = {
            'SMAPE': metrics.build_smape(overlap=6),
            'MASE_estimate': metrics.build_mase(overlap=6),
            'OWA_estimate': metrics.build_owa(overlap=6),
            'reconstruction_loss': metrics.build_reconstruction_loss(overlap=6),
        }

        member = tf.keras.models.load_model(weights_path, custom_objects=custom_objects)
        member_outputs.append(get_predictions(member, data))

        # Reset Keras' global state before loading the next model.
        tf.keras.backend.clear_session()

    return np.stack(member_outputs)

In [28]:
def evaluate_ensemble(preds, y_test):
    """Score the median ensemble of `preds` against `y_test`.

    The element-wise median is taken over axis 0 of `preds`, the last six
    columns of that median are compared with `y_test` through
    ``metrics.SMAPE``, and the NaN-ignoring mean of the result is returned.
    """
    median_forecast = np.median(preds, axis=0)
    final_steps = median_forecast[:, -6:]
    scores = metrics.SMAPE(y_test, final_steps)
    return np.nanmean(scores)

In [29]:
# Stacked predictions of every family's models, in the order of `family_dirs`.
model_preds = [ensemble_preds(family, X_test) for family in family_dirs]

100%|██████████| 30/30 [04:57<00:00,  9.93s/it]
100%|██████████| 30/30 [04:56<00:00,  9.87s/it]
100%|██████████| 30/30 [04:56<00:00,  9.88s/it]
100%|██████████| 30/30 [04:57<00:00,  9.92s/it]
100%|██████████| 30/30 [04:55<00:00,  9.84s/it]
100%|██████████| 30/30 [04:57<00:00,  9.91s/it]
100%|██████████| 30/30 [05:07<00:00, 10.26s/it]


In [39]:
# One line per family: right-aligned label followed by its ensemble score.
for name, pred in zip(family_names, model_preds):
    score = evaluate_ensemble(pred, y_test)
    print('{:>15}: {:.4f}'.format(name, score))

      dual_0.75: 13.0806
       dual_0.5: 13.0555
       dual_0.8: 13.0724
      dual_0.95: 13.0568
       dual_0.9: 13.0562
      dual_0.67: 13.0736
       dual_0.0: 13.0709
