In [29]:
from pathlib import Path
import pickle as pkl
import pandas as pd
import numpy as np
import tensorflow as tf
from tqdm import tqdm

import os
import sys

# Clear CUDA_VISIBLE_DEVICES so that no CUDA device is exposed to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = ''
# Make the parent directory importable; presumably that is where the
# `metrics` module imported below lives -- confirm against the repo layout.
sys.path.append('..')

import metrics

In [13]:
def load_validation(inp_length, overlap):
    """Load the pickled yearly validation split and add a trailing channel axis.

    The file read is ``../data/yearly_<inp_length + 6>_validation.pkl`` and is
    expected to unpickle to an ``(x, y)`` pair of arrays. The ``+ 6`` is
    presumably the forecast horizon -- confirm against the code that wrote
    the pickle.

    Args:
        inp_length: Input window length; only used to build the file name.
        overlap: Number of trailing input steps to prepend to every target
            row. A falsy value (``0``/``None``) leaves the targets untouched.

    Returns:
        Tuple ``(x, y)``, each with one extra axis of size 1 appended.

    Note:
        ``pickle.load`` can execute arbitrary code; only use with trusted files.
    """
    window = inp_length + 6
    pickle_path = '../data/yearly_{}_validation.pkl'.format(window)

    with open(pickle_path, 'rb') as handle:
        inputs, targets = pkl.load(handle)

    # Prefix each target row with the last `overlap` steps of its input row.
    if overlap:
        targets = np.c_[inputs[:, -overlap:], targets]

    return inputs[..., np.newaxis], targets[..., np.newaxis]

In [14]:
def get_last_N(series, N=18):
    """Return the last ``N`` non-NaN values of ``series`` as a NumPy array.

    NaNs are dropped first. If fewer than ``N`` values remain, the result is
    left-padded by repeating the earliest remaining value until it has
    length ``N``.

    Args:
        series: pandas Series, possibly containing NaNs.
        N: Number of trailing observations to return.

    Returns:
        1-D array of the trailing observations.

    Raises:
        IndexError: If padding is needed but ``series`` has no non-NaN values.
    """
    tail = series.dropna().iloc[-N:].values
    missing = N - len(tail)
    if missing <= 0:
        return tail

    # Repeat the first available observation to fill the gap on the left.
    padding = np.full(missing, tail[0], dtype=tail.dtype)
    return np.concatenate((padding, tail))

In [15]:
# Locations of the yearly train / test CSV files.
train_path = Path('../data/Yearly-train.csv')
test_path = Path('../data/Yearly-test.csv')

# Column 'V1' is removed from both frames (presumably the series identifier
# -- confirm against the CSV header).
train = pd.read_csv(train_path).drop(columns='V1')
test = pd.read_csv(test_path).drop(columns='V1')

# Test inputs: the last 18 non-NaN observations of every training row,
# left-padded by get_last_N when a row is shorter than that.
X_test = np.array([get_last_N(row, 18) for _, row in train.iterrows()])
y_test = test.values

# Validation split for input length 18, with 6 input steps prepended to the targets.
X_val, y_val = load_validation(18, 6)

In [53]:
def get_predictions(model, X, batch_size=256):
    """Run ``model`` over ``X`` in mini-batches with per-series min-max scaling.

    Every row of ``X`` is scaled with its own minimum and maximum, passed to
    ``model`` with a trailing axis of size 1 appended, and the first output
    channel (``pred[..., 0]``) is mapped back to the original scale using the
    same minimum and range.

    The previous implementation read the loop variable after the loop, so it
    raised ``NameError`` for inputs with fewer than 256 rows, and it called
    the model on an empty batch when the row count was an exact multiple of
    256. Both cases are handled by iterating over batch start offsets.

    Args:
        model: Callable that accepts an array of shape ``(batch, time, 1)``
            and returns an object supporting ``[..., 0]`` indexing and
            arithmetic with NumPy arrays.
        X: 2-D array with one series per row.
        batch_size: Maximum number of rows sent to ``model`` per call.

    Returns:
        ``np.ndarray`` with the rescaled predictions of all batches stacked
        row-wise, in the same row order as ``X``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or ``X`` has no rows.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    if len(X) == 0:
        raise ValueError('X must contain at least one series')

    preds = []
    for start in range(0, len(X), batch_size):
        x = X[start:start + batch_size]

        mn = x.min(axis=1).reshape(-1, 1)
        mx = x.max(axis=1).reshape(-1, 1)
        scale = mx - mn

        # NOTE: a constant row has scale == 0, which yields NaN predictions
        # for that row. This is unchanged from the original behaviour;
        # callers are expected to aggregate with NaN-aware reductions.
        x_sc = (x - mn) / scale
        pred = model(x_sc[..., np.newaxis])
        preds.append(pred[..., 0] * scale + mn)

    return np.vstack(preds)

In [62]:
def _score_forecasts(x, y, preds, horizon):
    """Return the NaN-ignoring mean SMAPE, MASE, MAE and MSE over the last ``horizon`` steps."""
    y_h = y[:, -horizon:]
    p_h = preds[:, -horizon:]
    err = y_h - p_h
    return {
        'smape': np.nanmean(metrics.SMAPE(y_h, p_h)),
        'mase': np.nanmean(metrics.MASE(x, y_h, p_h)),
        'mae': np.nanmean(np.mean(np.abs(err), axis=1)),
        'mse': np.nanmean(np.mean(err ** 2, axis=1)),
    }


def _load_trained_model(weights_path):
    """Load one saved Keras model, supplying the custom objects it references by name."""
    custom_objects = {
        'SMAPE': metrics.build_smape(overlap=6),
        'MASE_estimate': metrics.build_mase(overlap=6),
        'OWA_estimate': metrics.build_owa(overlap=6),
        'reconstruction_loss': metrics.build_reconstruction_loss(overlap=6),
    }
    return tf.keras.models.load_model(weights_path, custom_objects=custom_objects)


def evaluate_model_ensembles(families, num_model_per_family, x, y, results_dir=None, horizon=6):
    """Score every trained model and the median ensemble of each model family.

    For each ``family`` and each index ``num`` in ``range(num_models)``, the
    model stored at ``<results_dir>/<family>__<num>/best_weights.h5`` is
    loaded, run through ``get_predictions`` and scored. The family ensemble is
    the element-wise median of its members' predictions.

    Args:
        families: Iterable of family names.
        num_model_per_family: Iterable with the number of trained models of
            each family, in the same order as ``families``.
        x: 2-D array of input series, one per row.
        y: 2-D array of targets; only the last ``horizon`` columns are scored.
        results_dir: Directory containing the trial sub-directories. When
            omitted, the module-level ``p`` is used, which is what this
            function relied on implicitly before the parameter existed.
        horizon: Number of trailing time steps of ``y`` and of the
            predictions that enter the metrics.

    Returns:
        Dict with keys ``'smape'``, ``'mase'``, ``'mae'`` and ``'mse'``. Each
        value maps a trial directory name or a family name to its score.
    """
    if results_dir is None:
        # Backward-compatible fallback to the notebook-level results path.
        results_dir = p
    results_dir = Path(results_dir)

    # Materialise both iterables so tqdm can be told how many families to expect.
    families = list(families)
    num_model_per_family = list(num_model_per_family)
    n_families = min(len(families), len(num_model_per_family))

    results = {'smape': {}, 'mase': {}, 'mae': {}, 'mse': {}}

    for family, num_models in tqdm(zip(families, num_model_per_family), total=n_families):

        family_preds = []

        for num in range(num_models):
            trial = str(results_dir / family) + '__' + str(num)
            model = _load_trained_model(trial + '/best_weights.h5')

            preds = get_predictions(model, x)
            family_preds.append(preds)

            # Release the Keras state held for this model before loading the next one.
            tf.keras.backend.clear_session()

            trial_name = Path(trial).name
            for metric_name, value in _score_forecasts(x, y, preds, horizon).items():
                results[metric_name][trial_name] = value

        # A family without any model has nothing to ensemble; taking the
        # median of an empty stack would fail.
        if not family_preds:
            continue

        ensemble_preds = np.median(np.array(family_preds), axis=0)
        for metric_name, value in _score_forecasts(x, y, ensemble_preds, horizon).items():
            results[metric_name][family] = value

    return results

In [63]:
# Directory whose entries are the per-trial result folders.
p = Path('../results')

# Family name of every entry starting with 'inp_18__out_6': the entry name
# with its last '__'-separated component removed.
trial_families = ['__'.join(m.name.split('__')[:-1]) for m in p.glob('*') if m.name.startswith('inp_18__out_6')]
families = set(trial_families)

# Count trials by exact family name. Globbing on `family + '*'` is a prefix
# match, so it also counted the trials of any other family whose name merely
# starts with this family's name.
num_models = [trial_families.count(f) for f in families]

In [64]:
# Score all models and family ensembles on the validation split. `[..., 0]`
# selects index 0 of the trailing axis that load_validation appended.
val_results = evaluate_model_ensembles(families, num_models, X_val[..., 0], y_val[..., 0])



  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import

  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kern

  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import k

  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import ke

  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import k

  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app
  
  from ipykernel import kernelapp as app


15it [55:08, 220.57s/it][A[A


In [72]:
# Tabulate the validation scores: the outer keys of `val_results` (metric
# names) become columns, the inner keys (trial / family names) become the index.
val_df = pd.DataFrame(val_results)
# Persist the table; note the hard-coded /tmp location.
val_df.to_csv('/tmp/validation_df.csv')

In [73]:
# Score the same models and ensembles on the test split. X_test and y_test
# are passed as-is (no trailing axis is indexed away here).
test_results = evaluate_model_ensembles(families, num_models, X_test, y_test)



0it [00:00, ?it/s][A[A

1it [01:44, 104.66s/it][A[A

2it [03:26, 103.73s/it][A[A

3it [05:11, 104.22s/it][A[A

4it [06:57, 104.82s/it][A[A

5it [08:42, 104.68s/it][A[A

6it [10:28, 105.09s/it][A[A

7it [12:10, 104.30s/it][A[A

8it [13:48, 102.41s/it][A[A

9it [15:27, 101.33s/it][A[A

10it [17:06, 100.73s/it][A[A

11it [18:46, 100.46s/it][A[A

12it [20:27, 100.70s/it][A[A

13it [22:09, 100.83s/it][A[A

14it [23:50, 101.12s/it][A[A

15it [25:32, 102.16s/it][A[A


In [74]:
# Tabulate the test scores: the outer keys of `test_results` (metric names)
# become columns, the inner keys (trial / family names) become the index.
test_df = pd.DataFrame(test_results)
# Persist the table; note the hard-coded /tmp location.
test_df.to_csv('/tmp/test_df.csv')

In [77]:
# Join validation and test scores on their shared index (trial / family name).
# Both frames have the same column names, so pandas appends its default
# suffixes: `_x` for the validation columns and `_y` for the test columns.
df = val_df.merge(test_df, left_index=True, right_index=True)

In [83]:
# Correlation of every score column with the test-set sMAPE. After the merge
# above, `_x` columns come from the validation frame and `_y` columns from the
# test frame.
df.corr()['smape_y']

smape_x    0.153260
mase_x    -0.006331
mae_x     -0.005334
mse_x     -0.006331
smape_y    1.000000
mase_y     0.875572
mae_y      0.842636
mse_y      0.650126
Name: smape_y, dtype: float64