In [16]:
import tensorflow as tf
from pathlib import Path
import pandas as pd
import numpy as np

import sys
sys.path.append('..')

import networks
import metrics
import evaluate
from datasets import seq2seq_generator, seq2seq_generator_with_aug

In [3]:
# Despite the `test_set` name this is built from the *validation* pickle; it is the
# object handed to `model.fit` as `validation_data` in the training cell.
# Same overlap (8) as the training generator; augmentation=0 presumably turns
# augmentation off — TODO confirm against datasets.seq2seq_generator.
test_set = seq2seq_generator(
    '../data/yearly_24_validation.pkl',
    overlap=8,
    augmentation=0,
)

In [4]:
# Hyper-parameters consumed by the network builder.
# NOTE(review): output_seq_length (14) minus the overlap (8) equals the 6 trailing
# steps that the evaluation cell scores — presumably the forecast horizon; confirm.
hparams = dict(
    input_seq_length=18,
    output_seq_length=14,
    bottleneck_size=700,
    bottleneck_activation='relu',
    loss_function='mae',
)

# Extra metric built for overlap=8 (the same overlap the data generators use).
# It is kept as a module-level name because the evaluation cell passes it back to
# `load_model` through `custom_objects`.
reconstruction_loss = metrics.build_reconstruction_loss(overlap=8)
metric_functions = ['mse', 'mae', reconstruction_loss]

# Build the 4-layer convolutional auto-encoder from the settings above.
model = networks.convolutional_ae_4_layer(hparams, metric_functions)

In [5]:
# Snapshot training: the single `model` built above is trained for 10 consecutive
# one-epoch rounds and the weights after each round are written to their own file,
# so the evaluation cell can later load every round as a separate ensemble member.
n_rounds = 10
samples_per_step = 256  # presumably the batch size the generators yield — TODO confirm
checkpoint_template = '../experimental/results/custom_aug_1/best_weights_{}.h5'

# Make sure the output directory exists before the first save; older Keras
# versions do not create missing parent directories for an .h5 checkpoint, so a
# fresh checkout would otherwise fail at the end of the first round.
Path(checkpoint_template).parent.mkdir(parents=True, exist_ok=True)

for e in range(n_rounds):

    # The training generator is rebuilt every round — presumably so that a fresh
    # draw of augmented samples is used each time; TODO confirm in `datasets`.
    train_set = seq2seq_generator_with_aug('../data/yearly_24_train.pkl',
                                           '../data/yearly_24_train_aug.pkl',
                                           overlap=8)

    # A new callback is created per round and each fit() runs exactly one epoch,
    # so `save_best_only=True` never has an earlier epoch to compare against: in
    # practice the weights are written at the end of every round.
    callbacks = [tf.keras.callbacks.ModelCheckpoint(
        checkpoint_template.format(e),
        save_best_only=True)]

    model.fit(train_set,
              epochs=1,
              steps_per_epoch=len(train_set) // samples_per_step + 1,
              validation_data=test_set,
              validation_steps=len(test_set) // samples_per_step + 1,
              callbacks=callbacks)

    # NOTE(review): clear_session() resets Keras' global state while the same
    # `model` object keeps being trained in the next round. It is kept here to
    # preserve the recorded behaviour, but confirm it is intentional (it is normally
    # paired with building a *new* model per iteration).
    tf.keras.backend.clear_session()

Train for 690 steps, validate for 230 steps
Train for 690 steps, validate for 230 steps
Train for 690 steps, validate for 230 steps
Train for 690 steps, validate for 230 steps
Train for 690 steps, validate for 230 steps
Train for 690 steps, validate for 230 steps
Train for 690 steps, validate for 230 steps
Train for 690 steps, validate for 230 steps
Train for 690 steps, validate for 230 steps
Train for 690 steps, validate for 230 steps


## Evaluation

In [18]:
# Load the raw Yearly train and test CSVs. The 'V1' column is dropped from both
# frames (presumably the series identifier — TODO confirm).
train_path = Path('../data/Yearly-train.csv')
test_path = Path('../data/Yearly-test.csv')

train = pd.read_csv(train_path).drop(columns='V1')
test = pd.read_csv(test_path).drop(columns='V1')

# Model inputs: one window per training series, produced by evaluate.get_last_N
# with N=18 (the same value as `input_seq_length` in the hyper-parameters).
X_test = np.array([evaluate.get_last_N(row, N=18) for _, row in train.iterrows()])

# Targets: the test frame as a plain array, one row per series.
y_test = test.values

In [22]:
# One saved model per training round (0..9); predict with each of them on the
# same inputs and keep every prediction array for the ensemble below.
weight_files = ['../experimental/results/custom_aug_1/best_weights_{}.h5'.format(idx)
                for idx in range(10)]

all_preds = []

for i, model_dir in enumerate(weight_files):
    # The saved model references the custom metric by name, so the function has
    # to be supplied again through `custom_objects` when loading.
    # Note: this rebinds the notebook-level `model` to the loaded snapshot.
    model = tf.keras.models.load_model(
        model_dir,
        custom_objects={'reconstruction_loss': reconstruction_loss},
    )

    preds = evaluate.get_predictions(model, X_test)
    all_preds.append(preds)
    


In [24]:
# Only the last 6 predicted steps of each output window are scored against y_test.
horizon = 6

# Mean SMAPE (NaNs ignored) of every individual snapshot, in training order.
for snapshot_preds in all_preds:
    snapshot_smape = metrics.SMAPE(y_test, snapshot_preds[:, -horizon:])
    print(np.nanmean(snapshot_smape))

# Ensemble: element-wise median across the snapshots (axis 0 indexes the
# snapshots after stacking), scored the same way and printed last.
stacked_preds = np.array(all_preds)
ensemble_preds = np.median(stacked_preds, axis=0)
ensemble_smape = metrics.SMAPE(y_test, ensemble_preds[:, -horizon:])
print(np.nanmean(ensemble_smape))

13.170303749685267
13.352349858736359
13.321846794206998
13.198059962825095
13.231100107162078
13.361803744681916
13.401830929786858
13.430831158900618
13.451569477888137
13.464930270744931
13.146571540612008
