This is a simple ensemble of an RNN model and a FIB (short for Fibonacci) model. Cross-validation showed that a 5:5 (equal-weight) average of the two models is quite stable.

In [None]:
import argparse
import pickle

import numpy as np; np.seterr(invalid='ignore')
import pandas as pd

In [None]:
# Run configuration. `parser` holds the raw settings; `args` exposes the same
# values as attributes (args.seed, args.test_len, ...).
parser = dict(
    offset=803,                   # only referenced by a disabled validation slice
    test_len=63,                  # number of trailing columns used as ground truth
    seed=20170913,                # selects the ../intermediate/<seed>/ directory
    forecast_start='2017-09-13',  # first date column of the submission frame
    forecast_end='2017-11-13',    # last date column of the submission frame
)
args = argparse.Namespace(**parser)

In [None]:
def smape(y_pred, y_true):
    """Return the SMAPE score (0-200) of rounded predictions against truth.

    Predictions are rounded to the nearest integer before scoring. Each
    element contributes ``|y_true - y_pred| / (y_true + y_pred)``; elements
    whose denominator is exactly zero contribute 0, and elements that are
    NaN (e.g. a missing true value) are left out of the mean.

    The denominator is not absolute-valued, so the formula is only
    meaningful for non-negative inputs.

    Args:
        y_pred: Predicted values (scalar or array-like).
        y_true: True values, broadcastable against ``y_pred``.

    Returns:
        ``200 * nanmean(per-element error)`` as a float.
    """
    # Coerce up front so scalars and plain sequences work like arrays.
    y_pred = np.around(np.asarray(y_pred, dtype=float))
    y_true = np.asarray(y_true, dtype=float)
    denominator = y_true + y_pred
    # Silence the 0/0 and x/0 warnings locally instead of depending on a
    # process-wide np.seterr() call made elsewhere.
    with np.errstate(divide='ignore', invalid='ignore'):
        diff = np.abs(y_true - y_pred) / denominator
    # A zero denominator scores 0; np.where (unlike item assignment) also
    # works when the inputs are scalars.
    diff = np.where(denominator == 0, 0.0, diff)
    return 200 * np.nanmean(diff)

In [None]:
def ensemble(pred_fib, pred_rnn, y_true):
    """Print the SMAPE of every fib/rnn blend in steps of one tenth.

    For each integer weight w from 0 to 10 the blend is
    ``(pred_fib * w + pred_rnn * (10 - w)) / 10``; its score against
    ``y_true`` is printed on its own line. Nothing is returned.
    """
    total = 10
    for fib_weight in range(total + 1):
        rnn_weight = total - fib_weight
        blended = (pred_fib * fib_weight + pred_rnn * rnn_weight) / total
        score = smape(blended, y_true)
        print("{} fib + {} rnn = {}".format(fib_weight, rnn_weight, score))

In [None]:
# Load the training table; the 'Page' column becomes the row index.
full = pd.read_csv('../data/wttsf/train_2.csv', index_col='Page')
# Disabled alternatives kept for reference: filling gaps along each row, and
# taking the truth window from a fixed offset instead of from the end.
# full = full.fillna(method='ffill', axis=1).fillna(method='bfill', axis=1)
# y_true = full.iloc[:, args.offset+1:args.offset+args.test_len+1].values
# Ground truth used for scoring: the last `args.test_len` columns of the
# table (presumably one column per day -- confirm against the CSV).
y_true = full.iloc[:, -args.test_len:].values

In [None]:
# Read the pickled predictions of both models from the per-seed directory.
fib_path = "../intermediate/{}/pred_fib.pkl".format(args.seed)
with open(fib_path, "rb") as fib_file:
    pred_fib = pickle.load(fib_file)

rnn_path = "../intermediate/{}/pred_rnn.pkl".format(args.seed)
with open(rnn_path, "rb") as rnn_file:
    pred_rnn = pickle.load(rnn_file)

# Weight sweep used to pick the blend ratio (disabled):
# ensemble(pred_fib[:,:-1], pred_rnn[:,:-1], y_true)

# Final forecast: equal-weight average of the two models.
prediction = (pred_rnn + pred_fib) / 2

In [None]:
# Wide submission frame: a 'Page' column followed by one column per forecast
# date, first filled with zeros and then overwritten with the predictions.
test = pd.DataFrame()
test["Page"] = full.index

datetime_list = pd.date_range(args.forecast_start, args.forecast_end)
for day in datetime_list:
    column_name = day.date().isoformat()  # 'YYYY-MM-DD'
    test[column_name] = 0

# The first two prediction columns are skipped (presumably the days between
# the end of the training data and forecast_start -- TODO confirm).
rounded = np.around(prediction[:, 2:])
test.iloc[:, 1:] = rounded

In [None]:
# Reshape wide -> long: one row per (Page, Date) pair with its Visits value.
test = pd.melt(test, id_vars='Page', var_name='Date', value_name="Visits")

# Each key-file 'Page' entry ends with a 10-character date preceded by one
# separator character; split it into the bare page name and the date.
key_df = pd.read_csv('../data/wttsf/key_2.csv')
page_with_date = key_df['Page']
key_df['Date'] = page_with_date.apply(lambda key: key[-10:])
key_df['Page'] = page_with_date.apply(lambda key: key[:-11])

# Left join on the columns both frames share (expected: Page and Date), so
# every key row is kept.
key_df = key_df.merge(test, how="left")

submission_path = '../intermediate/{}/submission.csv'.format(args.seed)
key_df[['Id', 'Visits']].to_csv(submission_path, index=False)