### Setup

In [1]:
import argparse
import pickle

import numpy as np; np.seterr(invalid='ignore')
import pandas as pd

In [2]:
# Run configuration. The settings are collected in a plain dict and then
# exposed as attributes (args.seed, args.test_len, ...) through an
# argparse.Namespace, so the cells below read them like parsed CLI options.
parser = dict(
    # Not read by any active code in this part of the notebook —
    # TODO confirm whether it is still needed.
    offset=719,
    # Number of trailing columns of the training table used as ground truth.
    test_len=74,
    # Selects the ../intermediate/<seed>/ directory for inputs and outputs.
    seed=20170911,
    # First and last calendar day (both inclusive) written to the submission.
    forecast_start='2017-09-13',
    forecast_end='2017-11-13',
)
args = argparse.Namespace(**parser)

In [3]:
def smape(y_pred, y_true):
    """Symmetric mean absolute percentage error, on a 0-200 scale.

    Predictions are rounded to the nearest integer before scoring. Each
    element contributes ``|y_true - y_pred| / (|y_true| + |y_pred|)``; an
    element where both values are zero contributes 0 instead of 0/0. NaN
    entries (e.g. missing ground truth) are skipped by the mean.

    Parameters
    ----------
    y_pred : array-like or scalar
        Predicted values; rounded with ``np.around`` before use.
    y_true : array-like or scalar
        Observed values, broadcastable against ``y_pred``.

    Returns
    -------
    float
        ``200 * nanmean(per-element error)``; NaN if there is no non-NaN
        element to average.
    """
    # asarray lets scalars and plain lists through; the previous in-place
    # masking failed on numpy scalars.
    y_pred = np.around(np.asarray(y_pred, dtype=float))
    y_true = np.asarray(y_true, dtype=float)

    # Absolute values keep the denominator non-negative, so a negative
    # prediction can no longer produce a negative or inflated score.
    denominator = np.abs(y_true) + np.abs(y_pred)
    abs_error = np.abs(y_true - y_pred)

    # Silence the 0/0 warning locally instead of relying on a global
    # np.seterr call made elsewhere.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = np.where(denominator == 0, 0.0, abs_error / denominator)

    return 200 * np.nanmean(ratio)

In [7]:
def ensemble(pred_fib, pred_rnn, y_true):
    """Print the SMAPE of every tenth-step blend of the two forecasts.

    For each integer weight w from 0 to 10, the blend
    ``(pred_fib * w + pred_rnn * (10 - w)) / 10`` is scored against
    ``y_true`` with ``smape`` and one line is printed. Nothing is returned.
    """
    fib_weight = 0
    while fib_weight < 11:
        rnn_weight = 10 - fib_weight
        blended = (pred_fib * fib_weight + pred_rnn * rnn_weight) / 10
        score = smape(blended, y_true)
        print("{} fib + {} rnn = {}".format(fib_weight, rnn_weight, score))
        fib_weight += 1

In [5]:
# Load the traffic table; the 'Page' column becomes the row index and the
# remaining columns are the per-day values.
full = pd.read_csv('../data/wttsf/train_2.csv', index_col='Page')
# Fill gaps along each row: carry the previous value forward, then back-fill
# whatever is still missing at the start of a row. DataFrame.ffill/bfill are
# used instead of fillna(method=...), which is deprecated in pandas 2.1+.
full = full.ffill(axis=1).bfill(axis=1)
# Ground truth for scoring: the final args.test_len columns of the table.
y_true = full.iloc[:, -args.test_len:].values

In [8]:
# Load the two pickled forecast arrays stored under this run's seed directory.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by this project's own pipeline.
fib_path = "../intermediate/{}/pred_fib.pkl".format(args.seed)
rnn_path = "../intermediate/{}/pred_rnn.pkl".format(args.seed)

with open(fib_path, "rb") as fib_file:
    pred_fib = pickle.load(fib_file)
with open(rnn_path, "rb") as rnn_file:
    pred_rnn = pickle.load(rnn_file)

# Print the SMAPE of each fib/rnn blend weight against y_true.
ensemble(pred_fib, pred_rnn, y_true)

0 fib + 10 rnn = 39.4374950183925
1 fib + 9 rnn = 38.80314519122691
2 fib + 8 rnn = 38.23295856681723
3 fib + 7 rnn = 37.7258948138124
4 fib + 6 rnn = 37.28949248787397
5 fib + 5 rnn = 36.90672912531349
6 fib + 4 rnn = 36.58127483621137
7 fib + 3 rnn = 36.316640569837745
8 fib + 2 rnn = 36.10334789156942
9 fib + 1 rnn = 35.946316727101326
10 fib + 0 rnn = 35.86728094201145


In [15]:
# Build one predicted value per row of the submission key file.
key_df = pd.read_csv('../data/wttsf/key_2.csv')
# Each key entry is "<page name><separator><YYYY-MM-DD>": the last 10
# characters are the date, and one more character (presumably '_') separates
# it from the page name.
key_df['Date'] = key_df['Page'].str[-10:]
key_df['Page'] = key_df['Page'].str[:-11]

# The forecast window is expressed as column positions in the prediction
# arrays. The arithmetic below treats column j as (last training day + j + 1),
# so a calendar date maps to column (date - last_train_day).days - 1;
# future_end is an exclusive bound, which makes forecast_end itself inclusive.
last_train_day = pd.Timestamp(full.columns[-1])
future_start = (pd.Timestamp(args.forecast_start) - last_train_day).days - 1
future_end = (pd.Timestamp(args.forecast_end) - last_train_day).days
future_period = future_end - future_start

# Equal-weight blend of the two models.
prediction = (pred_rnn + pred_fib) / 2

# Fail loudly on a window that does not fit the prediction array instead of
# letting a negative or out-of-range slice silently pick the wrong columns.
if not 0 <= future_start < future_end <= prediction.shape[1]:
    raise ValueError(
        "forecast window [{}, {}) does not fit predictions with {} columns"
        .format(future_start, future_end, prediction.shape[1]))

# Resolve every key row to its (page row, day column) position directly, so
# the result no longer depends on key rows being grouped per page in blocks of
# exactly future_period consecutive, date-ordered rows.
page_rows = full.index.get_indexer(key_df['Page'].values)
if (page_rows < 0).any():
    # get_indexer marks unknown labels with -1, which would otherwise index
    # the last row of the prediction array without any error.
    raise KeyError("key file contains pages that are missing from the "
                   "training table")

day_offsets = pd.to_datetime(key_df['Date'], format='%Y-%m-%d') - last_train_day
day_cols = day_offsets.dt.days.values - 1
if ((day_cols < future_start) | (day_cols >= future_end)).any():
    raise ValueError("key file contains dates outside {} .. {}".format(
        args.forecast_start, args.forecast_end))

# float64 matches the dtype of the np.zeros buffer this lookup replaces.
visits = np.asarray(prediction[page_rows, day_cols], dtype=np.float64)

In [21]:
# Round the blended forecasts to whole numbers, attach them to the key table
# and write the two submission columns (Id, Visits) without the row index.
submission_path = '../intermediate/{}/submission.csv'.format(args.seed)
key_df['Visits'] = np.around(visits)
key_df.to_csv(submission_path, columns=['Id', 'Visits'], index=False)