### setup

In [1]:
import os
import argparse
import pickle

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader

In [2]:
# Notebook-wide settings, exposed as attributes on `args` for the cells below.
parser = dict(
    data_path='../data/wttsf/',
    train_file='train_1.csv',
    key_file='key_1.csv',
    intermediate_path='../intermediate/ensemble/',
    future=73,
    batch_size=64,
    hidden_size=256,
    read_from_file=True,
    model2_name='model_20170905_epoch6_loss26.4678.pth',
    model3_name='model_20170906_epoch6_loss26.4670.pth',
    forecast_start='2017-01-01',
    forecast_end='2017-03-01',
    cuda=True,
)
args = argparse.Namespace(**parser)

# CUDA stays enabled only when it was requested and a device is available.
args.cuda = args.cuda and torch.cuda.is_available()

### model

In [3]:
class DenseLSTMForecast(nn.Module):
    """Three stacked LSTM cells with dense (concatenating) skip connections.

    Every layer receives the step input concatenated with the hidden states
    of all layers below it; the linear head sees the step input plus all
    three hidden states and emits one value per step.

    Args:
        hidden_size: number of hidden units in each LSTM cell.
        input_size: width of the per-step input, i.e. one series value plus
            the static feature vector. Defaults to 15, the value that was
            previously hard-coded.
    """

    def __init__(self, hidden_size, input_size=15):
        super(DenseLSTMForecast, self).__init__()
        self.lstm1 = nn.LSTMCell(input_size, hidden_size)
        self.lstm2 = nn.LSTMCell(hidden_size+input_size, hidden_size)
        self.lstm3 = nn.LSTMCell(2*hidden_size+input_size, hidden_size)
        self.linear = nn.Linear(3*hidden_size+input_size, 1)
        self.hidden_size = hidden_size
        self.input_size = input_size

    def _step(self, inp, states):
        """Advance all three layers by one time step; return (output, states)."""
        (h1, c1), (h2, c2), (h3, c3) = states
        h1, c1 = self.lstm1(inp, (h1, c1))
        h2, c2 = self.lstm2(torch.cat([inp, h1], dim=1), (h2, c2))
        h3, c3 = self.lstm3(torch.cat([inp, h1, h2], dim=1), (h3, c3))
        out = self.linear(torch.cat([inp, h1, h2, h3], dim=1))
        return out, [(h1, c1), (h2, c2), (h3, c3)]

    def forward(self, x, feature, future=1):
        """Run the network over `x`, then roll it forward autoregressively.

        Args:
            x: tensor of shape (batch, seq_len, 1) holding the observed series.
            feature: tensor of shape (batch, input_size - 1) with the static
                features appended to every step input.
            future: total number of steps to predict past the end of `x`.
                The output of the last observed step already counts as the
                first one, so `future - 1` additional steps are generated by
                feeding each prediction back in as the next input.

        Returns:
            Tensor of shape (batch, seq_len + future - 1, 1).
        """
        batch_size = x.size(0)

        def zero_state():
            # Allocate next to the input (same tensor type / device) instead
            # of consulting a notebook-level flag.
            return Variable(x.data.new(batch_size, self.hidden_size).zero_())

        states = [(zero_state(), zero_state()) for _ in range(3)]
        outputs = []

        # Warm-up: consume the observed sequence one time step at a time.
        for x_t in x.chunk(x.size(1), dim=1):
            step_input = torch.cat([x_t.squeeze(dim=1), feature], dim=1)
            o_t, states = self._step(step_input, states)
            outputs.append(o_t)

        # Forecast: feed the previous prediction back in as the next input.
        for _ in range(future-1):
            step_input = torch.cat([o_t, feature], dim=1)
            o_t, states = self._step(step_input, states)
            outputs.append(o_t)

        return torch.stack(outputs, dim=1)

### utils

In [4]:
def get_data():
    """Return the scaled series, the fitted scaler and the page features.

    When `args.read_from_file` is true the three objects are unpickled from
    `args.intermediate_path`. Otherwise they are rebuilt from the training
    CSV and written back to that directory.

    Returns:
        scaled_data: float array with one row per page, standardised per
            page after a log1p transform.
        scaler: the fitted `StandardScaler` (fitted on the transposed data,
            so each page is one scaler "feature").
        features: float32 one-hot matrix of agent / access / project.
    """
    scaled_data_file = os.path.join(args.intermediate_path,
                                    'scaled_data.pkl')
    scaler_file = os.path.join(args.intermediate_path, 'scaler.pkl')
    features_file = os.path.join(args.intermediate_path, 'features.pkl')

    if args.read_from_file:
        # NOTE: pickle.load can execute arbitrary code; only point
        # intermediate_path at files produced by this notebook.
        with open(scaled_data_file, 'rb') as f:
            scaled_data = pickle.load(f)
        with open(scaler_file, 'rb') as f:
            scaler = pickle.load(f)
        with open(features_file, 'rb') as f:
            features = pickle.load(f)
        return scaled_data, scaler, features

    data_df = pd.read_csv(os.path.join(args.data_path, args.train_file),
                          index_col='Page')
    # Fill gaps along each row: forward first, then backward for leading
    # NaNs. `.ffill()/.bfill()` replace the deprecated `fillna(method=...)`.
    data_df = data_df.ffill(axis=1).bfill(axis=1)

    # The page name ends in "..._<project>_<access>_<agent>"; split once and
    # keep the metadata in its own frame so the value columns stay untouched.
    name_parts = data_df.index.str.rsplit('_')
    meta_df = pd.DataFrame({
        'agent': name_parts.str.get(-1),
        'access': name_parts.str.get(-2),
        'project': name_parts.str.get(-3),
    }, index=data_df.index)
    features = pd.get_dummies(meta_df[["agent", "access", "project"]],
        columns=["agent", "access", "project"]).values.astype('float32')

    # Rows that were entirely NaN survive the fills; nan_to_num zeroes them.
    raw_data = np.nan_to_num(data_df.values.astype('float32'))
    data = np.log1p(raw_data)

    # The scaler works column-wise, so transpose to (time, pages) to
    # standardise every page independently, then transpose back.
    # The fit uses every time step, including the last `args.future` ones.
    scaler = StandardScaler()
    scaler.fit(np.swapaxes(data, 0, 1))
    scaled_data = np.swapaxes(
        scaler.transform(np.swapaxes(data, 0, 1)), 0, 1)

    # Make sure the cache directory exists before writing into it.
    if args.intermediate_path and not os.path.isdir(args.intermediate_path):
        os.makedirs(args.intermediate_path)
    with open(scaled_data_file, 'wb') as f:
        pickle.dump(scaled_data, f)
    with open(scaler_file, 'wb') as f:
        pickle.dump(scaler, f)
    with open(features_file, 'wb') as f:
        pickle.dump(features, f)
    return scaled_data, scaler, features

In [5]:
def forecast(scaled_data, scaler, features, model):
    """Predict the next `args.future` steps for every series.

    Args:
        scaled_data: 2-D float array, one scaled series per row.
        scaler: fitted scaler whose `inverse_transform` undoes the scaling;
            it is applied to the transposed predictions.
        features: 2-D float array with one feature row per series, aligned
            with the rows of `scaled_data`.
        model: network called as `model(inputs, features, args.future)`.

    Returns:
        Array with one row per series and `args.future` columns, in the
        scaler's original (un-standardised) space.
    """
    input_tensor = torch.from_numpy(scaled_data).unsqueeze(2)
    features_tensor = torch.from_numpy(features)

    # Batch the features together with the inputs so every batch is paired
    # with its own feature rows. The previous code iterated with a
    # hard-coded batch size of 256 while slicing the features with
    # args.batch_size, which misaligned every batch after the first.
    dataset = TensorDataset(input_tensor, features_tensor)
    data_loader = DataLoader(dataset, batch_size=args.batch_size)

    output_list = []
    for inputt, feature in data_loader:
        if args.cuda:
            inputt = inputt.cuda()
            feature = feature.cuda()
        output = model(Variable(inputt), Variable(feature), args.future)
        # Keep only the last args.future steps of the model output.
        output_list.append(output.data.squeeze(2).cpu().numpy()
                           [:, -args.future:])

    output_all = np.concatenate(output_list, axis=0)
    # The scaler was applied to transposed data; transpose around the call.
    prediction = np.swapaxes(scaler.inverse_transform(
            np.swapaxes(output_all, 0, 1)), 0, 1)
    return prediction

### prepare

In [6]:
# Load (or rebuild, depending on args.read_from_file) the model inputs.
scaled_data, scaler, features = get_data()

In [7]:
# Build the network once; checkpoint weights are loaded into it by later
# cells whenever predictions are recomputed.
model = DenseLSTMForecast(hidden_size=args.hidden_size)
if args.cuda:
    model = model.cuda()

### test

In [8]:
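# NOTE: disabled cell kept for reference only -- `model1_name` is not defined in the config cell, so it cannot run as-is.
# prediction1_file = os.path.join(args.intermediate_path, 'prediction1.pkl')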
# if 1 == 0:
#     model1_file = os.path.join(args.intermediate_path, args.model1_name)
#     model.load_state_dict(torch.load(model1_file))

#     prediction1 = forecast(scaled_data, scaler, features, model)

#     with open(prediction1_file, 'wb') as f:
#         pickle.dump(prediction1, f)
# else:
#     with open(prediction1_file, 'rb') as f:
#         prediction1 = pickle.load(f)

In [9]:
# Predictions of the second checkpoint, cached on disk. Delete the cache
# file to force a recompute (replaces the hand-edited `if 1 == 0` switch,
# which left the recompute branch unreachable).
prediction2_file = os.path.join(args.intermediate_path, 'prediction2.pkl')
if os.path.exists(prediction2_file):
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(prediction2_file, 'rb') as f:
        prediction2 = pickle.load(f)
else:
    model2_file = os.path.join(args.intermediate_path, args.model2_name)
    # Map storages to CPU while loading so a checkpoint saved on a GPU also
    # loads on a CPU-only machine; load_state_dict then copies the weights
    # onto whatever device the model lives on.
    model.load_state_dict(torch.load(
        model2_file, map_location=lambda storage, loc: storage))

    prediction2 = forecast(scaled_data, scaler, features, model)

    with open(prediction2_file, 'wb') as f:
        pickle.dump(prediction2, f)

In [10]:
# Predictions of the third checkpoint, cached on disk. Delete the cache
# file to force a recompute (replaces the hand-edited `if 1 == 0` switch,
# which left the recompute branch unreachable).
prediction3_file = os.path.join(args.intermediate_path, 'prediction3.pkl')
if os.path.exists(prediction3_file):
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(prediction3_file, 'rb') as f:
        prediction3 = pickle.load(f)
else:
    model3_file = os.path.join(args.intermediate_path, args.model3_name)
    # Map storages to CPU while loading so a checkpoint saved on a GPU also
    # loads on a CPU-only machine; load_state_dict then copies the weights
    # onto whatever device the model lives on.
    model.load_state_dict(torch.load(
        model3_file, map_location=lambda storage, loc: storage))

    prediction3 = forecast(scaled_data, scaler, features, model)

    with open(prediction3_file, 'wb') as f:
        pickle.dump(prediction3, f)

In [11]:
# Average the two checkpoints' predictions, then undo the log1p transform
# applied when the training data was prepared. np.expm1 is the exact inverse
# of np.log1p and avoids the precision loss of exp(x) - 1 near zero.
# Negative counts are clipped to zero.
prediction = (prediction2 + prediction3) / 2
prediction = np.clip(np.expm1(prediction), 0, None)

In [17]:
# Raw training table with missing counts as 0, indexed by page name
# (the 'Page' column is kept as a regular column as well).
train_csv = os.path.join(args.data_path, args.train_file)
data_df = pd.read_csv(train_csv).fillna(0)
data_df.index = data_df['Page']

# Each key row is "<page>_<YYYY-MM-DD>": the last 10 characters are the date,
# everything before the separating underscore is the page name.
key_csv = os.path.join(args.data_path, args.key_file)
key_df = pd.read_csv(key_csv)
key_page = key_df['Page']
key_df['Date'] = key_page.str[-10:].astype('datetime64[ns]')
key_df['Page'] = key_page.str[:-11]
# dayofweek counts Monday as 0, so 5 and 6 are Saturday and Sunday.
key_df['Weekend'] = key_df['Date'].dt.dayofweek >= 5

In [18]:
# Median-of-medians baseline: for each trailing window take the per-page
# median, computed separately for weekdays and weekends, then take the
# median across all windows.
windows = [6, 12, 18, 30, 48, 78, 126, 203, 329]
mw_columns = ['MW' + str(window) for window in windows]
group_keys = ['Page', 'Weekend']

# Drop window columns left over from a previous run of this cell so that
# re-running it does not produce suffixed duplicates (MW6_x, MW6_y, ...).
key_df = key_df[[col for col in key_df.columns if col not in mw_columns]]

for window, val in zip(windows, mw_columns):
    tmp = pd.melt(data_df[list(data_df.columns[-window:]) + ['Page']],
                  id_vars='Page', var_name='Date', value_name=val)
    tmp['Date'] = tmp['Date'].astype('datetime64[ns]')
    tmp['Weekend'] = tmp['Date'].dt.dayofweek >= 5
    # Aggregate only the value column. Taking the median of the whole frame
    # keeps a median 'Date' column on recent pandas, and the merge below
    # would then silently join on it as well.
    tmp = tmp.groupby(group_keys)[val].median().reset_index()
    key_df = key_df.merge(tmp, how='left', on=group_keys)

# Select the window columns by name rather than by position.
key_df['Visits_MW'] = key_df[mw_columns].median(axis=1)

In [19]:
# Column range of `prediction` that covers the requested forecast window.
# Column 0 of `prediction` is the first day after the last training date.
last_train_day = pd.Timestamp(data_df.columns[-1])
future_start = (pd.Timestamp(args.forecast_start) - last_train_day).days - 1
future_end = (pd.Timestamp(args.forecast_end) - last_train_day).days
future_period = future_end - future_start

if not 0 <= future_start < future_end <= prediction.shape[1]:
    raise ValueError('forecast window [%d, %d) does not fit the %d predicted '
                     'steps' % (future_start, future_end, prediction.shape[1]))

# Look every key row up by (page, date) instead of assuming key_df consists
# of contiguous, equally sized, date-ordered blocks per page.
page_rows = data_df.index.get_indexer(key_df['Page'])
if (page_rows < 0).any():
    raise KeyError('key file contains pages missing from the training data')

day_offsets = ((key_df['Date'] - last_train_day).dt.days - 1).values
if ((day_offsets < future_start) | (day_offsets >= future_end)).any():
    raise ValueError('key file contains dates outside the forecast window')

# float64, matching the np.zeros buffer this column was previously built in.
visits = prediction[page_rows, day_offsets].astype(np.float64)

key_df['Visits_RNN'] = visits

In [21]:
# Final estimate: plain average of the median baseline and the RNN forecast,
# rounded to whole visits.
blended_visits = (key_df['Visits_MW'] + key_df['Visits_RNN']) / 2
key_df['Visits'] = blended_visits.round().astype(int)

# Only the Id and Visits columns are written to the submission file.
submission_file = os.path.join(args.intermediate_path, 'submission5.csv')
key_df.loc[:, ['Id', 'Visits']].to_csv(submission_file, index=False)

In [22]:
# Show the first rows of the key table, including the Visits_MW, Visits_RNN
# and Visits columns added above.
key_df.head()

Unnamed: 0,Page,Id,Date,Weekend,MW6,MW12,MW18,MW30,MW48,MW78,MW126,MW203,MW329,Visits_MW,Visits_RNN,Visits
0,!vote_en.wikipedia.org_all-access_all-agents,bf4edcf969af,2017-01-01,True,1.0,3.0,3.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,2.436407,3
1,!vote_en.wikipedia.org_all-access_all-agents,929ed2bf52b9,2017-01-02,False,1.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,2.0,2.55873,2
2,!vote_en.wikipedia.org_all-access_all-agents,ff29d0f51d5c,2017-01-03,False,1.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,2.0,2.467755,2
3,!vote_en.wikipedia.org_all-access_all-agents,e98873359be6,2017-01-04,False,1.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,2.0,2.470027,2
4,!vote_en.wikipedia.org_all-access_all-agents,fa012434263a,2017-01-05,False,1.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,2.0,2.525051,2
