### setup

In [1]:
import os
import argparse
import pickle
import time

import numpy as np; np.seterr(invalid='ignore')
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import TensorDataset, DataLoader

In [2]:
# Run configuration. Kept as a plain dict wrapped in an argparse.Namespace so
# the rest of the notebook can use attribute access (args.batch_size, ...).
parser = {
    'data_path': '../data/wttsf/',          # directory holding the raw CSV
    'train_file': 'train_1.csv',            # CSV read by get_data()
    'intermediate_path': '../intermediate/',  # root for cached pickles/models
    'n_epoch': 4,                           # number of passes over the data
    'future': 73,                           # forecast horizon / loss window length
    'batch_size': 128,
    'hidden_size': 128,                     # hidden units per LSTM cell
    'log_every': 10,                        # log one line every N batches
    'read_from_file': True,                 # True: load cached pickles
    'train': True,                          # False: load `model_name` instead
    'model_name': 'model_seed20170901_epoch5_loss_0.3894.pth',
    'cuda': True,                           # request GPU (see guard below)
    'seed': 20170901,
}
args = argparse.Namespace(**parser)

# Use the GPU only when it is both requested and actually available.
args.cuda = args.cuda and torch.cuda.is_available()

# Seed every RNG the notebook draws from. train() shuffles the series and
# picks its loss windows with np.random, so seeding torch alone does not make
# a run reproducible.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Cached artifacts are namespaced per seed.
args.intermediate_path = os.path.join(args.intermediate_path, str(args.seed))

### model

In [3]:
class DenseLSTMForecast(nn.Module):
    """Two stacked LSTM cells with dense (concatenating) skip connections.

    At every time step the step input (current value concatenated with the
    static feature vector) is fed to ``lstm1``; ``lstm2`` sees the step input
    plus ``lstm1``'s hidden state; the linear head sees the step input plus
    both hidden states and emits one value per series.

    Args:
        hidden_size: number of hidden units in each LSTM cell.
        input_size: width of the per-step input, i.e. the width of one time
            step of ``x`` plus the width of ``feature``. Defaults to 15, the
            value that was previously hard-coded.
    """

    def __init__(self, hidden_size, input_size=15):
        super(DenseLSTMForecast, self).__init__()
        self.lstm1 = nn.LSTMCell(input_size, hidden_size, bias=False)
        self.lstm2 = nn.LSTMCell(hidden_size+input_size, hidden_size,
                                 bias=False)
        self.linear = nn.Linear(2*hidden_size+input_size, 1)
        self.hidden_size = hidden_size
        self.input_size = input_size

    def _zero_state(self, x):
        """Return a zeroed (batch, hidden_size) state matching ``x``.

        The state is allocated from the input itself, so it lands on the same
        device and has the same dtype as the data; no global flag is needed.
        """
        return Variable(x.data.new(x.size(0), self.hidden_size).zero_())

    def _step(self, step_input, state1, state2):
        """Advance both cells by one step; return (output, state1, state2)."""
        h1, c1 = self.lstm1(step_input, state1)
        h2, c2 = self.lstm2(torch.cat([step_input, h1], dim=1), state2)
        out = self.linear(torch.cat([step_input, h1, h2], dim=1))
        return out, (h1, c1), (h2, c2)

    def forward(self, x, feature, future=1):
        """Run the network over ``x`` and optionally keep forecasting.

        Args:
            x: input sequence indexed as (batch, time, ...); each time step is
                concatenated with ``feature`` along dim 1.
            feature: per-series static features, (batch, n_features).
            future: total number of steps to predict past the end of ``x``.
                The last observed step already yields the first of them, so
                ``future - 1`` additional steps are generated by feeding each
                prediction back in as the next input.

        Returns:
            Outputs stacked along dim 1: ``x.size(1) + max(future - 1, 0)``
            steps, each of shape (batch, 1).

        Raises:
            ValueError: if ``x`` has no time steps (there would be nothing to
                seed the autoregressive loop with).
        """
        n_steps = x.size(1)
        if n_steps == 0:
            raise ValueError("input sequence must contain at least one step")

        state1 = (self._zero_state(x), self._zero_state(x))
        state2 = (self._zero_state(x), self._zero_state(x))
        o = []

        # Teacher-forced pass over the observed sequence.
        for t in range(n_steps):
            step_input = torch.cat([x[:, t], feature], dim=1)
            o_t, state1, state2 = self._step(step_input, state1, state2)
            o.append(o_t)

        # Autoregressive pass: the previous prediction is the next input.
        for _ in range(future-1):
            step_input = torch.cat([o_t, feature], dim=1)
            o_t, state1, state2 = self._step(step_input, state1, state2)
            o.append(o_t)

        return torch.stack(o, dim=1)

### utils

In [4]:
def smape(y_pred, y_true):
    """Symmetric mean absolute percentage error on the original count scale.

    Both inputs are on the log1p scale. They are mapped back with ``expm1``
    (the exact inverse of ``log1p``), rounded to whole counts, and predictions
    are clipped at zero before rounding.

    Args:
        y_pred: array-like of predictions on the log1p scale.
        y_true: array-like of targets on the log1p scale, same shape.

    Returns:
        Mean of ``|true - pred| / (|true| + |pred|)`` times 200. Elements
        where both counts are zero contribute 0.
    """
    pred_counts = np.around(np.clip(np.expm1(y_pred), 0, None))
    true_counts = np.around(np.expm1(y_true))

    numerator = np.abs(true_counts - pred_counts)
    denominator = np.abs(true_counts) + np.abs(pred_counts)

    # Divide only where the denominator is non-zero. The 0/0 entries keep the
    # pre-filled 0, so no NaN or RuntimeWarning is produced and the function
    # does not depend on a global np.seterr() setting.
    ratio = np.zeros_like(numerator, dtype=float)
    np.divide(numerator, denominator, out=ratio, where=denominator != 0)

    # NaN inputs still map to 0, as before.
    return np.mean(np.nan_to_num(ratio)) * 200

In [5]:
def get_data():
    """Load the training arrays, building and caching them if requested.

    When ``args.read_from_file`` is true the four cached pickles under
    ``args.intermediate_path`` are loaded. Otherwise the CSV at
    ``args.data_path/args.train_file`` is read, transformed, and the results
    are written to those same pickle files.

    Returns:
        Tuple ``(raw_data, scaled_data, scaler, features)``:
        raw_data: float32 array of page views with NaN replaced by 0.
        scaled_data: ``log1p(raw_data)`` standardized per row; the scaler is
            fitted on all but the last ``args.future`` columns.
        scaler: the fitted ``StandardScaler``.
        features: float32 one-hot encoding of the agent / access / project
            tokens taken from the end of each page name.
    """
    artifact_names = ('raw_data', 'scaled_data', 'scaler', 'features')
    artifact_files = [os.path.join(args.intermediate_path, name + '.pkl')
                      for name in artifact_names]

    if args.read_from_file:
        # NOTE: pickle.load can execute arbitrary code; only point
        # intermediate_path at files this notebook produced itself.
        artifacts = []
        for path in artifact_files:
            with open(path, 'rb') as f:
                artifacts.append(pickle.load(f))
        raw_data, scaled_data, scaler, features = artifacts
        return raw_data, scaled_data, scaler, features

    data_df = pd.read_csv(os.path.join(args.data_path, args.train_file),
                          index_col='Page')

    # The last three '_'-separated tokens of the page name are
    # project, access and agent (in that order).
    page_parts = data_df.index.str.rsplit('_')
    data_df["agent"] = page_parts.str.get(-1)
    data_df["access"] = page_parts.str.get(-2)
    data_df["project"] = page_parts.str.get(-3)
    features = pd.get_dummies(data_df[["agent", "access", "project"]],
        columns=["agent", "access", "project"]).values.astype('float32')

    # Drop the three helper columns appended above; missing views become 0.
    raw_data = np.nan_to_num(
        data_df.iloc[:,:-3].values.astype('float32'))
    data = np.log1p(raw_data)

    # StandardScaler normalizes per column, so transpose to make every series
    # a column. The fit excludes the last `future` days.
    scaler = StandardScaler()
    scaler.fit(np.swapaxes(data[:, :-args.future], 0, 1))
    scaled_data = scaler.transform(np.swapaxes(data, 0, 1))
    scaled_data = np.swapaxes(scaled_data, 0, 1)

    # The per-seed cache directory is not created anywhere else; without this
    # the first run with read_from_file=False fails on open(..., 'wb').
    if not os.path.isdir(args.intermediate_path):
        os.makedirs(args.intermediate_path)

    artifacts = (raw_data, scaled_data, scaler, features)
    for path, artifact in zip(artifact_files, artifacts):
        with open(path, 'wb') as f:
            pickle.dump(artifact, f)

    return raw_data, scaled_data, scaler, features

In [6]:
def train(raw_data, scaled_data, scaler, features,
          model, criterion, optimizer):
    """Run one training epoch and return the mean training loss.

    The series are shuffled once per call. The model is trained for next-step
    prediction: the input is every series without its last step and the
    target is the same series shifted by one. For each batch the loss is
    taken on a single random window of ``args.future`` steps.

    Every ``args.log_every`` batches a line is printed with the batch loss
    and a "Val loss": the SMAPE on a second, independently drawn window of
    the same batch, using the forward pass made before the optimizer step.

    Args:
        raw_data: array whose first dimension is the number of series.
        scaled_data: standardized log1p series, one row per series.
        scaler: fitted scaler providing per-series ``scale_`` and ``mean_``.
        features: static per-series feature matrix.
        model: network called as ``model(inputs, feature)``.
        criterion: loss applied to the selected window.
        optimizer: optimizer stepped once per batch.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the number
        of series.
    """
    n_series = raw_data.shape[0]
    p = np.random.permutation(n_series)
    input_tensor = torch.from_numpy(scaled_data[p, :-1]).unsqueeze(2)
    target_tensor = torch.from_numpy(scaled_data[p, 1:]).unsqueeze(2)
    features_tensor = torch.from_numpy(features[p, :])
    # Permute the scaler statistics once instead of on every logged batch.
    scale_p = scaler.scale_[p]
    mean_p = scaler.mean_[p]
    # No shuffle in the loader: the order is already randomized through `p`,
    # and features/statistics are sliced by batch position below.
    data_loader = DataLoader(TensorDataset(input_tensor, target_tensor),
                             args.batch_size)

    train_loss = 0
    init_time = time.time()
    for i, (inputt, target) in enumerate(data_loader):
        start = i * args.batch_size
        stop = start + inputt.size(0)
        feature = features_tensor[start:stop]
        if args.cuda:
            inputt = inputt.cuda()
            target = target.cuda()
            feature = feature.cuda()
        inputt = Variable(inputt)
        target = Variable(target)
        feature = Variable(feature)

        output = model(inputt, feature)
        # Both windows are drawn on every batch (even when nothing is logged)
        # so the random stream, and therefore the training windows, do not
        # depend on args.log_every.
        pos = np.random.randint(args.future, output.size(1)-args.future+1)
        pos_val = np.random.randint(args.future,
                                    output.size(1)-args.future+1)
        loss = criterion(output[:, pos:pos+args.future],
                         target[:, pos:pos+args.future])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # loss.data[0] raises on PyTorch >= 0.5 (0-dim tensor); .item() is
        # the replacement. Fall back for releases that predate it.
        loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
        train_loss += loss_value * inputt.size(0)

        if i % args.log_every == 0:
            val_output = output[
                :, pos_val:pos_val+args.future].data.squeeze(2).cpu().numpy()
            val_target = target[
                :, pos_val:pos_val+args.future].data.squeeze(2).cpu().numpy()
            # Undo the per-series standardization (one scale/mean per row).
            val_scale = scale_p[start:stop][:, None]
            val_mean = mean_p[start:stop][:, None]
            raw_val_output = val_output * val_scale + val_mean
            raw_val_target = val_target * val_scale + val_mean
            val_loss = smape(raw_val_output, raw_val_target)
            print("   % Time: {:4.0f}s | Batch: {:4} | "
                  "Train loss: {:.4f} | Val loss: {:.4f}".format(
                      time.time()-init_time, i+1, loss_value, val_loss))

    return train_loss / n_series

In [7]:
def forecast(scaled_data, scaler, features, model):
    """Predict the next ``args.future`` values for every series.

    The full scaled history is fed through ``model`` batch by batch. The last
    ``args.future`` outputs are kept, mapped back through the scaler and the
    log transform, clipped at zero and rounded.

    Args:
        scaled_data: standardized log1p series, one row per series.
        scaler: fitted scaler used to invert the standardization.
        features: static per-series feature matrix, row-aligned with
            ``scaled_data``.
        model: network called as ``model(inputs, feature, future)``.

    Returns:
        Array of rounded, non-negative predictions with one row per series
        and ``args.future`` columns.
    """
    horizon = args.future
    series = torch.from_numpy(scaled_data).unsqueeze(2)
    static = torch.from_numpy(features)
    # TensorDataset needs a second tensor; these zeros are never read.
    placeholder = torch.zeros(series.size(0))
    loader = DataLoader(TensorDataset(series, placeholder), args.batch_size)

    chunks = []
    offset = 0
    for batch, _ in loader:
        # The loader is sequential, so a running offset selects the matching
        # feature rows.
        batch_len = batch.size(0)
        static_batch = static[offset:offset + batch_len]
        offset += args.batch_size
        if args.cuda:
            batch = batch.cuda()
            static_batch = static_batch.cuda()
        batch = Variable(batch)
        static_batch = Variable(static_batch)

        predicted = model(batch, static_batch, horizon)
        as_numpy = predicted.data.squeeze(2).cpu().numpy()
        chunks.append(as_numpy[:, -horizon:])

    stacked = np.concatenate(chunks, axis=0)
    # The scaler works column-wise with one column per series: transpose in,
    # transpose back out.
    unscaled = np.swapaxes(
        scaler.inverse_transform(np.swapaxes(stacked, 0, 1)), 0, 1)
    counts = np.clip(np.exp(unscaled) - 1, 0, None)
    return np.around(counts)

In [8]:
def save_model(model, epoch, loss):
    """Write the model's state_dict under ``args.intermediate_path``.

    The file name encodes the seed, the epoch and the loss (four decimals).
    """
    file_name = "model_seed{}_epoch{}_loss_{:.4f}.pth".format(
        args.seed, epoch, loss)
    destination = os.path.join(args.intermediate_path, file_name)
    torch.save(model.state_dict(), destination)

### prepare

In [9]:
# Load the cached arrays, fitted scaler and one-hot features (or rebuild them
# from the CSV when args.read_from_file is False).
raw_data, scaled_data, scaler, features = get_data()

In [10]:
# Build the forecaster and move it to the GPU when one is in use.
model = DenseLSTMForecast(hidden_size=args.hidden_size)
if args.cuda:
    model = model.cuda()

# Mean absolute error on the scaled series.
criterion = nn.L1Loss()

In [11]:
# Plain SGD starting at lr=0.1.
optimizer = optim.SGD(model.parameters(), lr=0.1)
# step_size=1, gamma=0.5: the learning rate is multiplied by 0.5 at every
# scheduler step (the training cell calls scheduler.step() once per epoch).
scheduler = StepLR(optimizer, step_size=1, gamma=0.5)

### train

In [12]:
# Either train for args.n_epoch epochs or restore a previously saved model.
if args.train:
    for epoch in range(1, args.n_epoch+1):
        print("=> EPOCH {}".format(epoch))
        # NOTE(review): stepping the scheduler before the epoch matches the
        # convention of older PyTorch releases. On PyTorch >= 1.1 the
        # scheduler is expected to step after optimizer.step(), and calling
        # it first skips the initial learning rate. Confirm the target
        # version before reordering.
        scheduler.step()
        train_loss = train(raw_data, scaled_data, scaler, features,
                           model, criterion, optimizer)
        print("   % Train loss {:.4f}".format(train_loss))
        # TODO: checkpointing is disabled; the old call passed `val_loss`,
        # which train() does not return.
#        save_model(model, epoch, val_loss)
else:
    model_file = os.path.join(args.intermediate_path, args.model_name)
    if args.cuda:
        state_dict = torch.load(model_file)
    else:
        # A checkpoint saved from a CUDA model cannot be deserialized on a
        # CPU-only machine unless its storages are remapped to the CPU.
        state_dict = torch.load(
            model_file, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)

=> EPOCH 1
   % Time:    2s | Batch:    1 | Train loss: 0.6384 | Val loss: 55.0431
   % Time:   18s | Batch:   11 | Train loss: 0.5400 | Val loss: 41.6493
   % Time:   34s | Batch:   21 | Train loss: 0.4547 | Val loss: 30.7615
   % Time:   49s | Batch:   31 | Train loss: 0.5189 | Val loss: 36.6601
   % Time:   65s | Batch:   41 | Train loss: 0.4714 | Val loss: 37.8609
   % Time:   81s | Batch:   51 | Train loss: 0.5411 | Val loss: 34.3930
   % Time:   97s | Batch:   61 | Train loss: 0.4675 | Val loss: 30.5571
   % Time:  112s | Batch:   71 | Train loss: 0.4547 | Val loss: 35.0308
   % Time:  128s | Batch:   81 | Train loss: 0.4996 | Val loss: 32.0318
   % Time:  144s | Batch:   91 | Train loss: 0.4490 | Val loss: 31.3885
   % Time:  160s | Batch:  101 | Train loss: 0.4388 | Val loss: 32.4970
   % Time:  175s | Batch:  111 | Train loss: 0.5211 | Val loss: 31.3125
   % Time:  191s | Batch:  121 | Train loss: 0.4598 | Val loss: 30.4110
   % Time:  207s | Batch:  131 | Train loss: 0.4345 |

### test

In [13]:
# Forecast the next args.future steps for every series and pickle the result
# next to the other per-seed artifacts.
prediction_file = os.path.join(
    args.intermediate_path,
    'prediction_seed{}.pkl'.format(args.seed),
)
prediction = forecast(scaled_data, scaler, features, model)

with open(prediction_file, 'wb') as f:
    pickle.dump(prediction, f)