**Model Config**
Use Encoder-Decoder model

In [1]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import pandas as pd
import json
import time
import math
import random

import data_module as dm

from sklearn.model_selection import train_test_split

SOS = 0
EOS = 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


**Data Preprocessing**

- Load CSV file dataset
- Create Torch Dataset
- Create Torch DataLoader
- Create padding func (with insertion of SOS and EOS tokens



In [2]:
train_dl, test_dl, train_len, test_len = dm.get_loader()

In [17]:
it = iter(train_dl)
fs = next(it)
fs[1][3]

tensor([[0.7239],
        [0.7259],
        [0.7199],
        [0.7330],
        [0.7276],
        [0.7138],
        [0.7202],
        [0.7176],
        [0.7163],
        [0.7205],
        [0.7240],
        [0.7218],
        [0.7154],
        [0.7202],
        [0.7224],
        [0.7154],
        [0.7194],
        [0.7226],
        [0.7254],
        [0.7186],
        [0.7159],
        [0.7138],
        [0.7232],
        [0.7137],
        [0.7231],
        [0.7148],
        [0.7158],
        [0.7175],
        [0.7209],
        [0.7163],
        [0.7292],
        [0.7228],
        [0.7138],
        [0.7272],
        [0.7167],
        [0.7232],
        [0.7274],
        [0.7143],
        [0.7157],
        [0.7236],
        [0.7166],
        [0.7163],
        [0.7138],
        [0.7240],
        [0.7183],
        [0.7269],
        [0.7290],
        [0.7176],
        [0.7221],
        [0.7290],
        [0.7111],
        [0.7175],
        [0.7158],
        [0.7317],
        [0.7237],
        [0

**Model Building**
- Encoder-Decoder architecture
    - Encoder -> MLP or CNN
    - Decoder -> LSTM RNN
    - Batch Normalization in both


In [33]:
class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        
        self.gru = nn.GRU(1, 128)
        
    def forward(self, in_feats, hidden):
        do, hidden = self.gru(in_feats.to(torch.float), hidden.to(torch.float))
        do = torch.sigmoid(do)
        
        return do, hidden
    
    def init_hidden(self, bs):
        return nn.init.xavier_normal_(torch.ones((1, bs, 128)))

In [34]:
class Pred(nn.Module):
    def __init__(self):
        super(Pred, self).__init__()
        
        self.fc1 = nn.Linear(594+128, 512) # Take in 594 feats from gru hidden plus categoricals
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc7 = nn.Linear(64, 1)
        self.sig = nn.Sigmoid()

        self.dropout = nn.Dropout(0.4)
    def forward(self, in_feats):
        x = self.dropout(F.relu(self.fc1(in_feats)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = (self.sig(self.fc7(x)) * 2 * 716.4264615618442) % (716.4264615618442 + 2*684.7511617508213)
        x = x - (x % 15)
        
        return x

In [35]:
class RMSE(nn.Module):
    def __init__(self):
        super(RMSE, self).__init__()
        
        self.crit = nn.MSELoss()
    
    def forward(self, x, y):
        return torch.sqrt(self.crit(x.squeeze(0).to(torch.float64), y.squeeze(0).to(torch.float64)))

**Model Training**

In [36]:
# Trainer helper functions from 
# https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html#the-seq2seq-model
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s' % (asMinutes(s))

In [37]:
def train(in_feats, cl, tt, decoder, pred, dec_optim, pred_optim, criterion):
    dec_optim.zero_grad()
    pred_optim.zero_grad()
    
    in_feats = in_feats.to(device)
    cl = cl.to(device)
    tt = tt.to(device)
    
    loss = 0
    
    di = cl[0].unsqueeze(0).to(device)
    dh = decoder.init_hidden(64).to(device)
                    
    use_teacher_forcing = True if random.random() < 0.5 else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for i in range(len(cl) - 1):
            do, dh = decoder(di, dh)
            di = cl[i + 1].unsqueeze(0).to(device)  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for i in range(len(cl)):
            do, dh = decoder(di, dh)
            
            topv, topi = do.topk(1, dim=2)
            di = topv.detach().to(device)  # detach from history as input


    pred_in = torch.cat((in_feats.squeeze(0), dh.squeeze(0)), dim=1).to(device)

    pred_time = pred(pred_in)
    loss += criterion(pred_time, tt.unsqueeze(0).unsqueeze(-1))
    loss.backward()
        
    dec_optim.step()
    pred_optim.step()
    
    return loss.item()

In [38]:
def trainEpochs(decoder, predictor, n_epochs, print_every=1000, eval_every = 5, learning_rate=0.003):
    start = time.time()
    print_loss_total = 0  # Reset every print_every

    dec_optim = optim.Adam(decoder.parameters(), lr=learning_rate)
    pred_optim = optim.Adam(predictor.parameters(), lr=learning_rate)
    criterion = RMSE()
    ds = optim.lr_scheduler.ReduceLROnPlateau(dec_optim, 'min', 0.25, 3)
    ps = optim.lr_scheduler.ReduceLROnPlateau(pred_optim, 'min', 0.25, 3)

    
    epoch_loss_max = math.inf
    
    it = iter(train_dl)
    d = next(it)
    
    for epoch in range(n_epochs):
        for i, data in enumerate(train_dl):
            if (i > 1):
                break

            in_feats = d[0] # back to data

            if (in_feats.shape[0] != 64):
                continue

            cl = d[1] # back to data
            tt = d[2] # back to data
            loss = train(in_feats, cl, tt, decoder, pred, dec_optim, pred_optim, criterion)
            print_loss_total += loss

            if (i % print_every == 0) and (i != 0): # Change back to i
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0



                print('Epoch: %d Elapsed: %s Percent of epoch Complete: (%d%%) %.4f' % (epoch, timeSince(start, i / (train_len / 128)),
                                                                  i / (train_len / 64) * 100, print_loss_avg))


            if (i % eval_every == 0) and (i != 0):
                print('*****EVALUATING*****')
                eval_loss = eval_epoch(encoder, decoder, pred, epoch)
                ds.step(eval_loss)
                ps.step(eval_loss)





In [39]:
def evaluate(decoder, pred, in_feats, cl, tt, max_len=1500):
    with torch.no_grad():
        
        eval_loss = RMSE()
        in_feats = in_feats.to(device)
        cl = cl.to(device)
        tt = tt.to(device)



        di = torch.zeros((1,64,1)).to(device)
        dh = decoder.init_hidden(in_feats).to(device)

        for i in range(1000):
            do, dh = dec(di, dh)

            topv, topi = do.topk(1, dim=2)
            di = topv.detach().to(device)  # detach from history as input
            
        pred_in = torch.cat((in_feats.squeeze(0), dh.squeeze(0)), dim=1).to(device)
        pred_time = pred(pred_in)
        
        l = eval_loss(pred_time, tt.unsqueeze(0).unsqueeze(-1))
        
        return l
    
def eval_epoch(decoder, pred, epoch):
    encoder.eval()
    decoder.eval()
    pred.eval()

    accs = []
    for i, data in enumerate(test_dl):
        in_feats = data[0]
        if (in_feats.shape[0] != 64):
            continue
        cl = data[1]
        tt = data[2]
        accs.append(evaluate(decoder, pred, in_feats, cl, tt))
        
        if (i > 100):
            break
    
    epoch_acc = (sum(accs) / len(accs)) if len(accs) > 0 else 0
    print('Epoch: %d, Loss on test: %.4f' % (epoch, epoch_acc))
    encoder.train()
    decoder.train()
    pred.train()
    return epoch_acc

In [40]:
dec = Decoder().to(device)
pred = Pred().to(device)

trainEpochs(dec, pred, 100, print_every=1, eval_every = 250, learning_rate = 0.003)


Epoch: 0 Elapsed: 0m 0s Percent of epoch Complete: (0%) 628.6849
Epoch: 1 Elapsed: 0m 0s Percent of epoch Complete: (0%) 628.1146
Epoch: 2 Elapsed: 0m 0s Percent of epoch Complete: (0%) 628.5464
Epoch: 3 Elapsed: 0m 0s Percent of epoch Complete: (0%) 627.9798
Epoch: 4 Elapsed: 0m 0s Percent of epoch Complete: (0%) 628.4510
Epoch: 5 Elapsed: 0m 0s Percent of epoch Complete: (0%) 627.9649
Epoch: 6 Elapsed: 0m 0s Percent of epoch Complete: (0%) 628.9603
Epoch: 7 Elapsed: 0m 0s Percent of epoch Complete: (0%) 629.0403
Epoch: 8 Elapsed: 0m 0s Percent of epoch Complete: (0%) 628.7487
Epoch: 9 Elapsed: 0m 0s Percent of epoch Complete: (0%) 630.4184
Epoch: 10 Elapsed: 0m 1s Percent of epoch Complete: (0%) 629.9383
Epoch: 11 Elapsed: 0m 1s Percent of epoch Complete: (0%) 625.6654
Epoch: 12 Elapsed: 0m 1s Percent of epoch Complete: (0%) 630.2950
Epoch: 13 Elapsed: 0m 1s Percent of epoch Complete: (0%) 628.3554
Epoch: 14 Elapsed: 0m 1s Percent of epoch Complete: (0%) 624.4341
Epoch: 15 Elapsed: 0

In [24]:
def submission(enc, dec, pred):
    test_dl, test_len = dm.get_loader(test=True)
    trip_ids = []
    pred_times = []
    
    with torch.no_grad():
        enc.eval()
        dec.eval()
        pred.eval()
        for i, (trip_id, in_feats) in enumerate(test_dl):
            in_feats = in_feats.to(device)
            
            hidden = enc(in_feats)

            di = torch.Tensor([0]).unsqueeze(0).unsqueeze(0).to(device)
            dh = hidden.to(device)
                                
            for i in range(200):
                do, dh = dec(di, dh)
            
                topv, topi = do.topk(1, dim=2)
                di = topv.detach().to(device)  # detach from history as input
                if (topv == EOS):
                    break
            
            pred_in = torch.cat((in_feats, dh.squeeze(0)), dim=1).to(device)
            pred_time = pred(pred_in)
            trip_ids.append(trip_id[0])
            pred_times.append(pred_time.item())

    df_sample = pd.read_csv("data/sampleSubmission.csv")
    df_sample["TRAVEL_TIME"] = pred_times
    df_sample.to_csv('submission.csv', index=None)

In [25]:
submission(enc, dec, pred)