**Model Config**
Use Encoder-Decoder model

In [1]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import pandas as pd
import json
import time
import math
import random

import data_module as dm

from sklearn.model_selection import train_test_split

SOS = -1
EOS = 0

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

**Data Preprocessing**

- Load CSV file dataset
- Create Torch Dataset
- Create Torch DataLoader
- Create padding func (with insertion of SOS and EOS tokens



In [2]:
train_dl, test_dl, train_len, test_len = dm.get_loader()

**Model Building**
- Encoder-Decoder architecture
    - Encoder -> MLP or CNN
    - Decoder -> LSTM RNN
    - Batch Normalization in both


In [16]:
""" Model Architectures """

class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        
        self.fc1 = nn.Linear(594, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.bn2 = nn.BatchNorm1d(1024)
        self.fc3 = nn.Linear(1024,512)
        self.bn3 = nn.BatchNorm1d(512)
        self.fc4 = nn.Linear(512, 128)
        self.gru = nn.GRU(128, 128)
        
        self.dropout = nn.Dropout(0.6)
    
    def forward(self, input):
        x = self.dropout(F.relu(self.bn1(self.fc1(input.float()))))
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(F.relu(self.bn3(self.fc3(x))))
        x = F.relu(self.fc4(x))
        x = x.view(1, -1, 128)
        _, hidden = self.gru(x) # Input should be [1, 128, 128]
        
        return hidden

In [17]:
class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        
        self.gru = nn.GRU(1, 128)
        
    def forward(self, in_feats, hidden):
        do, hidden = self.gru(in_feats.to(torch.float), hidden.to(torch.float))
        do = torch.tanh(do)
        
        return do, hidden

In [18]:
class Pred(nn.Module):
    def __init__(self):
        super(Pred, self).__init__()
        
        self.fc1 = nn.Linear(128+594, 512) # Take in 128 feats from gru hidden plus categoricals
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512,256)
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 64)
        self.fc4 = nn.Linear(64, 1)
        self.sig = nn.Sigmoid()

        self.dropout = nn.Dropout(0.5)
    def forward(self, in_feats):
        x = self.dropout(F.relu(self.bn1(self.fc1(in_feats))))
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = (self.sig(self.fc4(x)) * 3 * 716.4264615618442) % (716.4264615618442 + 3*684.7511617508213)
        
        return x

In [19]:
class RMSE(nn.Module):
    def __init__(self):
        super(RMSE, self).__init__()
        
        self.crit = nn.MSELoss()
    
    def forward(self, x, y):
        return torch.sqrt(self.crit(x.squeeze(0).to(torch.float64), y.squeeze(0).to(torch.float64)))

**Model Training**

In [20]:
# Trainer helper functions from 
# https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html#the-seq2seq-model
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s' % (asMinutes(s))

In [21]:
def train(in_feats, cl, tt, encoder, decoder, pred, enc_optim, dec_optim, pred_optim, criterion):
    enc_optim.zero_grad()
    dec_optim.zero_grad()
    pred_optim.zero_grad()
    
    in_feats = in_feats.to(device)
    cl = cl.to(device)
    tt = tt.to(device)
    
    loss = 0
    
    hidden = encoder(in_feats)
    
    
    di = cl[0].unsqueeze(0).to(device)
    dh = hidden.to(device)
                    
    use_teacher_forcing = True if random.random() < 0.3 else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for i in range(len(cl) - 1):
            do, dh = decoder(di, dh)
            di = cl[i + 1].unsqueeze(0).to(device)  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for i in range(len(cl)):
            do, dh = decoder(di, dh)
            
            topv, topi = do.topk(1, dim=2)
            di = topi.detach().to(device)  # detach from history as input


    pred_in = torch.cat((in_feats.squeeze(0), dh.squeeze(0)), dim=1).to(device)

    pred_time = pred(pred_in)
    loss += criterion(pred_time, tt.unsqueeze(0).unsqueeze(-1))
    loss.backward()
        
    enc_optim.step()
    dec_optim.step()
    pred_optim.step()
    
    return loss.item()

In [22]:
def trainEpochs(encoder, decoder, predictor, n_epochs, print_every=1000, eval_every = 5, learning_rate=0.003):
    start = time.time()
    print_loss_total = 0  # Reset every print_every

    enc_optim = optim.AdamW(encoder.parameters(), lr=learning_rate)
    dec_optim = optim.AdamW(decoder.parameters(), lr=learning_rate)
    pred_optim = optim.AdamW(predictor.parameters(), lr=learning_rate)
    criterion = RMSE()
    es = optim.lr_scheduler.ReduceLROnPlateau(enc_optim, 'min', 0.25, 3)
    ds = optim.lr_scheduler.ReduceLROnPlateau(dec_optim, 'min', 0.25, 3)
    ps = optim.lr_scheduler.ReduceLROnPlateau(pred_optim, 'min', 0.25, 3)

    
    epoch_loss_max = math.inf

    for epoch in range(n_epochs):
        for i, data in enumerate(train_dl):
            in_feats = data[0]

            if (in_feats.shape[0] != 64):
                continue

            cl = data[1]
            tt = data[2]
            loss = train(in_feats, cl, tt, encoder, decoder, pred, enc_optim, dec_optim, pred_optim, criterion)
            print_loss_total += loss


            if (i % 1000 == 0):
                if (print_loss_total < epoch_loss_max):
                    epoch_loss_max = print_loss_total
                    torch.save({
                            'epoch': epoch,
                            'encoder_state_dict': encoder.state_dict(),
                            'encoder_optimizer_state_dict': enc_optim.state_dict(),
                            'decoder_state_dict': decoder.state_dict(),
                            'decoder_optimizer_state_dict': dec_optim.state_dict(),
                            'loss': print_loss_total,
                            }, 'model.pt')

            if (i % print_every == 0) and (i != 0): # Change back to i
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0



                print('Epoch: %d Elapsed: %s Percent of epoch Complete: (%d%%) %.4f' % (epoch, timeSince(start, i / (train_len / 128)),
                                                                  i / (train_len / 64) * 100, print_loss_avg))


            if (i % eval_every == 0) and (i != 0):
                print('*****EVALUATING*****')
                eval_loss = eval_epoch(encoder, decoder, pred, epoch)
                es.step(eval_loss)
                ds.step(eval_loss)
                ps.step(eval_loss)





In [23]:
def evaluate(encoder, decoder, pred, in_feats, cl, tt, max_len=1500):
    with torch.no_grad():
        
        eval_loss = RMSE()
        in_feats = in_feats.to(device)
        cl = cl.to(device)
        tt = tt.to(device)

        hidden = encoder(in_feats)


        di = -1 * torch.ones((1,64,1)).to(device)
        dh = hidden.to(device)

        for i in range(1000):
            do, dh = dec(di, dh)

            topv, topi = do.topk(1, dim=2)
            di = topi.detach().to(device)  # detach from history as input
            
        pred_in = torch.cat((in_feats.squeeze(0), dh.squeeze(0)), dim=1).to(device)
        pred_time = pred(pred_in)
        
        l = eval_loss(pred_time, tt.unsqueeze(0).unsqueeze(-1))
        
        return l
    
def eval_epoch(encoder, decoder, pred, epoch):
    encoder.eval()
    decoder.eval()
    pred.eval()

    accs = []
    for i, data in enumerate(test_dl):
        in_feats = data[0]
        if (in_feats.shape[0] != 64):
            continue
        cl = data[1]
        tt = data[2]
        accs.append(evaluate(encoder, decoder, pred, in_feats, cl, tt))
        
        if (i > 100):
            break
    
    epoch_acc = (sum(accs) / len(accs)) if len(accs) > 0 else 0
    print('Epoch: %d, Loss on test: %.4f' % (epoch, epoch_acc))
    encoder.train()
    decoder.train()
    pred.train()
    return epoch_acc

In [24]:
enc = Encoder().to(device)
dec = Decoder().to(device)
pred = Pred().to(device)

trainEpochs(enc, dec, pred, 1, print_every=100, eval_every = 500, learning_rate = 0.003)


Epoch: 0 Elapsed: 0m 21s Percent of epoch Complete: (0%) 627.6318
Epoch: 0 Elapsed: 0m 41s Percent of epoch Complete: (1%) 564.1780
Epoch: 0 Elapsed: 1m 1s Percent of epoch Complete: (1%) 531.0736
Epoch: 0 Elapsed: 1m 19s Percent of epoch Complete: (2%) 482.3594
Epoch: 0 Elapsed: 1m 40s Percent of epoch Complete: (2%) 551.2035
*****EVALUATING*****
Epoch: 0, Loss on test: 1499.5993
Epoch: 0 Elapsed: 2m 23s Percent of epoch Complete: (3%) 467.5521
Epoch: 0 Elapsed: 2m 44s Percent of epoch Complete: (3%) 523.3450
Epoch: 0 Elapsed: 3m 5s Percent of epoch Complete: (4%) 519.3726
Epoch: 0 Elapsed: 3m 27s Percent of epoch Complete: (4%) 555.4876
Epoch: 0 Elapsed: 3m 45s Percent of epoch Complete: (5%) 456.0065
*****EVALUATING*****
Epoch: 0, Loss on test: 1536.5922
Epoch: 0 Elapsed: 4m 29s Percent of epoch Complete: (5%) 517.7286
Epoch: 0 Elapsed: 4m 49s Percent of epoch Complete: (6%) 509.5585
Epoch: 0 Elapsed: 5m 9s Percent of epoch Complete: (6%) 499.5874
Epoch: 0 Elapsed: 5m 30s Percent of

KeyboardInterrupt: 

In [25]:
def submission(enc, dec, pred):
    test_dl, test_len = dm.get_loader(test=True)
    trip_ids = []
    pred_times = []
    
    with torch.no_grad():
        enc.eval()
        dec.eval()
        pred.eval()
        for i, (trip_id, in_feats) in enumerate(test_dl):
            in_feats = in_feats.to(device)
            
            hidden = enc(in_feats)

            di = torch.Tensor([-1]).unsqueeze(0).unsqueeze(0).to(device)
            dh = hidden.to(device)
                                
            for i in range(1000):
                do, dh = dec(di, dh)
            
                topv, topi = do.topk(1, dim=2)
                di = topi.detach().to(device)  # detach from history as input
                print(topv)
                if (topv == EOS):
                    break
                    
                if (i > 900):
                    print('reached 900')
            
            pred_in = torch.cat((in_feats, dh.squeeze(0)), dim=1).to(device)
            pred_time = pred(pred_in)
            trip_ids.append(trip_id[0])
            pred_times.append(pred_time.item())

    df_sample = pd.read_csv("data/sampleSubmission.csv")
    df_sample["TRAVEL_TIME"] = pred_times
    df_sample.to_csv('submission.csv', index=None)

In [26]:
submission(enc, dec, pred)

tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.

tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.

tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.

tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900


tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.

tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.

reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')
reached 900
tensor([[[0.7616]]], device='cuda:0')


tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.7616]]], device='cuda:0')
tensor([[[0.

KeyboardInterrupt: 