**Model Config**
Use Encoder-Decoder model

In [1]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import pandas as pd
import json
import time
import math
import random

import data_module as dm

from sklearn.model_selection import train_test_split

SOS = -1
EOS = 0

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

**Data Preprocessing**

- Load CSV file dataset
- Create Torch Dataset
- Create Torch DataLoader
- Create padding func (with insertion of SOS and EOS tokens



In [2]:
train_dl, test_dl, train_len, test_len = dm.get_loader()

**Model Building**
- Encoder-Decoder architecture
    - Encoder -> MLP or CNN
    - Decoder -> LSTM RNN
    - Batch Normalization in both


In [3]:
""" Model Architectures """

class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        
        self.fc1 = nn.Linear(594, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256,128)
        self.fc4 = nn.Linear(128, 128)
        self.gru = nn.GRU(128, 128)
        
        self.dropout = nn.Dropout(0.4)
    
    def forward(self, input):
        x = self.dropout(F.relu(self.fc1(input.float())))
        x = F.relu(self.fc2(x))
        x = self.dropout(F.relu(self.fc3(x)))
        x = F.relu(self.fc4(x))
        x = x.view(1, -1, 128)
        _, hidden = self.gru(x) # Input should be [1, 128, 128]
        
        return hidden

In [4]:
class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        
        self.gru = nn.GRU(1, 128)
        
    def forward(self, in_feats, hidden):
        do, hidden = self.gru(in_feats.to(torch.float), hidden.to(torch.float))
        
        return do, hidden

In [5]:
class Pred(nn.Module):
    def __init__(self):
        super(Pred, self).__init__()
        
        self.fc1 = nn.Linear(128, 128)
        self.fc2 = nn.Linear(128,64)
        self.fc3 = nn.Linear(64, 1)
        self.sig = nn.Sigmoid()
        
    def forward(self, in_feats):
        x = F.relu(self.fc1(in_feats))
        x = F.relu(self.fc2(x))
        x = (self.sig(self.fc3(x)) * 3 * 716.4264615618442) % (716.4264615618442 + 2*684.7511617508213)
        
        return x

In [6]:
class RMSE(nn.Module):
    def __init__(self):
        super(RMSE, self).__init__()
        
        self.crit = nn.MSELoss()
    
    def forward(self, x, y):
        return torch.sqrt(self.crit(x.to(torch.float64), y.to(torch.float64)))

**Model Training**

In [7]:
# Trainer helper functions from 
# https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html#the-seq2seq-model
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s' % (asMinutes(s))

In [20]:
def train(in_feats, cl, tt, encoder, decoder, pred, enc_optim, dec_optim, pred_optim, criterion):
    enc_optim.zero_grad()
    dec_optim.zero_grad()
    pred_optim.zero_grad()
    
    in_feats = in_feats.to(device)
    cl = cl.to(device)
    tt = tt.to(device)
    
    loss = 0
    
    hidden = encoder(in_feats)
    
    
    di = cl[0].unsqueeze(0).to(device)
    dh = hidden.to(device)
                    
    use_teacher_forcing = True if random.random() < 0.5 else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for i in range(len(cl)):
            do, dh = decoder(di, dh)
            di = cl[i].unsqueeze(0).to(device)  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for i in range(len(cl)):
            do, dh = decoder(di, dh)
            
            topv, topi = do.topk(1, dim=2)
            print(topv)
            di = topi.detach().to(device)  # detach from history as input


    pred_time = pred(dh)
    loss += criterion(pred_time, tt.unsqueeze(0).unsqueeze(-1))
    loss.backward()
    
    nn.utils.clip_grad_norm_(encoder.parameters(), 5)
    nn.utils.clip_grad_norm_(decoder.parameters(), 5)
    
    enc_optim.step()
    dec_optim.step()
    pred_optim.step()
    
    return loss.item()

In [21]:
def trainEpochs(encoder, decoder, predictor, n_epochs, print_every=1000, eval_every = 2500, learning_rate=0.003):
    start = time.time()
    print_loss_total = 0  # Reset every print_every

    enc_optim = optim.AdamW(encoder.parameters(), lr=learning_rate)
    dec_optim = optim.AdamW(decoder.parameters(), lr=learning_rate)
    pred_optim = optim.AdamW(predictor.parameters(), lr=learning_rate)
    criterion = RMSE()
    es = optim.lr_scheduler.ReduceLROnPlateau(enc_optim, 'min', 0.25, 3)
    ds = optim.lr_scheduler.ReduceLROnPlateau(dec_optim, 'min', 0.25, 3)
    ps = optim.lr_scheduler.ReduceLROnPlateau(pred_optim, 'min', 0.25, 3)

    
    epoch_loss_max = math.inf

    for epoch in range(n_epochs):
        for i, data in enumerate(train_dl):
            in_feats = data[0]
            
            if (in_feats.shape[0] != 128):
                continue
            
            cl = data[1]
            tt = data[2]
            loss = train(in_feats, cl, tt, encoder, decoder, pred, enc_optim, dec_optim, pred_optim, criterion)
            print_loss_total += loss
            
            
            if (i % 1000 == 0):
                if (print_loss_total < epoch_loss_max):
                    epoch_loss_max = print_loss_total
                    torch.save({
                            'epoch': epoch,
                            'encoder_state_dict': encoder.state_dict(),
                            'encoder_optimizer_state_dict': enc_optim.state_dict(),
                            'decoder_state_dict': decoder.state_dict(),
                            'decoder_optimizer_state_dict': dec_optim.state_dict(),
                            'loss': print_loss_total,
                            }, 'model.pt')

            if (i % print_every == 0) and (i != 0):
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                
                es.step(print_loss_avg)
                ds.step(print_loss_avg)                

                
                print('Epoch: %d Elapsed: %s Percent of epoch Complete: (%d%%) %.4f' % (epoch, timeSince(start, i / (train_len / 128)),
                                                                  i / (train_len / 128) * 100, print_loss_avg))
                
            """
            if (i % eval_every == 0) and (i != 0):
                print('*****EVALUATING*****')
                eval_loss = eval_epoch(encoder, decoder, epoch)

            """
                
                        


In [22]:
def evaluate(encoder, decoder, pred, in_feats, cl, tt, max_len=1500):
    with torch.no_grad():
        eval_loss = RMSE()
        in_feats = in_feats.to(device)
        cl = cl.to(device)
        tt = tt.to(device)
        output_lens = torch.zeros(cl.shape[1])

        hidden = encoder(in_feats)

        di = -1 * torch.ones((1, 128, 1)).to(device)
        dh = hidden.to(device)
        dc = cell.to(device)

        x = decoder(di, (dh, dc))
        
        for i in range(len(cl)):
            do, dh = decoder(di, dh)
            di = di.detach()  # detach from history as input
            
            
            
        l = eval_loss(x, tt)
        
        return l
    
def eval_epoch(encoder, decoder, en):
    accs = []
    for i, data in enumerate(test_dl):
        in_feats = data[0]
        if (in_feats.shape[0] != 128):
            continue
        cl = data[1]
        tt = data[2]
        accs.append(evaluate(encoder, decoder, in_feats, cl, tt))
        
        if (i > (test_len / 1000)):
            break
    
    epoch_acc = (sum(accs) / len(accs)) if len(accs) > 0 else 0
    print('Epoch: %d, Loss on test: %.4f' % (en, epoch_acc))
    return epoch_acc

In [23]:
enc = Encoder().to(device)
dec = Decoder().to(device)
pred = Pred().to(device)

trainEpochs(enc, dec, pred, 1, print_every=50, learning_rate = 0.0003)


tensor([[[0.0750],
         [0.0758],
         [0.0740],
         [0.0753],
         [0.0765],
         [0.0747],
         [0.0742],
         [0.0740],
         [0.0755],
         [0.0758],
         [0.0739],
         [0.0751],
         [0.0745],
         [0.0787],
         [0.0751],
         [0.0764],
         [0.0736],
         [0.0753],
         [0.0737],
         [0.0723],
         [0.0768],
         [0.0737],
         [0.0749],
         [0.0738],
         [0.0745],
         [0.0747],
         [0.0740],
         [0.0751],
         [0.0749],
         [0.0767],
         [0.0760],
         [0.0757],
         [0.0722],
         [0.0748],
         [0.0747],
         [0.0747],
         [0.0747],
         [0.0757],
         [0.0745],
         [0.0767],
         [0.0745],
         [0.0765],
         [0.0752],
         [0.0747],
         [0.0761],
         [0.0742],
         [0.0740],
         [0.0744],
         [0.0753],
         [0.0745],
         [0.0733],
         [0.0758],
         [0.

tensor([[[0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.

tensor([[[0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.

tensor([[[0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.

tensor([[[0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.

tensor([[[0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.

tensor([[[0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.

tensor([[[0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.9619],
         [0.

KeyboardInterrupt: 

In [18]:
def submission(enc, dec, pred):
    test_dl, test_len = dm.get_loader(test=True)
    trip_ids = []
    pred_times = []
    
    with torch.no_grad():
        for i, (trip_id, in_feats) in enumerate(test_dl):
            in_feats = in_feats.to(device)
            
            hidden = enc(in_feats)

            di = torch.Tensor([-1]).unsqueeze(0).unsqueeze(0).to(device)
            dh = hidden.to(device)
                                
            for i in range(500):
                do, dh = dec(di, dh)
            
                topv, topi = do.topk(1, dim=2)
                di = topi.detach().to(device)  # detach from history as input
                if (topv == EOS):
                    break

                    
            pred_time = pred(dh)
            trip_ids.append(trip_id[0])
            pred_times.append(pred_time.item())

    df_sample = pd.read_csv("data/sampleSubmission.csv")
    df_sample["TRAVEL_TIME"] = pred_times
    df_sample.to_csv('submission.csv', index=None)

In [19]:
submission(enc, dec, pred)