**Model Config**
Use Encoder-Decoder model

In [1]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import pandas as pd
import json
import time
import math
import random

import data_module as dm

from sklearn.model_selection import train_test_split

# Numeric values of the start-of-sequence / end-of-sequence markers.
# NOTE(review): presumably these match the tokens inserted by the padding
# function in data_module — confirm against that module.
SOS = 0
EOS = 1

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


**Data Preprocessing**

- Load CSV file dataset
- Create Torch Dataset
- Create Torch DataLoader
- Create padding function (with insertion of SOS and EOS tokens)



In [2]:
# Obtain the train/test loaders and the two split sizes from data_module.
# NOTE(review): get_loader() is defined in data_module, which is not shown in
# this notebook; the meaning of the four return values is taken from their
# names only — confirm against that module.
train_dl, test_dl, train_len, test_len = dm.get_loader()

In [3]:
# Sanity check: pull one batch from the training loader and display index 3
# of its second element (the element the training loop reads as `cl`).
it = iter(train_dl)
fs = next(it)
fs[1][3]

0.714544553675216
0.7147042869721002
0.7152992678686013
0.7157691516144529
0.7158640362264587
0.7184338336969268
0.7184332325845597
0.7184166664027718
0.7184190267107762
0.7184198134816941
0.7224914225019586
0.7221516882201611
0.7224492155664712
0.7228669847956202
0.7228608816884274
0.7231692316838167
0.7227879287091186
0.722410758929971
0.722520916725233
0.7225518670181337
0.7176196584938573
0.7176090168230379
0.7176052670511347
0.7175779082846632
0.7178457366487111
0.7179551680399634
0.7179650241575571
0.7178104682236692
0.7177887547851597
0.717785968154668
0.7148446960934627
0.7149688037144193
0.7156476989917087
0.7161612027095435
0.7166501109273471
0.7239279854512886
0.7239272042237009
0.723930959539793
0.723930959539793
0.7239218864540492
0.7249204719276067
0.7248781190568432
0.7249464344297181
0.7256399570038511
0.7260931026708923
0.7275070457521197
0.7282039836917235
0.7286573025886407
0.7290586780441466
0.7291242842606703
0.7239000342576795
0.7239138068963643
0.7240917824774973

RuntimeError: Cannot pack empty tensors.

**Model Building**
- Encoder-Decoder architecture
    - Encoder -> MLP or CNN
    - Decoder -> GRU RNN
    - Batch Normalization in the encoder


In [4]:
""" Model Architectures """

class Encoder(nn.Module):
    """MLP that turns a batch of feature vectors into a 2-layer GRU initial state.

    Four fully connected layers (594 -> 512 -> 512 -> 256 -> 256). Batch
    normalisation follows the first three layers and dropout (p=0.4) follows
    the first two. The 256 activations produced for each sample are split into
    two 128-wide halves, one per GRU layer.
    """

    def __init__(self):
        super(Encoder, self).__init__()

        self.fc1 = nn.Linear(594, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 256)
        self.bn3 = nn.BatchNorm1d(256)
        self.fc4 = nn.Linear(256, 256)

        self.dropout = nn.Dropout(0.4)

    def forward(self, input):
        """Encode a batch of inputs.

        Args:
            input: tensor of shape (batch, 594); cast to float32 before use.

        Returns:
            Tensor of shape (2, batch, 128). ``out[l, b]`` is the l-th
            128-wide half of the activations computed for sample ``b``.
        """
        x = self.dropout(F.relu(self.bn1(self.fc1(input.float()))))
        x = self.dropout(F.relu(self.bn2(self.fc2(x))))
        x = F.relu(self.bn3(self.fc3(x)))
        x = F.relu(self.fc4(x))

        # (batch, 256) -> (batch, 2, 128) -> (2, batch, 128).
        # A bare ``view(2, -1, 128)`` has the right shape but reads the flat
        # buffer row by row, so for batch > 1 slot (l, b) would receive the
        # activations of a different sample. Splitting first and then swapping
        # the axes keeps every sample's state in its own batch slot, which is
        # also what a batch of one already produced.
        x = x.view(-1, 2, 128).transpose(0, 1).contiguous()

        return x

In [5]:
class Decoder(nn.Module):
    """Two-layer GRU over scalar inputs whose outputs are squashed by a sigmoid."""

    def __init__(self):
        super().__init__()

        self.gru = nn.GRU(input_size=1, hidden_size=128, num_layers=2)

    def forward(self, in_feats, hidden):
        """Advance the GRU over ``in_feats`` starting from ``hidden``.

        Both arguments are cast to float32 before being passed to the GRU.

        Returns:
            A pair ``(outputs, new_hidden)`` where ``outputs`` is the GRU
            output after an element-wise sigmoid and ``new_hidden`` is the
            unmodified hidden state returned by the GRU.
        """
        step_input = in_feats.float()
        prev_state = hidden.float()

        gru_out, next_state = self.gru(step_input, prev_state)

        return torch.sigmoid(gru_out), next_state

In [6]:
class Pred(nn.Module):
    """MLP head producing one quantised value per sample.

    The input is the concatenation of 594 input features and the 256
    flattened GRU hidden features (2 layers x 128). Three ReLU + dropout
    layers are followed by a single sigmoid unit whose output is scaled,
    wrapped by a modulo and rounded down to a multiple of 15.
    """

    # Constants of the output transform.
    # NOTE(review): presumably statistics (mean / standard deviation) of the
    # training target — confirm where these values come from.
    _SCALE = 716.4264615618442
    _SPREAD = 684.7511617508213
    # Predictions are rounded down to a multiple of this step.
    _STEP = 15

    def __init__(self):
        super(Pred, self).__init__()

        # 256 flattened GRU hidden features + 594 input features.
        self.fc1 = nn.Linear(256+594, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 256)
        self.fc7 = nn.Linear(256, 1)
        self.sig = nn.Sigmoid()

        self.dropout = nn.Dropout(0.4)

    def forward(self, in_feats):
        """Map ``in_feats`` of shape (batch, 850) to predictions of shape (batch, 1).

        The returned values are identical to ``x - (x % 15)`` applied to the
        scaled sigmoid output, i.e. non-negative multiples of 15.
        """
        x = self.dropout(F.relu(self.fc1(in_feats)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))

        # The scaled sigmoid is below 2 * _SCALE, which is smaller than the
        # modulus, so the modulo leaves the value unchanged; it is kept to
        # preserve the original arithmetic exactly.
        x = (self.sig(self.fc7(x)) * 2 * self._SCALE) % (self._SCALE + 2*self._SPREAD)

        # Round down to a multiple of _STEP. Autograd treats d(x % c)/dx as 1,
        # so the plain expression ``x - (x % 15)`` has gradient 1 - 1 = 0 and
        # no parameter upstream of it would ever receive a training signal.
        # Detaching the remainder keeps the forward value identical while
        # letting the gradient pass straight through the rounding.
        x = x - (x % self._STEP).detach()

        return x

In [7]:
class RMSE(nn.Module):
    """Root-mean-squared error, evaluated in float64."""

    def __init__(self):
        super().__init__()

        self.crit = nn.MSELoss()

    def forward(self, x, y):
        """Return sqrt(MSE(x, y)).

        A leading dimension of size one is squeezed from each argument and
        both are converted to float64 before the mean squared error is taken.
        """
        prediction = x.squeeze(0).double()
        target = y.squeeze(0).double()
        mean_sq_err = self.crit(prediction, target)
        return mean_sq_err.sqrt()

**Model Training**

In [8]:
# Trainer helper functions from 
# https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html#the-seq2seq-model
def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``.

    Minutes are the floor of ``s / 60``; the remaining seconds are truncated
    to an integer by the ``%d`` conversion.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Return the wall-clock time elapsed since ``since`` as ``'<m>m <s>s'``.

    Args:
        since: start timestamp as returned by ``time.time()``.
        percent: kept so existing call sites continue to work; it is not
            used. The returned string never depended on it — it only fed a
            remaining-time estimate that was computed and then discarded,
            and that raised ``ZeroDivisionError`` when ``percent`` was 0.

    Returns:
        The elapsed time formatted by ``asMinutes``.
    """
    elapsed = time.time() - since
    return '%s' % (asMinutes(elapsed))

In [9]:
def train(in_feats, cl, tt, encoder, decoder, pred, enc_optim, dec_optim, pred_optim, criterion):
    """Run one optimisation step on a single batch and return its loss.

    The encoder output initialises the decoder's hidden state, the decoder is
    stepped once per entry of ``cl``, and the final hidden state is
    concatenated with the input features and passed to ``pred``.

    Args:
        in_feats: batch of encoder inputs; the first dimension is the batch.
        cl: sequence fed to the decoder, indexed by time step along its first
            dimension. Assumes shape (seq_len, batch, 1), as the decoder's
            GRU takes one input feature — TODO confirm against data_module.
        tt: per-sample targets; reshaped to (1, batch, 1) for ``criterion``.
        encoder, decoder, pred: the three sub-models.
        enc_optim, dec_optim, pred_optim: their optimisers.
        criterion: loss callable taking (prediction, target).

    Returns:
        The batch loss as a Python float.
    """
    enc_optim.zero_grad()
    dec_optim.zero_grad()
    pred_optim.zero_grad()

    in_feats = in_feats.to(device)
    cl = cl.to(device)
    tt = tt.to(device)

    batch_size = in_feats.shape[0]
    seq_len = len(cl)

    # The encoder output is the decoder's initial hidden state.
    dh = encoder(in_feats)
    di = cl[0].unsqueeze(0)

    # With probability 0.5 feed the ground-truth sequence (teacher forcing);
    # otherwise feed the decoder's own top activation back in.
    use_teacher_forcing = random.random() < 0.5

    # Both modes run exactly seq_len decoder steps. Previously the
    # teacher-forcing branch stopped one step early, so the last element of
    # ``cl`` was assigned as the next input but never consumed.
    for step in range(seq_len):
        do, dh = decoder(di, dh)

        if use_teacher_forcing:
            if step + 1 < seq_len:
                di = cl[step + 1].unsqueeze(0)
        else:
            topv, _ = do.topk(1, dim=2)
            di = topv.detach()  # detach from history as input

    # (num_layers, batch, hidden) -> (batch, num_layers * hidden).
    # A plain reshape of the (layers, batch, hidden) buffer would put slices
    # belonging to different samples on the same row; swapping the axes first
    # keeps each sample's layers together. This is the layout a batch of one
    # yields in ``submission``, and it no longer hard-codes a batch of 64.
    state_feats = dh.transpose(0, 1).reshape(batch_size, -1)
    pred_in = torch.cat((in_feats.reshape(batch_size, -1), state_feats), dim=1)

    pred_time = pred(pred_in)
    loss = criterion(pred_time, tt.unsqueeze(0).unsqueeze(-1))
    loss.backward()

    enc_optim.step()
    dec_optim.step()
    pred_optim.step()

    return loss.item()

In [10]:
def trainEpochs(encoder, decoder, predictor, n_epochs, print_every=1000, eval_every=5, learning_rate=0.003):
    """Train the three sub-models on ``train_dl`` for ``n_epochs`` epochs.

    Each sub-model gets its own Adam optimiser and a ``ReduceLROnPlateau``
    scheduler driven by the loss returned from ``eval_epoch``. Batches whose
    first dimension is not 64 are skipped.

    Args:
        encoder, decoder, predictor: the sub-models to optimise.
        n_epochs: number of passes over ``train_dl``.
        print_every: log the mean training loss every this many batch indices.
        eval_every: run ``eval_epoch`` and step the schedulers every this
            many batch indices.
        learning_rate: initial learning rate of all three optimisers.
    """
    start = time.time()

    enc_optim = optim.Adam(encoder.parameters(), lr=learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=learning_rate)
    pred_optim = optim.Adam(predictor.parameters(), lr=learning_rate)
    criterion = RMSE()
    es = optim.lr_scheduler.ReduceLROnPlateau(enc_optim, mode='min', factor=0.25, patience=3)
    ds = optim.lr_scheduler.ReduceLROnPlateau(dec_optim, mode='min', factor=0.25, patience=3)
    ps = optim.lr_scheduler.ReduceLROnPlateau(pred_optim, mode='min', factor=0.25, patience=3)

    batch_size = 64           # only full batches of this size are trained on
    checkpoint_every = 1000   # batch-index interval between checkpoint checks
    batches_per_epoch = train_len / batch_size

    best_window_loss = math.inf
    window_loss = 0.0     # sum of losses since the last log line
    window_batches = 0    # number of batches contributing to window_loss

    for epoch in range(n_epochs):
        for i, data in enumerate(train_dl):

            in_feats = data[0]

            if in_feats.shape[0] != batch_size:
                continue

            cl = data[1]
            tt = data[2]
            # Train the model that was passed in (``predictor``), i.e. the one
            # pred_optim was built for, rather than a module-level global.
            loss = train(in_feats, cl, tt, encoder, decoder, predictor,
                         enc_optim, dec_optim, pred_optim, criterion)
            window_loss += loss
            window_batches += 1

            if i % checkpoint_every == 0:
                # Compare per-batch means: the window holds a different
                # number of batches at different check points, so comparing
                # raw sums would make the very first (single-batch) window
                # unbeatable.
                window_avg = window_loss / window_batches
                if window_avg < best_window_loss:
                    best_window_loss = window_avg
                    torch.save({
                            'epoch': epoch,
                            'encoder_state_dict': encoder.state_dict(),
                            'encoder_optimizer_state_dict': enc_optim.state_dict(),
                            'decoder_state_dict': decoder.state_dict(),
                            'decoder_optimizer_state_dict': dec_optim.state_dict(),
                            'predictor_state_dict': predictor.state_dict(),
                            'predictor_optimizer_state_dict': pred_optim.state_dict(),
                            'loss': window_avg,
                            }, 'model.pt')

            if (i % print_every == 0) and (i != 0):
                print_loss_avg = window_loss / window_batches
                window_loss = 0.0
                window_batches = 0

                epoch_fraction = i / batches_per_epoch
                print('Epoch: %d Elapsed: %s Percent of epoch Complete: (%d%%) %.4f' % (
                    epoch, timeSince(start, epoch_fraction), epoch_fraction * 100, print_loss_avg))

            if (i % eval_every == 0) and (i != 0):
                print('*****EVALUATING*****')
                eval_loss = eval_epoch(encoder, decoder, predictor, epoch)
                es.step(eval_loss)
                ds.step(eval_loss)
                ps.step(eval_loss)





In [11]:
def evaluate(encoder, decoder, pred, in_feats, cl, tt, max_len=1500):
    """Compute the RMSE of the model on one batch without tracking gradients.

    The decoder is run free (starting from an all-zero input and feeding its
    own top activation back) for a fixed 1000 steps; the final hidden state
    is concatenated with the input features and passed to ``pred``.

    Args:
        encoder, decoder, pred: the three sub-models.
        in_feats: batch of encoder inputs; the first dimension is the batch.
        cl: not used; kept so existing call sites continue to work.
        tt: per-sample targets; reshaped to (1, batch, 1) for the loss.
        max_len: not used (the step count is fixed at 1000); kept so existing
            call sites continue to work.

    Returns:
        The RMSE as a zero-dimensional tensor.
    """
    decode_steps = 1000

    with torch.no_grad():

        eval_loss = RMSE()
        in_feats = in_feats.to(device)
        tt = tt.to(device)

        batch_size = in_feats.shape[0]

        dh = encoder(in_feats)
        di = torch.zeros((1, batch_size, 1), device=device)

        for _ in range(decode_steps):
            # Use the decoder that was passed in, not a module-level global.
            do, dh = decoder(di, dh)

            topv, _ = do.topk(1, dim=2)
            di = topv.detach()  # detach from history as input

        # (num_layers, batch, hidden) -> (batch, num_layers * hidden).
        # Swapping the axes before flattening keeps each sample's layers on
        # its own row (a plain reshape would mix samples) and matches what a
        # batch of one yields in ``submission``.
        state_feats = dh.transpose(0, 1).reshape(batch_size, -1)

        pred_in = torch.cat((in_feats.reshape(batch_size, -1), state_feats), dim=1)
        pred_time = pred(pred_in)

        return eval_loss(pred_time, tt.unsqueeze(0).unsqueeze(-1))
    
def eval_epoch(encoder, decoder, pred, epoch):
    """Evaluate the models on part of ``test_dl`` and return the mean batch loss.

    The models are put in eval mode for the duration of the call and are
    switched back to train mode afterwards, even if evaluation raises.
    Batches whose first dimension is not 64 are skipped, and iteration stops
    after batch index 101.

    Args:
        encoder, decoder, pred: the three sub-models.
        epoch: epoch number, used only in the printed message.

    Returns:
        The mean of the per-batch losses returned by ``evaluate``, or 0 when
        no batch was evaluated.
    """
    encoder.eval()
    decoder.eval()
    pred.eval()

    try:
        batch_losses = []
        for i, data in enumerate(test_dl):
            in_feats = data[0]
            if (in_feats.shape[0] != 64):
                continue
            cl = data[1]
            tt = data[2]
            batch_losses.append(evaluate(encoder, decoder, pred, in_feats, cl, tt))

            if (i > 100):
                break

        epoch_loss = (sum(batch_losses) / len(batch_losses)) if len(batch_losses) > 0 else 0
        print('Epoch: %d, Loss on test: %.4f' % (epoch, epoch_loss))
    finally:
        # Always restore training mode so a failure during evaluation cannot
        # leave dropout / batch norm disabled for later training steps.
        encoder.train()
        decoder.train()
        pred.train()

    return epoch_loss

In [12]:
# Instantiate the three sub-models on the selected device.
enc = Encoder().to(device)
dec = Decoder().to(device)
pred = Pred().to(device)

# Train for a single epoch, logging every 50 batches and evaluating every 250.
trainEpochs(enc, dec, pred, 1, print_every=50, eval_every = 250, learning_rate = 0.003)


0.7143064609935832
0.7138260671656798
0.7135311098847475
0.7135201085461105
0.7135793998444172
0.7157057509144855
0.7157012155392386
0.7156974655008054
0.7156391143139951
0.71550069703607
0.7184567710920018
0.7184703764316986
0.7184780561465574
0.7186238485992814
0.7188435871212454
0.7666922472521916
0.7666839642338565
0.766685343560527
0.7666924416403339
0.7667034832248594
0.7323706474580705
0.7325533753230091
0.732563233054637
0.7325829485178336
0.7323401133521246
0.7668836712102534
0.7668560705153553
0.7668613932407946
0.7668606045220994
0.766857647954023
0.7159776816295788
0.7159851796198943
0.7159747782898499
0.7159057837771866
0.7158890340758429
0.7227651717195462
0.722762979231253
0.7227592240599826
0.7227562502364645
0.7227578129316898
0.7305073761906825
0.7305392630694546
0.7307578244115643
0.7314857922167634
0.7319544630461703
0.7675529370771463
0.7671533499241544
0.7671239781115116
0.7671135310528889
0.767098351580878
0.721667308843356
0.7216217013783107
0.721588840878164
0.

RuntimeError: Cannot pack empty tensors.

In [24]:
def submission(enc, dec, pred):
    """Predict a value for every test item and write ``submission.csv``.

    Items are processed one at a time. For each item the decoder is run free
    for at most 200 steps, and the final hidden state is concatenated with
    the input features and passed to ``pred``. The predictions replace the
    ``TRAVEL_TIME`` column of ``data/sampleSubmission.csv``, which is then
    saved as ``submission.csv`` without the index column.

    Args:
        enc, dec, pred: the three sub-models; they are left in eval mode.
    """
    max_decode_steps = 200

    test_dl, test_len = dm.get_loader(test=True)
    # NOTE(review): trip_ids is collected but never written; the output relies
    # on the loader yielding items in the same order as the rows of
    # sampleSubmission.csv — confirm.
    trip_ids = []
    pred_times = []

    with torch.no_grad():
        enc.eval()
        dec.eval()
        pred.eval()
        for trip_id, in_feats in test_dl:
            in_feats = in_feats.to(device)

            dh = enc(in_feats)
            di = torch.zeros((1, 1, 1), device=device)

            # A separate loop variable avoids shadowing the item loop.
            for _ in range(max_decode_steps):
                do, dh = dec(di, dh)

                topv, _ = do.topk(1, dim=2)
                di = topv.detach()  # detach from history as input
                # NOTE(review): topv is a sigmoid activation, so this exact
                # comparison with EOS only holds if it saturates to exactly
                # 1.0 — confirm this stop condition is intended.
                if (topv == EOS):
                    break

            # (2, 1, 128) -> (1, 256): both layers of the single item.
            state_feats = dh.reshape((1, 256))
            pred_in = torch.cat((in_feats, state_feats), dim=1)
            pred_time = pred(pred_in)
            trip_ids.append(trip_id[0])
            pred_times.append(pred_time.item())

    df_sample = pd.read_csv("data/sampleSubmission.csv")
    df_sample["TRAVEL_TIME"] = pred_times
    # ``index`` is a boolean flag; False states the intent explicitly.
    df_sample.to_csv('submission.csv', index=False)

In [25]:
# Run inference on the test set with the models created above and write submission.csv.
submission(enc, dec, pred)