**Model Config**
Use Encoder-Decoder model

In [1]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import pandas as pd
import json
import time
import math
import random

import data_module as dm

from sklearn.model_selection import train_test_split

SOS = 0
EOS = 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

**Data Preprocessing**

- Load CSV file dataset
- Create Torch Dataset
- Create Torch DataLoader
- Create padding func (with insertion of SOS and EOS tokens



In [2]:
train_dl, test_dl, train_len, test_len = dm.get_loader(use_embedding = True)

In [3]:
it = iter(train_dl)
fs = next(it)
tt = fs[1]
print(tt.shape)
tt = tt[:-25,:,:]
print(tt.shape)
print(tt[-1])

torch.Size([98, 64, 1])
torch.Size([73, 64, 1])
tensor([[1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.2060e+03],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [4.2000e+02],
        [8.0000e+01],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [9.8500e+02],
        [1.0000e+00],
        [1.0000e+00],
        [2.2000e+02],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
        [1.0000e+00],
      

**Model Building**
- Encoder-Decoder architecture
    - Encoder -> MLP or CNN
    - Decoder -> LSTM RNN
    - Batch Normalization in both


In [3]:
class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        
        self.emb = nn.Embedding(len(list(dm.coord_2_idx.keys())), 256)
        self.gru = nn.GRU(256, 128)
        self.softmax = nn.Softmax(dim=-1)
        
    def forward(self, in_feats, hidden):
        embedded = self.emb(in_feats.to(torch.int64)).to(device)
        embedded = F.relu(embedded.squeeze(2))
        do, hidden = self.gru(embedded, hidden.to(torch.float))
        do = self.softmax(do)
        
        return do, hidden
    
    def init_hidden(self, bs):
        return nn.init.xavier_normal_(torch.ones((1, bs, 128)))

In [4]:
class Pred(nn.Module):
    def __init__(self):
        super(Pred, self).__init__()
        
        self.fc1 = nn.Linear(594+128, 512) # Take in 594 feats from gru hidden plus categoricals
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc7 = nn.Linear(64, 1)
        self.sig = nn.Sigmoid()

        self.dropout = nn.Dropout(0.4)
    def forward(self, in_feats):
        x = self.dropout(F.relu(self.fc1(in_feats)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = (self.sig(self.fc7(x)) * 2 * 716.4264615618442) % (716.4264615618442 + 2*684.7511617508213)
        x = x - (x % 15)
        
        return x

In [5]:
class RMSE(nn.Module):
    def __init__(self):
        super(RMSE, self).__init__()
        
        self.crit = nn.MSELoss()
    
    def forward(self, x, y):
        return torch.sqrt(self.crit(x.squeeze(0).to(torch.float64), y.squeeze(0).to(torch.float64)))

**Model Training**

In [6]:
# Trainer helper functions from 
# https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html#the-seq2seq-model
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s' % (asMinutes(s))

In [15]:
def train(in_feats, cl, tt, decoder, pred, dec_optim, pred_optim, criterion):
    dec_optim.zero_grad()
    pred_optim.zero_grad()
    
    in_feats = in_feats.to(device)
    cl = cl.to(device)
    tt = tt.to(device)
    
    loss = 0
    
    
    
    
    di = cl[0].unsqueeze(0).to(device)
    dh = decoder.init_hidden(64).to(device)
                    
    use_teacher_forcing = True if random.random() < 0 else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for i in range(len(cl) - 1):
            do, dh = decoder(di, dh)
            do = do.to(device)
            dh = dh.to(device)
            di = cl[i + 1].unsqueeze(0).to(device)  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for i in range(len(cl)):
            do, dh = decoder(di, dh)
            do = do.to(device)
            dh = dh.to(device)
            
            topv, topi = do.topk(1, dim=2)
            di = topi.detach().to(device)  # detach from history as input

    pred_in = torch.cat((in_feats.squeeze(0), dh.squeeze(0)), dim=1).to(device)

    pred_idx = pred(pred_in)

    loss += criterion(pred_idx, tt.unsqueeze(1).to(torch.int64))
    loss.backward()
        
    dec_optim.step()
    pred_optim.step()
    
    return loss.item()

In [16]:
def trainEpochs(decoder, predictor, n_epochs, print_every=1000, eval_every = 5, learning_rate=0.003):
    start = time.time()
    print_loss_total = 0  # Reset every print_every

    dec_optim = optim.AdamW(decoder.parameters(), lr=learning_rate)
    pred_optim = optim.AdamW(predictor.parameters(), lr=learning_rate)
    criterion = RMSE()
    #criterion = nn.CrossEntropyLoss()
    ds = optim.lr_scheduler.ReduceLROnPlateau(dec_optim, 'min', 0.25, 3)
    ps = optim.lr_scheduler.ReduceLROnPlateau(pred_optim, 'min', 0.25, 3)

    
    epoch_loss_max = math.inf
    
    it = iter(train_dl)
    d = next(it)

    for epoch in range(n_epochs):
        for i, data in enumerate(train_dl): # change back to train_dl
            if i > 1:
                break
            in_feats = d[0]

            if (in_feats.shape[0] != 64):
                continue

            cl = d[1]
            tt = d[2]
            loss = train(in_feats, cl, tt, decoder, pred, dec_optim, pred_optim, criterion)
            print_loss_total += loss


            if (i % print_every == 0) and (i != 0): # Change back to i
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0



                print('Epoch: %d Elapsed: %s Percent of epoch Complete: (%d%%) %.4f' % (epoch, timeSince(start, i / (train_len / 128)),
                                                                  i / (train_len / 64) * 100, print_loss_avg))


            if (i % eval_every == 0) and (i != 0):
                print('*****EVALUATING*****')
                eval_loss = eval_epoch(encoder, decoder, pred, epoch)
                ds.step(eval_loss)
                ps.step(eval_loss)





In [17]:
def evaluate(decoder, pred, in_feats, cl, tt, max_len=1500):
    with torch.no_grad():
        
        #eval_loss = RMSE()
        eval_loss = nn.CrossEntropyLoss()
        in_feats = in_feats.to(device)
        cl = cl.to(device)
        tt = tt.to(device)



        di = torch.zeros((1,64,1)).to(device)
        dh = decoder.init_hidden(64).to(device)

        for i in range(1000):
            do, dh = dec(di, dh)

            topv, topi = do.topk(1, dim=2)
            di = topi.detach().to(device)  # detach from history as input
            
        pred_in = torch.cat((in_feats.squeeze(0), dh.squeeze(0)), dim=1).to(device)
        pred_time = pred(pred_in)
        l = eval_loss(pred_time, tt.to(torch.int64))
        
        return l
    
def eval_epoch(decoder, pred, epoch):
    decoder.eval()
    pred.eval()

    accs = []
    for i, data in enumerate(test_dl):
        in_feats = data[0]
        if (in_feats.shape[0] != 64):
            continue
        cl = data[1]
        tt = data[2]
        accs.append(evaluate(decoder, pred, in_feats, cl, tt))
        
        if (i > 100):
            break
    
    epoch_acc = (sum(accs) / len(accs)) if len(accs) > 0 else 0
    print('Epoch: %d, Loss on test: %.4f' % (epoch, epoch_acc))
    decoder.train()
    pred.train()
    return epoch_acc

In [19]:
dec = Decoder().to(device)
pred = Pred().to(device)

trainEpochs(dec, pred, 100, print_every=1, eval_every = 500, learning_rate = 0.03)


Epoch: 0 Elapsed: 0m 0s Percent of epoch Complete: (0%) 927.4821
Epoch: 1 Elapsed: 0m 1s Percent of epoch Complete: (0%) 929.6275
Epoch: 2 Elapsed: 0m 1s Percent of epoch Complete: (0%) 925.9565
Epoch: 3 Elapsed: 0m 2s Percent of epoch Complete: (0%) 931.3788
Epoch: 4 Elapsed: 0m 2s Percent of epoch Complete: (0%) 930.1196
Epoch: 5 Elapsed: 0m 3s Percent of epoch Complete: (0%) 929.3916
Epoch: 6 Elapsed: 0m 3s Percent of epoch Complete: (0%) 925.7786
Epoch: 7 Elapsed: 0m 4s Percent of epoch Complete: (0%) 929.0744
Epoch: 8 Elapsed: 0m 4s Percent of epoch Complete: (0%) 931.4541
Epoch: 9 Elapsed: 0m 5s Percent of epoch Complete: (0%) 929.3873
Epoch: 10 Elapsed: 0m 5s Percent of epoch Complete: (0%) 929.1178
Epoch: 11 Elapsed: 0m 6s Percent of epoch Complete: (0%) 928.0962
Epoch: 12 Elapsed: 0m 6s Percent of epoch Complete: (0%) 924.8746
Epoch: 13 Elapsed: 0m 7s Percent of epoch Complete: (0%) 929.0654
Epoch: 14 Elapsed: 0m 7s Percent of epoch Complete: (0%) 930.1171
Epoch: 15 Elapsed: 0

KeyboardInterrupt: 

In [None]:
def submission(enc, dec, pred):
    test_dl, test_len = dm.get_loader(test=True)
    trip_ids = []
    pred_times = []
    
    with torch.no_grad():
        enc.eval()
        dec.eval()
        pred.eval()
        for i, (trip_id, in_feats) in enumerate(test_dl):
            in_feats = in_feats.to(device)
            
            hidden = enc(in_feats)

            di = torch.Tensor([0]).unsqueeze(0).unsqueeze(0).to(device)
            dh = hidden.to(device)
                                
            for i in range(150):
                do, dh = dec(di, dh)
            
                topv, topi = do.topk(1, dim=2)
                di = topi.detach().to(device)  # detach from history as input
                if (topi == i):
                    break
                    
                if (i > 145):
                    print('reached 150')
            
            pred_in = torch.cat((in_feats, dh.squeeze(0)), dim=1).to(device)
            pred_idx = pred(pred_in)
            
            topv, topi = pred_idx.topk(1, dim=1)
            pred_time = dm.i2t[topi.item()]
            
            trip_ids.append(trip_id[0])
            pred_times.append(pred_time)

    df_sample = pd.read_csv("data/sampleSubmission.csv")
    df_sample["TRAVEL_TIME"] = pred_times
    df_sample.to_csv('submission.csv', index=None)

In [29]:
submission(enc, dec, pred)

NameError: name 'submission' is not defined