# LSTM

In [1]:
import copy
import os
import pickle
import time
from glob import glob

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
train_path = "processed_training.npz"
# np.load() on an .npz archive returns a lazy NpzFile: every ``train_data[key]``
# lookup re-reads (and, for compressed archives, re-inflates) the whole array
# from disk. The batch loops below index train_data["X"] / train_data["y"] once
# per batch, so read every array a single time into a plain dict and close the
# archive handle afterwards. Key-based access (train_data["X"]) is unchanged.
with np.load(train_path) as archive:
    train_data = {key: archive[key] for key in archive.files}
#test_path = "processed_testing.npz"
#test_data = np.load(test_path)

In [3]:
class LSTMEncoder(nn.Module):
    """One-step recurrent encoder: linear projection, ReLU, then an LSTM cell.

    Args:
        in_dim: Size of the last dimension of each input step.
        hidden_dim: Output width of the linear projection, which is also the
            input size of the LSTM cell.
        embed_dim: Width of the LSTM cell's hidden and cell states.
    """

    def __init__(self, in_dim, hidden_dim, embed_dim):
        super().__init__()
        # The projection is created before the cell; under a fixed seed this
        # ordering determines the initial weights.
        self.linear = nn.Linear(in_features=in_dim, out_features=hidden_dim)
        self.lstm = nn.LSTMCell(input_size=hidden_dim, hidden_size=embed_dim)

    def forward(self, x, embedded):
        """Advance the recurrent state by one step.

        Args:
            x: Input for the current step; its last dimension must be ``in_dim``.
            embedded: The ``(h, c)`` state pair handed to the LSTM cell.

        Returns:
            The updated ``(h, c)`` pair produced by the LSTM cell.
        """
        projected = self.linear(x)
        return self.lstm(F.relu(projected), embedded)

class LSTMDecoder(nn.Module):
    """One-step recurrent decoder: projection + ReLU, LSTM cell, linear read-out.

    Args:
        out_dim: Size of the last dimension of both the step input and the
            step output.
        hidden_dim: Output width of the input projection, which is also the
            input size of the LSTM cell.
        embed_dim: Width of the LSTM cell's hidden and cell states.
    """

    def __init__(self, out_dim, hidden_dim, embed_dim):
        super().__init__()
        # Layers are created in the order projection -> cell -> read-out;
        # under a fixed seed this ordering determines the initial weights.
        self.linear1 = nn.Linear(in_features=out_dim, out_features=hidden_dim)
        self.lstm = nn.LSTMCell(input_size=hidden_dim, hidden_size=embed_dim)
        self.linear2 = nn.Linear(in_features=embed_dim, out_features=out_dim)

    def forward(self, x, hidden):
        """Produce one output step and the updated recurrent state.

        Args:
            x: Input for the current step; its last dimension must be ``out_dim``.
            hidden: The ``(h, c)`` state pair handed to the LSTM cell.

        Returns:
            A pair ``(out, state)``: ``out`` is the read-out of the new hidden
            vector ``h`` and ``state`` is the new ``(h, c)`` pair.
        """
        features = F.relu(self.linear1(x))
        state = self.lstm(features, hidden)
        new_h = state[0]
        return self.linear2(new_h), state

In [4]:
# --- Data / model sizes ---
batch_size = 512          # rows consumed per optimisation step
in_dim = out_dim = 2      # features per encoder input step / decoder output step
hidden_dim = 8            # width of the linear projection feeding each LSTM cell
embed_dim = 16            # width of the LSTM hidden and cell states

# --- Optimisation schedule ---
learning_rate = 1e-2      # initial Adam learning rate
decay_rate = 0.98         # multiplicative learning-rate decay applied by StepLR
num_epoch = 1_000         # upper bound on the number of training epochs

In [5]:
# Candidate sequence models: RNN, LSTM, 1dCNN, Transformer.
# The LSTM encoder/decoder pair is instantiated here (encoder first) and each
# network is moved to `device` immediately after construction.
encoder = LSTMEncoder(in_dim=in_dim, hidden_dim=hidden_dim, embed_dim=embed_dim)
encoder = encoder.to(device)

decoder = LSTMDecoder(out_dim=out_dim, hidden_dim=hidden_dim, embed_dim=embed_dim)
decoder = decoder.to(device)

# Adam (Adaptive Moment Estimation) keeps decaying averages of past gradients
# and past squared gradients to adapt the step size of every parameter.
# Each network gets its own optimizer, both starting at `learning_rate`.
encoder_optimizer, decoder_optimizer = (
    torch.optim.Adam(net.parameters(), lr=learning_rate)
    for net in (encoder, decoder)
)

# StepLR with step_size=1 multiplies the learning rate by `decay_rate` on every
# scheduler.step() call.
encoder_scheduler, decoder_scheduler = (
    torch.optim.lr_scheduler.StepLR(opt, step_size=1, gamma=decay_rate)
    for opt in (encoder_optimizer, decoder_optimizer)
)

# Mean squared error between predicted and target steps.
loss_fun = nn.MSELoss()

In [6]:
def train_epoch(encoder, decoder, encoder_optimizer, decoder_optimizer, loss_function, roll_out):
    """Run one optimisation pass over the training batches and return its RMSE.

    Reads the module-level ``train_data``, ``batch_size``, ``embed_dim`` and
    ``device``. Batches 0..369 of ``train_data`` are used. For each batch the
    19 input steps are fed through the encoder, then the decoder is unrolled
    ``roll_out`` times, feeding each prediction back in as the next input.

    Args:
        encoder: Module called as ``encoder(step_input, state) -> state``.
        decoder: Module called as ``decoder(step_input, state) -> (pred, state)``.
        encoder_optimizer: Optimizer stepping the encoder parameters.
        decoder_optimizer: Optimizer stepping the decoder parameters.
        loss_function: Callable ``(pred, target) -> scalar tensor``.
        roll_out: Number of future steps to predict and score (at most 30).

    Returns:
        Square root of the mean per-batch loss, rounded to 5 decimals.
    """
    batch_losses = []
    for batch_idx in range(370):
        rows = slice(batch_idx * batch_size, (batch_idx + 1) * batch_size)

        # Each row is viewed as 19 steps x 4 features; only the first two
        # features are kept as model input.
        history = torch.from_numpy(train_data["X"][rows]).float().to(device)
        history = history.reshape(batch_size, 19, 4)[:, :, :2]

        # Targets are viewed as 30 steps x 2 values, truncated to the horizon.
        future = torch.from_numpy(train_data["y"][rows]).float().to(device)
        future = future.reshape(batch_size, 30, 2)[:, :roll_out, :]

        # Zero-initialised (h, c) pair, then one encoder call per input step.
        state = (
            torch.zeros(batch_size, embed_dim, device=device),
            torch.zeros(batch_size, embed_dim, device=device),
        )
        for t in range(19):
            state = encoder(history[:, t, :], state)

        # Seed the decoder with the last observed step.
        # NOTE(review): the state returned by the decoder is discarded, so
        # every roll-out step restarts from the encoder's final state --
        # confirm this is intended rather than a missed state hand-over.
        pred = history[:, -1, :]
        step_losses = []
        for t in range(roll_out):
            pred, _ = decoder(pred, state)
            step_losses.append(loss_function(pred, future[:, t, :]))
        loss = sum(step_losses) / roll_out

        batch_losses.append(loss.item())

        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()

    return round(np.sqrt(np.mean(batch_losses)), 5)

def eval_epoch(encoder, decoder, loss_function, roll_out):
    """Score the encoder/decoder pair on the held-out batches and return the RMSE.

    Reads the module-level ``train_data``, ``batch_size``, ``embed_dim`` and
    ``device``. Batches 370..401 of ``train_data`` serve as the validation
    split. No gradients are tracked and no parameters are updated.

    Args:
        encoder: Module called as ``encoder(step_input, state) -> state``.
        decoder: Module called as ``decoder(step_input, state) -> (pred, state)``.
        loss_function: Callable ``(pred, target) -> scalar tensor``.
        roll_out: Number of future steps to predict and score (at most 30).

    Returns:
        Square root of the mean per-batch loss, rounded to 5 decimals.
    """
    valid_mse = []
    with torch.no_grad():
        for i in range(370, 402):
            # Each row is viewed as 19 steps x 4 features; only the first two
            # features are kept as model input.
            inp = torch.from_numpy(train_data["X"][i * batch_size:(i+1) * batch_size]).float().to(device).reshape(batch_size, 19, 4)
            inp = inp[:, :, :2]
            # Targets are viewed as 30 steps x 2 values, truncated to the horizon.
            tgt = torch.from_numpy(train_data["y"][i * batch_size:(i+1) * batch_size]).float().to(device).reshape(batch_size, 30, 2)
            tgt = tgt[:, :roll_out, :]

            # Zero-initialised (h, c) pair, then one encoder call per input step.
            embedded_vec = (
                torch.zeros(batch_size, embed_dim).to(device),
                torch.zeros(batch_size, embed_dim).to(device),
            )
            for step in range(19):
                embedded_vec = encoder(inp[:, step, :], embedded_vec)

            # Seed the decoder with the last observed step. Index it explicitly
            # instead of relying on the encoder loop variable leaking out of
            # the loop above.
            pred = inp[:, -1, :]
            loss = 0
            for step in range(roll_out):
                # NOTE(review): the state returned by the decoder is discarded,
                # so every roll-out step restarts from the encoder's final
                # state -- confirm this is intended.
                pred, _ = decoder(pred, embedded_vec)
                loss += loss_function(pred, tgt[:, step, :])
            loss = loss / roll_out
            valid_mse.append(loss.item())

        valid_mse = round(np.sqrt(np.mean(valid_mse)), 5)
    return valid_mse

In [7]:
# Per-epoch RMSE histories.
train_rmse = []
valid_rmse = []
min_rmse = 10e8   # best validation RMSE seen at the current roll-out length
worse_count = 0   # epochs without a validation improvement since the last best

# Roll-out lengths are visited in order; training moves on to the next length
# once validation has failed to improve for more than 3 epochs.
roll_outs = [1, 5, 10, 30]
roll_out_id = 0

for i in range(num_epoch):
    start = time.time()

    # model.train() # if you use dropout or batchnorm. 
    train_rmse.append(train_epoch(encoder, decoder, encoder_optimizer, decoder_optimizer, loss_fun, roll_outs[roll_out_id]))

    # model.eval()
    val_rmse = eval_epoch(encoder, decoder, loss_fun, roll_outs[roll_out_id])
    valid_rmse.append(val_rmse)

    # save the best model
    if valid_rmse[-1] < min_rmse:
        min_rmse = valid_rmse[-1]
        # Snapshot the weights. Storing (encoder, decoder) directly would only
        # keep references to the live modules, which later epochs keep
        # updating, so the "best" model would silently become the latest one.
        best_model = (copy.deepcopy(encoder), copy.deepcopy(decoder))
        worse_count = 0
        # torch.save([best_model, i, get_lr(optimizer)], name + ".pth")
    else:
        worse_count += 1
    
    if worse_count > 3:
        # Advance to the next roll-out length and restart best-score tracking,
        # because RMSE values at different lengths are not comparable.
        roll_out_id += 1
        worse_count = 0
        min_rmse = 10e8
    
    # Stop once every roll-out length has been exhausted.
    if roll_out_id >= len(roll_outs):
        break

    end = time.time()
    
    # Early Stopping
    # if (len(train_rmse) > 100 and np.mean(valid_rmse[-5:]) >= np.mean(valid_rmse[-10:-5])):
    #        break

    # Learning Rate Decay        
    encoder_scheduler.step()
    decoder_scheduler.step()
    
    # T is the epoch wall-clock time in minutes.
    print("Epoch {} | T: {:0.2f} | Train RMSE: {:0.5f} | Valid RMSE: {:0.5f} | Current Roll Out: {}".format(i + 1, (end-start) / 60, train_rmse[-1], valid_rmse[-1], roll_outs[roll_out_id]))

Epoch 1 | T: 1.86 | Train RMSE: 0.09854 | Valid RMSE: 0.01193 | Current Roll Out: 1
Epoch 2 | T: 1.86 | Train RMSE: 0.01169 | Valid RMSE: 0.01094 | Current Roll Out: 1
Epoch 3 | T: 1.85 | Train RMSE: 0.01126 | Valid RMSE: 0.01069 | Current Roll Out: 1
Epoch 4 | T: 1.85 | Train RMSE: 0.01090 | Valid RMSE: 0.01001 | Current Roll Out: 1
Epoch 5 | T: 1.85 | Train RMSE: 0.01059 | Valid RMSE: 0.00929 | Current Roll Out: 1
Epoch 6 | T: 1.85 | Train RMSE: 0.01004 | Valid RMSE: 0.00821 | Current Roll Out: 1
Epoch 7 | T: 1.85 | Train RMSE: 0.00935 | Valid RMSE: 0.00867 | Current Roll Out: 1
Epoch 8 | T: 1.85 | Train RMSE: 0.00921 | Valid RMSE: 0.00799 | Current Roll Out: 1
Epoch 9 | T: 1.85 | Train RMSE: 0.00901 | Valid RMSE: 0.00782 | Current Roll Out: 1
Epoch 10 | T: 1.85 | Train RMSE: 0.00890 | Valid RMSE: 0.00781 | Current Roll Out: 1
Epoch 11 | T: 1.85 | Train RMSE: 0.00882 | Valid RMSE: 0.00786 | Current Roll Out: 1
Epoch 12 | T: 1.85 | Train RMSE: 0.00878 | Valid RMSE: 0.00818 | Current Roll Out: 1

In [8]:
test_path = "./val_in/"
# Sorted so that predictions are produced in a deterministic file order.
test_pkl_list = sorted(glob(os.path.join(test_path, '*')))

# The checkpointed (encoder, decoder) pair is the same for every sample.
encoder, decoder = best_model

test_preds = []
for pkl_path in test_pkl_list:
    # NOTE(review): pickle.load can execute arbitrary code; only run this on
    # sample files from a trusted source.
    with open(pkl_path, 'rb') as f:
        test_sample = pickle.load(f)

    # Row of the agent whose trajectory must be predicted.
    pred_id = np.where(test_sample["track_id"] == test_sample['agent_id'])[0][0]
    # Keep only the rows flagged as real cars.
    car_mask = np.where(test_sample["car_mask"] == 1)[0]
    inp_scene = test_sample['p_in'][car_mask]
    # NOTE(review): pred_id indexes the unmasked arrays but is applied to the
    # masked scene below; this assumes the flagged rows form a leading prefix
    # that contains the agent -- TODO confirm.

    # Normalization: per-feature min-max over every kept track and step.
    min_vecs = np.min(inp_scene, axis = (0,1))
    max_vecs = np.max(inp_scene, axis = (0,1))
    span = max_vecs - min_vecs
    # A feature that is constant across the scene has zero span; divide by 1
    # instead so the normalised input is 0 rather than NaN.
    span[span == 0] = 1

    inp = (inp_scene[pred_id] - min_vecs) / span
    inp = torch.from_numpy(inp).float().to(device).unsqueeze(0)

    preds = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        embedded_vec = (
            torch.zeros(1, embed_dim).to(device),
            torch.zeros(1, embed_dim).to(device),
        )
        for step in range(19):
            embedded_vec = encoder(inp[:, step, :], embedded_vec)

        # Seed the decoder with the last observed step and unroll 30 steps,
        # feeding each prediction back in as the next input.
        # NOTE(review): as in training, the state returned by the decoder is
        # discarded and every step restarts from the encoder's final state.
        pred = inp[:, -1, :]
        for step in range(30):
            pred, _ = decoder(pred, embedded_vec)
            preds.append(pred.cpu().numpy())

    preds = np.array(preds).reshape(30, 2)

    # De-Normalization: map predictions back with the same per-scene scale.
    preds = preds * span + min_vecs
    test_preds.append(preds)

In [9]:
# Build the submission file: one row per test sample, integer-valued predictions.
sample_sub = pd.read_csv('sample_submission.csv')

# Flatten every per-sample prediction array into a single row.
flat_preds = np.concatenate(test_preds).reshape(len(test_preds), -1)
# Convert to int
# NOTE(review): astype(int) truncates toward zero rather than rounding --
# confirm this is the intended conversion.
predictions = flat_preds.astype(int)

# Header: the "ID" column followed by v1 .. v60.
header = np.r_[["ID"], ["v" + str(i) for i in range(1, 61)]]
table = np.c_[sample_sub["ID"], predictions]
sub_df = pd.DataFrame(table, columns=[header])
sub_df.to_csv('test_submission.csv', index=None)