# LSTM with social

In [1]:
import copy
import os
import pickle
import time
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

# Prefer the GPU when PyTorch can see one; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [2]:
def _load_npz_arrays(path):
    """Read every array stored in the ``.npz`` archive at *path* into memory.

    ``np.load`` on an ``.npz`` file returns a lazy ``NpzFile`` that re-reads
    the requested array from disk on *every* ``archive[key]`` access.  The
    training, validation and inference loops in this notebook index the data
    once per batch or per sample, so the arrays are materialised a single time
    here and the underlying file handle is closed.

    Returns:
        dict mapping each array name in the archive to its ``np.ndarray``.
    """
    with np.load(path) as archive:
        return {name: archive[name] for name in archive.files}


train_path = "processed_train.npz"
train_data = _load_npz_arrays(train_path)
test_path = "processed_val_in.npz"
test_data = _load_npz_arrays(test_path)

In [3]:
class LSTMEncoder(nn.Module):
    """One-step recurrent encoder: linear projection, ReLU, then an LSTM cell.

    Args:
        in_dim: size of the last dimension of each input step.
        hidden_dim: width of the linear projection fed to the LSTM cell.
        embed_dim: size of the LSTM cell's hidden and cell states.
    """

    def __init__(self, in_dim, hidden_dim, embed_dim):
        super().__init__()
        # Sub-modules are registered in the order linear -> lstm.
        self.linear = nn.Linear(in_features=in_dim, out_features=hidden_dim)
        self.lstm = nn.LSTMCell(input_size=hidden_dim, hidden_size=embed_dim)

    def forward(self, x, embedded):
        """Advance the recurrent state by one time step.

        Args:
            x: input for the current step; last dimension must be ``in_dim``.
            embedded: ``(h, c)`` state tuple accepted by ``nn.LSTMCell``.

        Returns:
            The updated ``(h, c)`` tuple produced by the LSTM cell.
        """
        projected = torch.relu(self.linear(x))
        return self.lstm(projected, embedded)

class LSTMDecoder(nn.Module):
    """One-step recurrent decoder that maps the previous output to the next one.

    The previous output is projected (linear + ReLU), pushed through an LSTM
    cell, and the new hidden state is read out by a second linear layer.

    Args:
        out_dim: size of each predicted output (and of the fed-back input).
        hidden_dim: width of the input projection fed to the LSTM cell.
        embed_dim: size of the LSTM cell's hidden and cell states.
    """

    def __init__(self, out_dim, hidden_dim, embed_dim):
        super().__init__()
        # Sub-modules are registered in the order linear1 -> lstm -> linear2.
        self.linear1 = nn.Linear(in_features=out_dim, out_features=hidden_dim)
        self.lstm = nn.LSTMCell(input_size=hidden_dim, hidden_size=embed_dim)
        self.linear2 = nn.Linear(in_features=embed_dim, out_features=out_dim)

    def forward(self, x, hidden):
        """Produce the next output and the updated recurrent state.

        Args:
            x: previous output (or seed value); last dimension is ``out_dim``.
            hidden: ``(h, c)`` state tuple accepted by ``nn.LSTMCell``.

        Returns:
            ``(out, (h, c))`` where ``out`` is read from the new hidden state
            ``h`` and ``(h, c)`` is the state to pass to the next step.
        """
        projected = torch.relu(self.linear1(x))
        h_next, c_next = self.lstm(projected, hidden)
        return self.linear2(h_next), (h_next, c_next)

In [4]:
# --- Model sizes -----------------------------------------------------------
in_dim = 5        # last-dimension size of each encoder input step
out_dim = 2       # size of each decoder output (the decoder is seeded with the first 2 input features)
hidden_dim = 8    # width of the linear projections in front of the LSTM cells
embed_dim = 16    # size of the LSTM hidden / cell state

# --- Optimisation ----------------------------------------------------------
batch_size = 512
learning_rate = 0.01
decay_rate = 0.99   # multiplicative learning-rate factor passed to the schedulers as gamma
num_epoch = 1000    # upper bound on the number of epochs

# --- Curriculum ------------------------------------------------------------
# Prediction horizons (number of decoder steps) trained on, in order.
roll_outs = [1, 3, 10, 30]

In [5]:
# Sequence-model options: RNN, LSTM, 1dCNN, Transformer.
# The LSTM encoder/decoder pair is the one instantiated here.
encoder = LSTMEncoder(in_dim=in_dim, hidden_dim=hidden_dim, embed_dim=embed_dim)
encoder = encoder.to(device)  # move model to the selected device

decoder = LSTMDecoder(out_dim=out_dim, hidden_dim=hidden_dim, embed_dim=embed_dim)
decoder = decoder.to(device)  # move model to the selected device

# Adam (Adaptive Moment Estimation) keeps decaying averages of past gradients
# and past squared gradients to adapt the learning rate of every parameter.
# Encoder and decoder each get their own optimizer and scheduler.
encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)

# StepLR with step_size=1 multiplies the learning rate by `decay_rate` on
# every scheduler step.
encoder_scheduler = torch.optim.lr_scheduler.StepLR(
    encoder_optimizer, step_size=1, gamma=decay_rate
)
decoder_scheduler = torch.optim.lr_scheduler.StepLR(
    decoder_optimizer, step_size=1, gamma=decay_rate
)

# Mean squared error between predicted and target values.
loss_fun = nn.MSELoss()

In [6]:
def train_epoch(encoder, decoder, encoder_optimizer, decoder_optimizer, loss_function, roll_out,
                num_batches=390, seq_len=19):
    """Run one optimisation pass over the training split.

    Reads the module-level ``train_data``, ``batch_size``, ``embed_dim`` and
    ``device``.  The first ``num_batches * batch_size`` rows of
    ``train_data["X"]`` / ``train_data["y"]`` are visited once, in a fresh
    random order.

    Args:
        encoder: module called as ``encoder(step_input, (h, c)) -> (h, c)``.
        decoder: module called as ``decoder(prev_output, (h, c)) -> (output, (h, c))``.
        encoder_optimizer: optimizer stepping the encoder parameters.
        decoder_optimizer: optimizer stepping the decoder parameters.
        loss_function: callable ``(prediction, target) -> scalar tensor``.
        roll_out: number of decoder steps to unroll and score.
        num_batches: number of training batches per epoch (default 390).
        seq_len: number of observed time steps fed to the encoder (default 19).

    Returns:
        Square root of the mean per-batch loss, rounded to 5 decimals
        (an RMSE when ``loss_function`` is a mean squared error).
    """
    # Fetch the arrays once per epoch: if ``train_data`` is a lazy ``NpzFile``
    # every ``train_data[key]`` access re-reads the array from disk, so doing
    # it inside the batch loop is needlessly expensive.
    features = train_data["X"]
    targets = train_data["y"]

    batch_losses = []
    shuffler = np.random.permutation(np.arange(num_batches * batch_size))
    for batch_idx in range(num_batches):
        batch_ids = shuffler[batch_idx * batch_size:(batch_idx + 1) * batch_size]
        inp = torch.from_numpy(features[batch_ids]).float().to(device).reshape(batch_size, seq_len, -1)
        tgt = torch.from_numpy(targets[batch_ids]).float().to(device).reshape(batch_size, 30, 2)
        tgt = tgt[:, :roll_out, :]

        # Encode the observed steps starting from an all-zero (h, c) state.
        embedded_vec = (
            torch.zeros(batch_size, embed_dim, device=device),
            torch.zeros(batch_size, embed_dim, device=device),
        )
        for step in range(seq_len):
            embedded_vec = encoder(inp[:, step, :], embedded_vec)

        # Seed the decoder with the first two features of the last observed
        # step, then feed each prediction back in as the next input.
        pred = inp[:, -1, :2]
        loss = 0
        for step in range(roll_out):
            pred, embedded_vec = decoder(pred, embedded_vec)
            loss += loss_function(pred, tgt[:, step, :])
        loss = loss / roll_out

        batch_losses.append(loss.item())
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()

    return round(np.sqrt(np.mean(batch_losses)), 5)

def eval_epoch(encoder, decoder, loss_function, roll_out,
               start_batch=390, end_batch=402, seq_len=19):
    """Score the model on the held-out slice of ``train_data`` without updating it.

    Reads the module-level ``train_data``, ``batch_size``, ``embed_dim`` and
    ``device``.  Batches ``start_batch`` .. ``end_batch - 1`` (contiguous,
    unshuffled rows of ``train_data``) form the validation split; with the
    defaults these are the rows immediately after the 390 batches that
    ``train_epoch`` trains on by default.

    Args:
        encoder: module called as ``encoder(step_input, (h, c)) -> (h, c)``.
        decoder: module called as ``decoder(prev_output, (h, c)) -> (output, (h, c))``.
        loss_function: callable ``(prediction, target) -> scalar tensor``.
        roll_out: number of decoder steps to unroll and score.
        start_batch: index of the first validation batch (default 390).
        end_batch: one past the index of the last validation batch (default 402).
        seq_len: number of observed time steps fed to the encoder (default 19).

    Returns:
        Square root of the mean per-batch loss, rounded to 5 decimals
        (an RMSE when ``loss_function`` is a mean squared error).
    """
    # Fetch the arrays once: a lazy ``NpzFile`` would otherwise re-read them
    # from disk for every batch.
    features = train_data["X"]
    targets = train_data["y"]

    batch_losses = []
    with torch.no_grad():
        for batch_idx in range(start_batch, end_batch):
            rows = slice(batch_idx * batch_size, (batch_idx + 1) * batch_size)
            inp = torch.from_numpy(features[rows]).float().to(device).reshape(batch_size, seq_len, -1)
            tgt = torch.from_numpy(targets[rows]).float().to(device).reshape(batch_size, 30, 2)
            tgt = tgt[:, :roll_out, :]

            # Encode the observed steps starting from an all-zero (h, c) state.
            embedded_vec = (
                torch.zeros(batch_size, embed_dim, device=device),
                torch.zeros(batch_size, embed_dim, device=device),
            )
            for step in range(seq_len):
                embedded_vec = encoder(inp[:, step, :], embedded_vec)

            # Seed the decoder from the last observed step.  Indexing with -1
            # matches train_epoch and no longer depends on the loop variable
            # `step` surviving the encoder loop.
            pred = inp[:, -1, :2]
            loss = 0
            for step in range(roll_out):
                pred, embedded_vec = decoder(pred, embedded_vec)
                loss += loss_function(pred, tgt[:, step, :])
            loss = loss / roll_out
            batch_losses.append(loss.item())

    return round(np.sqrt(np.mean(batch_losses)), 5)

In [7]:
# Curriculum training loop.
#
# The model is trained on the prediction horizons in `roll_outs`, one after the
# other.  Within a horizon the best weights (lowest validation RMSE) are kept;
# once validation has failed to improve for more than `patience` consecutive
# epochs, the best weights are restored and training moves to the next horizon.
train_rmse = []
valid_rmse = []
min_rmse = float("inf")
worse_count = 0
patience = 10

roll_out_id = 0

# `best_model` must hold *copies*: storing (encoder, decoder) directly would
# only alias the live modules, so the "best" snapshot would silently follow
# every later update.  Initialised up front so it always exists.
best_model = (copy.deepcopy(encoder), copy.deepcopy(decoder))

for i in range(num_epoch):
    start = time.time()
    roll_out = roll_outs[roll_out_id]  # horizon used for this whole epoch

    # The encoder/decoder contain only Linear and LSTMCell layers (no dropout
    # or batch norm), so no train()/eval() mode switch is needed.
    train_rmse.append(
        train_epoch(encoder, decoder, encoder_optimizer, decoder_optimizer, loss_fun, roll_out)
    )
    val_rmse = eval_epoch(encoder, decoder, loss_fun, roll_out)
    valid_rmse.append(val_rmse)
    end = time.time()

    # Keep a snapshot of the best model for the current horizon.
    if val_rmse < min_rmse:
        min_rmse = val_rmse
        best_model = (copy.deepcopy(encoder), copy.deepcopy(decoder))
        worse_count = 0
    else:
        worse_count += 1

    # Report the horizon this epoch was actually trained and evaluated with.
    print("Epoch {} | T: {:0.2f} | Train RMSE: {:0.5f} | Valid RMSE: {:0.5f} | Current Roll Out: {}".format(i + 1, (end-start) / 60, train_rmse[-1], valid_rmse[-1], roll_out))

    if worse_count > patience:
        # Roll back to the best weights of this horizon.  The weights are
        # loaded *into* the live modules so the optimizers, which hold
        # references to those modules' parameters, keep training them.
        encoder.load_state_dict(best_model[0].state_dict())
        decoder.load_state_dict(best_model[1].state_dict())
        roll_out_id += 1
        worse_count = 0
        min_rmse = float("inf")
        if roll_out_id >= len(roll_outs):
            break

    # Learning Rate Decay
    encoder_scheduler.step()
    decoder_scheduler.step()

Epoch 1 | T: 2.12 | Train RMSE: 5.02455 | Valid RMSE: 1.50566 | Current Roll Out: 1
Epoch 2 | T: 2.11 | Train RMSE: 1.31761 | Valid RMSE: 0.90033 | Current Roll Out: 1
Epoch 3 | T: 2.11 | Train RMSE: 0.93927 | Valid RMSE: 0.67411 | Current Roll Out: 1
Epoch 4 | T: 2.11 | Train RMSE: 0.75489 | Valid RMSE: 0.54766 | Current Roll Out: 1
Epoch 5 | T: 2.12 | Train RMSE: 0.63777 | Valid RMSE: 0.46529 | Current Roll Out: 1
Epoch 6 | T: 2.11 | Train RMSE: 0.55683 | Valid RMSE: 0.40940 | Current Roll Out: 1
Epoch 7 | T: 2.11 | Train RMSE: 0.49988 | Valid RMSE: 0.37164 | Current Roll Out: 1
Epoch 8 | T: 2.11 | Train RMSE: 0.45417 | Valid RMSE: 0.35803 | Current Roll Out: 1
Epoch 9 | T: 2.11 | Train RMSE: 0.42348 | Valid RMSE: 0.32756 | Current Roll Out: 1
Epoch 10 | T: 2.11 | Train RMSE: 0.39868 | Valid RMSE: 0.31626 | Current Roll Out: 1
Epoch 11 | T: 2.11 | Train RMSE: 0.37998 | Valid RMSE: 0.31515 | Current Roll Out: 1
Epoch 12 | T: 2.11 | Train RMSE: 0.36338 | Valid RMSE: 0.32287 | Current R

Epoch 98 | T: 2.16 | Train RMSE: 0.89774 | Valid RMSE: 0.87257 | Current Roll Out: 10
Epoch 99 | T: 2.16 | Train RMSE: 0.87920 | Valid RMSE: 0.87240 | Current Roll Out: 10
Epoch 100 | T: 2.16 | Train RMSE: 0.87843 | Valid RMSE: 0.86563 | Current Roll Out: 10
Epoch 101 | T: 2.16 | Train RMSE: 0.87222 | Valid RMSE: 0.86872 | Current Roll Out: 10
Epoch 102 | T: 2.16 | Train RMSE: 0.87066 | Valid RMSE: 0.87086 | Current Roll Out: 10
Epoch 103 | T: 2.16 | Train RMSE: 0.86694 | Valid RMSE: 0.86109 | Current Roll Out: 10
Epoch 104 | T: 2.16 | Train RMSE: 0.86487 | Valid RMSE: 0.87894 | Current Roll Out: 10
Epoch 105 | T: 2.17 | Train RMSE: 0.86365 | Valid RMSE: 0.87010 | Current Roll Out: 10
Epoch 106 | T: 2.16 | Train RMSE: 0.86446 | Valid RMSE: 0.88269 | Current Roll Out: 10
Epoch 107 | T: 2.16 | Train RMSE: 0.86021 | Valid RMSE: 0.89646 | Current Roll Out: 10
Epoch 108 | T: 2.16 | Train RMSE: 0.85115 | Valid RMSE: 0.86604 | Current Roll Out: 10
Epoch 109 | T: 2.16 | Train RMSE: 0.85960 | V

In [8]:
def inverse_transform_path(path, shift, rotation_matrix):
    """Undo a rotate-then-translate normalisation of one path or a batch of paths.

    Every point ``p`` is mapped to ``inv(rotation_matrix) @ p + shift``.

    Args:
        path: array of shape ``(n_points, d)`` for a single path, or
            ``(n_paths, n_points, d)`` for a batch sharing one transform.
        shift: translation added after the inverse rotation; broadcast
            against each path.
        rotation_matrix: square ``(d, d)`` matrix whose inverse is applied.

    Returns:
        Array with the same shape as ``path``.  For 3-D input the result is
        always ``float64``.

    Raises:
        ValueError: if ``path`` is neither 2-D nor 3-D.
    """
    if path.ndim not in (2, 3):
        raise ValueError("Invalid dimension")

    # Invert once; (inv @ P.T).T == P @ inv.T, and the matmul broadcasts over
    # a leading batch axis, so no per-path Python loop is needed.
    inverse_rotation = np.linalg.inv(rotation_matrix)
    restored = path @ inverse_rotation.T + shift

    if path.ndim == 3:
        # The batched branch has always returned float64 regardless of input dtype.
        restored = restored.astype(np.float64, copy=False)
    return restored
        
# Predict 30 future steps for every test sample with the best saved model and
# map the predictions back to the original coordinate frame.
encoder, decoder = best_model

# Fetch each array once: if `test_data` is a lazy NpzFile, indexing it inside
# the loop would re-read the whole array from disk for every sample.
test_inputs = test_data["X"]
test_shifts = test_data["shifts"]
test_rotations = test_data["rotation_matrices"]

obs_len = 19   # number of observed steps fed to the encoder
pred_len = 30  # number of future steps generated by the decoder

test_preds = []
with torch.no_grad():  # inference only: do not build autograd graphs
    for i in tqdm(range(len(test_inputs))):
        inp = torch.from_numpy(test_inputs[i]).float().to(device).unsqueeze(0)

        # Encode the observed steps starting from an all-zero (h, c) state.
        embedded_vec = (
            torch.zeros(1, embed_dim, device=device),
            torch.zeros(1, embed_dim, device=device),
        )
        for step in range(obs_len):
            embedded_vec = encoder(inp[:, step, :], embedded_vec)

        # Seed the decoder with the first two features of the last encoded
        # step (index obs_len - 1), stated explicitly instead of relying on
        # the loop variable surviving the encoder loop.
        pred = inp[:, obs_len - 1, :2]
        preds = []
        for _ in range(pred_len):
            pred, embedded_vec = decoder(pred, embedded_vec)
            preds.append(pred.cpu().numpy())

        preds = np.array(preds).reshape(pred_len, 2)

        # De-Normalization !
        preds = inverse_transform_path(preds, test_shifts[i], test_rotations[i])
        test_preds.append(preds)

100%|██████████████████████████████████████████████████████████████████████████████| 3200/3200 [01:02<00:00, 51.61it/s]


In [9]:
# Assemble the submission file: one row per test sample, the sample ID followed
# by the flattened predictions in columns v1..v60.
sample_sub = pd.read_csv('sample_submission.csv')

# Flatten each predicted path into one row, then convert to int
# (astype(int) truncates toward zero rather than rounding).
flat_preds = np.concatenate(test_preds).reshape(len(test_preds), -1)
predictions = flat_preds.astype(int)

value_columns = [f"v{k}" for k in range(1, 61)]
header = np.r_[["ID"], value_columns]
table = np.c_[sample_sub["ID"], predictions]

sub_df = pd.DataFrame(table, columns=[header])
sub_df.to_csv('test_submission.csv', index=None)