In [1]:
import os, os.path 
import numpy 
from torch.utils.data import Dataset, DataLoader
import pickle
from glob import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import numpy as np
import matplotlib
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.data import TensorDataset, DataLoader

"""Change to the data folder"""
train_path = "./new_train/new_train"
test_path = './new_val_in/new_val_in'
matplotlib.rcParams['savefig.dpi'] = 300

In [2]:
class ArgoverseDataset(Dataset):
    """Dataset class for Argoverse"""
    def __init__(self, data_path: str, transform=None):
        super(ArgoverseDataset, self).__init__()
        self.data_path = data_path
        self.transform = transform
        self.pkl_list = glob(os.path.join(self.data_path, '*'))
        self.pkl_list.sort()
    def __len__(self): #len(val_dataset)
        return len(self.pkl_list)
    def __getitem__(self, idx): #val_dataset[0]
        pkl_path = self.pkl_list[idx]
        with open(pkl_path, 'rb') as f:
            data = pickle.load(f)
        if self.transform:
            data = self.transform(data)
        return data

In [3]:
train_dataset = ArgoverseDataset(data_path=train_path) 
test_dataset  = ArgoverseDataset(data_path=test_path) 

# Data Preprocessing

In [4]:
#diff points
train = []
for i in tqdm(range((len(train_dataset)))):
    val = train_dataset[i]
    c_filter = val['car_mask'].reshape(-1) == 1
    p_in = val['p_in'][c_filter]
    p_out = val['p_out'][c_filter]
    for c in range(len(p_in)):
        in_out = np.diff(np.vstack((p_in[c], p_out[c])), axis=0).flatten()
        train.append(in_out)
train = np.array(train)

HBox(children=(FloatProgress(value=0.0, max=205942.0), HTML(value='')))




In [None]:
#train = np.genfromtxt('preprocessed_train.csv',delimiter =',')

## Get test set data, and initial position

In [5]:
#create only targeted agent list
init_p = []
in_test = []
for i in tqdm(range((len(test_dataset)))):
    val = test_dataset[i]
    c_filter = val['agent_id'] == val['track_id'][:, 0, 0]
    p_in = val['p_in'][c_filter]
    v_in = (val['v_in'][c_filter][:, :, 0]**2 + val['v_in'][c_filter][:, :, 1] ** 2)**0.5
    for c in range(len(p_in)):
        in_test.append(
            np.diff(p_in[c], axis=0).flatten()
        )
        init_p.append(
            p_in[c][-1]
        )
in_test = np.array(in_test)
init_p = np.array(init_p)

HBox(children=(FloatProgress(value=0.0, max=3200.0), HTML(value='')))




In [6]:
#np.savetxt("preprocessed_train.csv", train, delimiter=",")

In [9]:
#Step up device: cuda or cpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Creating model workflow (this session is only for testing, skip to the actual training below)

In [117]:
in_train = torch.tensor(train[:, :36]).type(torch.FloatTensor).to(device)
out_train = torch.tensor(train[:, 36:]).type(torch.FloatTensor).to(device)

in_test = torch.tensor(in_test).type(torch.FloatTensor).to(device)

In [122]:
trainloader = DataLoader(TensorDataset(in_train[:, :36], out_train[:, :1]),batch_size= 512, drop_last=False, shuffle=True)

In [123]:
class Net(nn.Module):
    
    def __init__(self):
        
        super().__init__()

        self.lin1 = nn.Linear(36, 18)
        self.lin2 = nn.Linear(18, 6)
        self.lin3 = nn.Linear(6, 1)
        self.activation = nn.Sigmoid()
        
    def forward(self, x):
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        return self.activation(self.lin3(x))
    

net = Net().to(device)

In [126]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
#optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

for epoch in range(20): 
    
    net.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data 
        # zero the parameter gradients
        optimizer.zero_grad()
        outputs = net(inputs).to(device)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i == 3539:    # print every 2000 mini-batches
            print('[Epoch %d, %5d] loss: %.8f' %
                  (epoch + 1, i + 1, running_loss / 3540))
            running_loss = 0.0

[Epoch 1,  3540] loss: 0.12302844
[Epoch 2,  3540] loss: 0.12289178
[Epoch 3,  3540] loss: 0.12291648
[Epoch 4,  3540] loss: 0.12286522
[Epoch 5,  3540] loss: 0.12287612
[Epoch 6,  3540] loss: 0.12283727
[Epoch 7,  3540] loss: 0.12280002
[Epoch 8,  3540] loss: 0.12282911
[Epoch 9,  3540] loss: 0.12287485
[Epoch 10,  3540] loss: 0.12282326
[Epoch 11,  3540] loss: 0.12277096
[Epoch 12,  3540] loss: 0.12275954
[Epoch 13,  3540] loss: 0.12277921
[Epoch 14,  3540] loss: 0.12274584
[Epoch 15,  3540] loss: 0.12277067
[Epoch 16,  3540] loss: 0.12278083
[Epoch 17,  3540] loss: 0.12277364
[Epoch 18,  3540] loss: 0.12275211
[Epoch 19,  3540] loss: 0.12280717
[Epoch 20,  3540] loss: 0.12276342


In [84]:
net(in_test).to('cpu').detach().numpy()

array([[9.0517998e-01],
       [2.0592444e-02],
       [2.4094157e-01],
       ...,
       [6.0908645e-10],
       [1.4381190e-03],
       [6.5948501e-02]], dtype=float32)

# Training

## Auto-regression model set up

In [129]:
class Net(nn.Module):
    
    def __init__(self, column_size):
        
        super().__init__()
        
        self.size_lin1 = column_size // 2
        self.size_lin2 = self.size_lin1 // 3

        self.lin1 = nn.Linear(column_size, self.size_lin1)
        self.lin2 = nn.Linear(self.size_lin1, self.size_lin2)
        self.lin3 = nn.Linear(self.size_lin2, 1)
        
        self.activation = nn.Sigmoid()
        
    def forward(self, x):
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        return self.activation(self.lin3(x))
    

## Training with auto regression

In [130]:
net_list = []

for i in range(36, 36+60):
    
    net_list.append(Net(i).to(device))
    
for num in tqdm(range(60)):

    in_train = torch.tensor(train[:, :36 + num]).type(torch.FloatTensor).to(device)
    out_train = torch.tensor(train[:, 36: 36+ 1 + num]).type(torch.FloatTensor).to(device)

    trainloader = DataLoader(TensorDataset(in_train[:, :36 + num], out_train[:, num: num+1]),
                             batch_size= 512, drop_last=False, shuffle = True)
    
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net_list[num].parameters(), lr=0.05)

    for epoch in range(5): 
    
        net_list[num].train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data 
            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = net_list[num](inputs).to(device)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
#             # print statistics
#             running_loss += loss.item()
#             if i == 3539:    # print every 2000 mini-batches
#                 print('[Epoch %d, %5d] loss: %.8f' %
#                       (epoch + 1, i + 1, running_loss / 3540))
#                 running_loss = 0.0

HBox(children=(FloatProgress(value=0.0, max=60.0), HTML(value='')))




## Prediction

first of all, in_test should always be a numpy array

In [131]:
#re run it for numpy array
init_p = []
in_test = []
for i in tqdm(range((len(test_dataset)))):
    val = test_dataset[i]
    c_filter = val['agent_id'] == val['track_id'][:, 0, 0]
    p_in = val['p_in'][c_filter]
    v_in = (val['v_in'][c_filter][:, :, 0]**2 + val['v_in'][c_filter][:, :, 1] ** 2)**0.5
    for c in range(len(p_in)):
        in_test.append(
            np.diff(p_in[c], axis=0).flatten()
        )
        init_p.append(
            p_in[c][-1]
        )
in_test = np.array(in_test)
init_p = np.array(init_p)

HBox(children=(FloatProgress(value=0.0, max=3200.0), HTML(value='')))




Creating prediction

In [132]:
for i in tqdm(range(60)):
    in_test_tensor = torch.tensor(in_test).type(torch.FloatTensor).to(device)
    pred = net_list[i](in_test_tensor).to('cpu').detach().numpy().flatten()
    in_test = np.insert(in_test, 36 + i, pred, axis=1)

HBox(children=(FloatProgress(value=0.0, max=60.0), HTML(value='')))




In [133]:
pred = in_test[:, 36:]

In [134]:
x_pred = pred[:, list(range(0, 60, 2))]
y_pred = pred[:, list(range(1, 61, 2))]

In [135]:
x_pred[:, 0] = x_pred[:, 0] + init_p[:, 0]

In [136]:
y_pred[:, 0] = y_pred[:, 0] + init_p[:, 1]

In [137]:
x_pred = x_pred.cumsum(1)
y_pred = y_pred.cumsum(1)

In [138]:
df = np.zeros((3200, 60))
num = 0
for i in range(30):
    df[:, num] = x_pred[:, i]
    df[:, num + 1] = y_pred[:, i]
    num += 2

In [139]:
df = pd.DataFrame(df)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
0,1714.678477,337.145987,1715.630701,338.046654,1716.602294,338.957383,1717.369724,339.846274,1718.335680,340.782406,...,1738.269962,358.306407,1739.177495,359.077312,1740.162139,360.070297,1741.066825,360.905427,1741.969162,361.464034
1,725.561381,1229.961182,725.561382,1229.961182,725.561382,1229.961182,725.561382,1229.961182,725.561382,1229.961182,...,725.561401,1229.961182,725.561401,1229.961182,725.561401,1229.961182,725.561401,1229.961182,725.561401,1229.961182
2,573.974954,1244.689453,574.203963,1244.689453,574.408069,1244.689453,574.643716,1244.689453,574.762411,1244.689453,...,579.363872,1244.689495,579.621827,1244.689495,579.916472,1244.689495,580.059042,1244.689495,580.399306,1244.689495
3,1691.341246,315.607222,1691.925688,316.140311,1692.679993,316.815216,1693.384787,317.527928,1694.038324,318.025214,...,1709.881831,331.348087,1710.664023,331.941350,1711.602001,332.798049,1712.411499,333.633179,1713.224421,334.189415
4,2124.079834,678.186646,2124.079834,678.186646,2124.079834,678.186646,2124.079834,678.186646,2124.079834,678.186646,...,2124.079834,678.186646,2124.079834,678.186646,2124.079834,678.186646,2124.079834,678.186646,2124.079834,678.186646
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,256.381991,805.615295,256.615635,805.615295,256.858189,805.615295,257.104720,805.615295,257.341587,805.615295,...,262.591894,805.615295,262.864040,805.615295,263.158685,805.615295,263.468357,805.615295,263.788794,805.615295
3196,587.843311,1154.503662,587.849642,1154.503662,587.849642,1154.503662,587.849642,1154.503662,587.849975,1154.503662,...,587.906726,1154.503662,587.906726,1154.503662,587.906726,1154.503662,587.906726,1154.503662,587.906726,1154.503662
3197,1755.736572,444.011328,1755.736572,444.360414,1755.736572,444.546339,1755.736572,444.740628,1755.736572,444.936353,...,1755.736572,448.625654,1755.736572,448.867775,1755.736572,448.868188,1755.736572,448.868188,1755.736572,449.067392
3198,574.704444,1289.077393,574.704446,1289.077393,574.704446,1289.077393,574.704446,1289.077393,574.704446,1289.077393,...,574.704446,1289.077393,574.704446,1289.077393,574.704446,1289.077393,574.704446,1289.077393,574.704446,1289.077393


In [140]:
df.insert(0, 'ID', pd.read_csv('sample_submission.csv')['ID'])

In [141]:
df.columns = pd.read_csv('sample_submission.csv').columns

In [142]:
df.to_csv('submission.csv', index=False)