In [1]:
import os, os.path 
import numpy 
from torch.utils.data import Dataset, DataLoader
import pickle
from glob import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor

"""Change to the data folder"""
train_path = "./new_train/new_train"
test_path = './new_val_in/new_val_in'


In [2]:
class ArgoverseDataset(Dataset):
    
    """Dataset class for Argoverse"""
    def __init__(self, data_path: str, transform=None):
        super(ArgoverseDataset, self).__init__()
        self.data_path = data_path
        self.transform = transform
        self.pkl_list = glob(os.path.join(self.data_path, '*'))
        self.pkl_list.sort()
        
    def __len__(self): #len(val_dataset)
        return len(self.pkl_list)
    
    def __getitem__(self, idx): #val_dataset[0]
        pkl_path = self.pkl_list[idx]
        with open(pkl_path, 'rb') as f:
            data = pickle.load(f)
        if self.transform:
            data = self.transform(data)
        return data

In [3]:
train_dataset = ArgoverseDataset(data_path=train_path) 
test_dataset  = ArgoverseDataset(data_path=test_path) 

In [4]:
in_train, out_train = [], []
for i in tqdm(range((len(train_dataset)))):
    val = train_dataset[i]
    p_in = val['p_in'][val['car_mask'].reshape(-1) == 1]
    v_in = val['v_in'][val['car_mask'].reshape(-1) == 1]
    p_out = val['p_out'][val['car_mask'].reshape(-1) == 1]
    for c in range(len(p_in)):
        in_train.append(p_in[c].reshape(19*2))
        out_train.append(p_out[c].reshape(30*2))
in_train = np.array(in_train)
out_train = np.array(out_train)

HBox(children=(FloatProgress(value=0.0, max=205942.0), HTML(value='')))




# Method 1. Seperate x and y to train 30 * 2 = 60 models

In [5]:
x_in_train = in_train[:, list(range(0, 38, 2))]
y_in_train = in_train[:, list(range(1, 39, 2))]
x_out_train = out_train[:, list(range(0, 60, 2))]
y_out_train = out_train[:, list(range(1, 61, 2))]

# Training 

In [15]:
x_regs = [LinearRegression()] * 30

for i in tqdm(range(30)):
    x_regs[i] = LinearRegression().fit(x_in_train, x_out_train[:, i])
    x_in_train = np.insert(x_in_train, 19 + i, x_out_train[:, i], axis=1)

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))




In [20]:
y_regs = [LinearRegression()] * 30

for i in tqdm(range(30)):
    y_regs[i] = LinearRegression().fit(y_in_train, y_out_train[:, i])
    y_in_train = np.insert(y_in_train, 19 + i, y_out_train[:, i], axis=1)

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))




In [None]:
#random forest regressor?
x_regs = [RandomForestRegressor()] * 30
y_regs = [RandomForestRegressor()] * 30

for i in tqdm(range(30)):
    
    x_regs[i] = RandomForestRegressor().fit(x_in_train, x_out_train[:, i])
    x_in_train = np.insert(x_in_train, 19 + i, x_out_train[:, i], axis=1)

    y_regs[i] = RandomForestRegressor().fit(y_in_train, y_out_train[:, i])
    y_in_train = np.insert(y_in_train, 19 + i, y_out_train[:, i], axis=1)

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

# Predicting

In [86]:
in_test = []
for i in tqdm(range((len(test_dataset)))):
    val = test_dataset[i]
    p_in = val['p_in'][test_dataset[0]['agent_id'] == test_dataset[0]['track_id'][:, 0, 0]]
    for c in range(len(p_in)):
        in_test.append(p_in[c].reshape(19*2))
in_test = np.array(in_test)

HBox(children=(FloatProgress(value=0.0, max=3200.0), HTML(value='')))




In [87]:
in_test.shape

(3200, 38)

In [88]:
x_regs[0].coef_.shape

(19,)

In [89]:
x_in_test = in_test[:, list(range(0, 38, 2))]
y_in_test = in_test[:, list(range(1, 39, 2))]

In [90]:
for i in tqdm(range(30)):
    pred = x_regs[i].predict(x_in_test)
    x_in_test = np.insert(x_in_test, 19 + i, pred, axis=1)

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))




In [91]:
for i in tqdm(range(30)):
    pred = y_regs[i].predict(y_in_test)
    y_in_test = np.insert(y_in_test, 19 + i, pred, axis=1)

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))




In [93]:
df = pd.read_csv('sample_submission.csv')
df

Unnamed: 0,ID,v1,v2,v3,v4,v5,v6,v7,v8,v9,...,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60
0,10002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10019,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10028,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,9897,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3196,99,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3197,9905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3198,9910,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [108]:
for i in tqdm(range(3200)):
    lst = []
    num = 1
    for n in range(19, 49):
        df.iloc[i, num] = x_in_test[i, n]
        num += 1
        df.iloc[i, num] = y_in_test[i, n]
        num += 1

HBox(children=(FloatProgress(value=0.0, max=3200.0), HTML(value='')))




In [109]:
df.to_csv('submission.csv', index=False)

In [111]:
df

Unnamed: 0,ID,v1,v2,v3,v4,v5,v6,v7,v8,v9,...,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60
0,10002,1714.754344,337.152387,1715.677670,338.056650,1716.601964,338.974121,1717.512114,339.851937,1718.437880,...,1737.783124,359.950404,1738.686756,360.850508,1739.583736,361.742451,1740.477457,362.634848,1741.355344,363.513899
1,10015,693.434504,1220.144486,693.512056,1220.119439,693.579558,1220.095462,693.626742,1220.072650,693.705877,...,694.985068,1219.545552,695.036094,1219.521923,695.100160,1219.496220,695.153584,1219.476378,695.210665,1219.456228
2,10019,574.009356,1244.648332,574.226675,1244.577425,574.439073,1244.500024,574.664553,1244.422792,574.882465,...,579.537000,1242.549259,579.761381,1242.462147,579.984835,1242.376045,580.198848,1242.291212,580.409314,1242.209159
3,10028,1636.239672,293.478025,1636.318136,293.299637,1636.423760,293.047926,1636.534928,292.765122,1636.641954,...,1639.380484,285.639019,1639.521471,285.290644,1639.656962,284.947202,1639.792041,284.608493,1639.926358,284.276288
4,1003,2063.918111,630.932974,2062.470192,629.857802,2061.032409,628.787223,2059.597602,627.718526,2058.160148,...,2028.592873,604.957079,2027.216888,603.955618,2025.850621,602.962597,2024.503664,601.983504,2023.176561,601.022421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,9897,246.370717,811.999502,246.290109,811.990821,246.173133,811.993536,246.076394,811.990048,246.017918,...,244.106804,812.266517,244.001960,812.284220,243.913460,812.303564,243.824397,812.324087,243.733028,812.345713
3196,99,585.751420,1129.900157,585.797897,1128.908636,585.843594,1127.870343,585.891973,1126.798410,585.939571,...,586.925856,1103.349349,586.971536,1102.283957,587.015969,1101.227703,587.060980,1100.190613,587.105813,1099.169524
3197,9905,1755.590455,444.194586,1755.428628,444.513582,1755.265966,444.829659,1755.101929,445.137895,1754.938232,...,1751.516425,451.797314,1751.356127,452.088470,1751.194718,452.384488,1751.037029,452.675009,1750.882828,452.959651
3198,9910,575.596814,1300.713608,575.602856,1300.581118,575.609261,1300.447268,575.615529,1300.312322,575.621802,...,575.756872,1297.391146,575.763152,1297.262272,575.769017,1297.133463,575.775376,1297.007116,575.781780,1296.883654


# Method 2. Seperate x and y to train 30 * 2 = 60 models

In [7]:
regs = [LinearRegression()] * 60

for i in tqdm(range(60)):
    regs[i] = LinearRegression().fit(in_train, out_train[:, i])
    in_train = np.insert(in_train, 38 + i, out_train[:, i], axis=1)

HBox(children=(FloatProgress(value=0.0, max=60.0), HTML(value='')))




In [9]:
in_train.shape

(1812171, 98)

In [28]:
in_test = []
for i in tqdm(range((len(test_dataset)))):
    val = test_dataset[i]
    p_in = val['p_in'][test_dataset[0]['agent_id'] == test_dataset[0]['track_id'][:, 0, 0]]
    for c in range(len(p_in)):
        in_test.append(p_in[c].reshape(19*2))
in_test = np.array(in_test)

HBox(children=(FloatProgress(value=0.0, max=3200.0), HTML(value='')))




In [29]:
for i in tqdm(range(60)):
    pred = regs[i].predict(in_test)
    in_test = np.insert(in_test, 38 + i, pred, axis=1)

HBox(children=(FloatProgress(value=0.0, max=60.0), HTML(value='')))




In [36]:
df = pd.DataFrame(in_test[:, -60:])

In [38]:
df.insert(0, 'ID', pd.read_csv('sample_submission.csv')['ID'])

In [40]:
df.columns = pd.read_csv('sample_submission.csv').columns

In [41]:
df.to_csv('submission.csv', index=False)

In [42]:
df

Unnamed: 0,ID,v1,v2,v3,v4,v5,v6,v7,v8,v9,...,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60
0,10002,1714.754745,337.156018,1715.686270,338.068825,1716.616790,338.991677,1717.519942,339.869462,1718.448405,...,1737.899035,360.291377,1738.807530,361.214403,1739.712858,362.130911,1740.614350,363.049426,1741.499462,363.955847
1,10015,693.433004,1220.150444,693.510946,1220.123295,693.580457,1220.100249,693.628518,1220.069392,693.707069,...,695.021581,1219.579781,695.075636,1219.554086,695.141108,1219.536111,695.197409,1219.517433,695.257024,1219.496710
2,10019,574.008523,1244.651308,574.224630,1244.581580,574.438328,1244.502294,574.661300,1244.433990,574.879818,...,579.536117,1242.755717,579.760669,1242.683735,579.983132,1242.612823,580.197534,1242.543442,580.407400,1242.475989
3,10028,1636.233809,293.477647,1636.309346,293.300019,1636.407191,293.050967,1636.511208,292.770651,1636.620058,...,1639.354141,285.814604,1639.493698,285.484705,1639.629957,285.154831,1639.763365,284.832093,1639.892966,284.517940
4,1003,2063.917919,630.923820,2062.469822,629.841290,2061.031886,628.762961,2059.595582,627.687841,2058.157927,...,2028.566642,604.398280,2027.189209,603.358623,2025.820489,602.325214,2024.472604,601.305685,2023.142944,600.299783
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,9897,246.368039,812.000918,246.284469,812.008311,246.172920,811.999028,246.076326,811.990767,246.015719,...,244.145233,812.166669,244.045560,812.166351,243.958970,812.181587,243.875706,812.190559,243.788220,812.200646
3196,99,585.743948,1129.903278,585.791390,1128.914670,585.830486,1127.879333,585.868506,1126.811868,585.917703,...,586.767079,1103.606738,586.799652,1102.559299,586.830952,1101.522856,586.862967,1100.505620,586.891326,1099.504829
3197,9905,1755.589899,444.191589,1755.428054,444.508770,1755.266756,444.822975,1755.103561,445.128861,1754.942020,...,1751.633280,451.651567,1751.481897,451.931887,1751.330478,452.217237,1751.184580,452.496020,1751.039010,452.767866
3198,9910,575.597000,1300.714215,575.602548,1300.583052,575.609517,1300.449888,575.615240,1300.316221,575.622031,...,575.765557,1297.452600,575.773344,1297.327340,575.779107,1297.202918,575.786477,1297.081009,575.792803,1296.960993
