In [107]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

"""Change to the data folder"""
train_path = "./new_train/new_train"
test_path = './new_val_in/new_val_in'


# Train/validation test dataset loading

In [3]:
class ArgoverseDataset(Dataset):
    
    """Dataset class for Argoverse"""
    def __init__(self, data_path: str, transform=None):
        super(ArgoverseDataset, self).__init__()
        self.data_path = data_path
        self.transform = transform
        self.pkl_list = glob(os.path.join(self.data_path, '*'))
        self.pkl_list.sort()
        
    def __len__(self): #len(val_dataset)
        return len(self.pkl_list)
    
    def __getitem__(self, idx): #val_dataset[0]
        pkl_path = self.pkl_list[idx]
        with open(pkl_path, 'rb') as f:
            data = pickle.load(f)
        if self.transform:
            data = self.transform(data)
        return data

In [4]:
train_dataset = ArgoverseDataset(data_path=train_path) 
test_dataset  = ArgoverseDataset(data_path=test_path) 

In [29]:
len(train_dataset)

205942

In [45]:
in_train, out_train = [], []
for i in tqdm(range(0, 200000)):
    val = train_dataset[i]
    p_in = val['p_in'][val['car_mask'].reshape(-1) == 1]
    p_out = val['p_out'][val['car_mask'].reshape(-1) == 1]
    for c in range(len(p_in)):
        in_train.append(p_in[c].reshape(19*2))
        out_train.append(p_out[c].reshape(30*2))
in_train = np.array(in_train)
out_train = np.array(out_train)

HBox(children=(FloatProgress(value=0.0, max=200000.0), HTML(value='')))




In [46]:
in_valid, out_valid = [], []
for i in tqdm(range(200000-1, 205942)):
    val = train_dataset[i]
    p_in = val['p_in'][val['car_mask'].reshape(-1) == 1]
    p_out = val['p_out'][val['car_mask'].reshape(-1) == 1]
    for c in range(len(p_in)):
        in_valid.append(p_in[c].reshape(19*2))
        out_valid.append(p_out[c].reshape(30*2))
in_valid = np.array(in_valid)
out_valid = np.array(out_valid)

HBox(children=(FloatProgress(value=0.0, max=5943.0), HTML(value='')))




In [105]:
x_in_train = in_train[:, list(range(0, 38, 2))]
y_in_train = in_train[:, list(range(1, 39, 2))]

x_out_train = out_train[:, list(range(0, 60, 2))]
y_out_train = out_train[:, list(range(1, 61, 2))]

In [80]:
x_in_train.shape, y_in_train.shape, x_out_train.shape, y_out_train.shape

((1760155, 19), (1760155, 19), (1760155, 30), (1760155, 30))

In [81]:
x_in_valid = in_valid[:, list(range(0, 38, 2))]
y_in_valid = in_valid[:, list(range(1, 39, 2))]

x_out_valid = out_valid[:, list(range(0, 60, 2))]
y_out_train = out_valid[:, list(range(1, 61, 2))]

In [88]:
#Use that 19 time points to predict every future points: 
#obviously should be lower than the final LR
for i in range(30):
    print(i, reg.score(x_in_valid, x_out_valid[:, i]))

0 0.9999999720564747
1 0.9999998606835677
2 0.9999995591729811
3 0.9999990666000739
4 0.9999983731784919
5 0.9999975046702431
6 0.999996436367472
7 0.9999951806544309
8 0.9999937327352634
9 0.999992081261241
10 0.9999902376357248
11 0.9999882150271086
12 0.9999859938986253
13 0.9999835605996417
14 0.9999809603371024
15 0.9999781402339497
16 0.999975159806497
17 0.9999719303981158
18 0.9999685486390444
19 0.9999649747719512
20 0.9999611945165718
21 0.9999571666565411
22 0.9999529929593971
23 0.9999486070940553
24 0.9999440279500589
25 0.9999392402715116
26 0.9999343095449696
27 0.9999291747872935
28 0.999923934296924
29 0.9999185352651137


In [49]:
regs = [LinearRegression()] * 30

In [106]:
regs[0] = LinearRegression().fit(x_in_train, x_out_train[:, 0])
pred = regs[0].predict(x_in_train)
x_in_train = np.insert(x_in_train, 19 + 0, pred, axis=1)

for i in tqdm(range(1, 30)):
    regs[i] = LinearRegression().fit(x_in_train, x_out_train[:, i])
    pred = regs[i].predict(x_in_train)
    x_in_train = np.insert(x_in_train, 19 + i, pred, axis=1)

HBox(children=(FloatProgress(value=0.0, max=29.0), HTML(value='')))




In [109]:
x_in_train.shape

(1760155, 49)

Validation set: what is the score?

In [120]:
for i in range(30):
    print(r2_score(x_in_train[:, 19 + i], x_out_train[:, i]))

0.9999999706316878
0.9999999522101889
0.9999999314364197
0.9999999059040638
0.9999998734611978
0.9999998341014829
0.9999997857053922
0.9999997270690452
0.9999996569478273
0.9999995749600455
0.9999994801562861
0.9999993694394474
0.9999992415629441
0.9999990980554518
0.9999989351657506
0.9999987506079562
0.9999985441856927
0.9999983148635783
0.9999980609190324
0.9999977815631815
0.9999974716478127
0.9999971361935339
0.9999967684914657
0.9999963682304969
0.9999959328154526
0.9999954634669268
0.9999949583667719
0.9999944146837969
0.9999938356211475
0.9999932219293578


In [113]:
.shape

(52019, 30)

In [37]:
train

array([[3277.29638672, 1947.62609863, 3277.29614258, ..., 1947.62573242,
        3277.29638672, 1947.62585449],
       [3277.13671875, 1977.72497559, 3277.09350586, ..., 1977.72570801,
        3277.17016602, 1977.72717285],
       [3232.28955078, 1922.98181152, 3232.33007812, ..., 1922.92980957,
        3232.2590332 , 1922.96984863],
       ...,
       [2998.44604492, 1464.96325684, 2998.44555664, ..., 1464.95947266,
        2998.44262695, 1464.95922852],
       [2989.00317383, 1454.67956543, 2988.98901367, ..., 1454.67407227,
        2989.01196289, 1454.68469238],
       [3026.58203125, 1490.43603516, 3026.58203125, ..., 1482.21081543,
        3016.82006836, 1481.72143555]])

In [23]:
np.array(df).shape

(886, 38)

In [11]:
df.iloc[0][0].shape

(6, 19, 2)