# Data Processing for Time Series (LSTM, MLP, Transformer, etc)

In [22]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [23]:
data = pd.read_csv('../../ml_data/train_data.csv')

In [24]:
data

Unnamed: 0,time_begin,time_begin_traj,theta_x,theta_y,vel_x,vel_y,X_throttle,Y_throttle
0,0.358,0.358,-4.94,-10.76,0.0,0.0,0.0,0.0
1,0.421,0.421,-4.94,-10.76,0.0,0.0,0.0,0.0
2,0.485,0.485,-4.94,-10.76,0.0,0.0,0.0,0.0
3,0.548,0.548,-4.94,-10.76,0.0,0.0,0.0,0.0
4,0.612,0.612,-4.94,-10.76,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
111936,2321.834,130.763,6.17,-20.73,0.0,0.0,-10.0,-10.0
111937,2321.906,130.835,6.17,-20.73,0.0,0.0,-10.0,-10.0
111938,2321.978,130.907,6.17,-20.73,0.0,0.0,-10.0,-10.0
111939,2322.049,130.978,6.17,-20.73,0.0,0.0,-10.0,-10.0


In [25]:

data['dt'] = data['time_begin'].diff()
data['new_traj'] = np.abs(data['dt']) > 10
data['traj_num'] = data['new_traj'].cumsum().ffill().astype(int)
dfs = []
for _, group in data.groupby('traj_num'):
    group['delta_theta_x'] = group['theta_x'].diff().shift(-1)
    group['delta_theta_y'] = group['theta_y'].diff().shift(-1)
    group['delta_vel_x'] = group['vel_x'].diff().shift(-1)
    group['delta_vel_y'] = group['vel_y'].diff().shift(-1)
    group = group.dropna()
    dfs.append(group)

data = (pd.concat(dfs, ignore_index=True)
          .drop(columns=['new_traj',
                         'dt',
                         'traj_num']))
data

Unnamed: 0,time_begin,time_begin_traj,theta_x,theta_y,vel_x,vel_y,X_throttle,Y_throttle,delta_theta_x,delta_theta_y,delta_vel_x,delta_vel_y
0,0.421,0.421,-4.94,-10.76,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.485,0.485,-4.94,-10.76,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.548,0.548,-4.94,-10.76,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.612,0.612,-4.94,-10.76,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.804,0.804,-4.94,-10.76,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
111615,2321.763,130.692,6.17,-20.73,0.0,0.0,-10.0,-10.0,0.0,0.0,0.0,0.0
111616,2321.834,130.763,6.17,-20.73,0.0,0.0,-10.0,-10.0,0.0,0.0,0.0,0.0
111617,2321.906,130.835,6.17,-20.73,0.0,0.0,-10.0,-10.0,0.0,0.0,0.0,0.0
111618,2321.978,130.907,6.17,-20.73,0.0,0.0,-10.0,-10.0,0.0,0.0,0.0,0.0


In [26]:
data.describe()

Unnamed: 0,time_begin,time_begin_traj,theta_x,theta_y,vel_x,vel_y,X_throttle,Y_throttle,delta_theta_x,delta_theta_y,delta_vel_x,delta_vel_y
count,111620.0,111620.0,111620.0,111620.0,111620.0,111620.0,111620.0,111620.0,111620.0,111620.0,111620.0,111620.0
mean,937.341524,36.329852,-0.245466,0.815183,0.328204,-0.329811,-0.171322,-0.059953,0.020522,-0.032068,-0.002469,-0.000682
std,645.8138,44.644732,44.896663,45.915282,15.274926,16.199538,1.94232,2.046768,1.226271,1.294674,9.113204,10.680893
min,0.349,0.349,-100.0,-99.99,-91.594203,-127.183099,-10.0,-10.0,-6.67,-9.78,-57.134109,-68.672546
25%,338.52625,8.39675,-32.66,-31.59,-9.714286,-10.152818,-1.0,-1.0,-0.77,-0.82,-5.462963,-7.094415
50%,888.4825,20.1065,0.91,-1.56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1456.6655,46.35625,31.46,34.84,9.545455,9.411765,1.0,1.0,0.75,0.76,5.555556,7.168702
max,2495.546,330.539,100.0,99.96,109.545455,104.714286,9.0,10.0,7.76,7.7,52.703286,61.678322


In [27]:
data[data['delta_theta_y'] < -50]

Unnamed: 0,time_begin,time_begin_traj,theta_x,theta_y,vel_x,vel_y,X_throttle,Y_throttle,delta_theta_x,delta_theta_y,delta_vel_x,delta_vel_y


In [28]:
data_insp = data[['time_begin', 'time_begin_traj', 'theta_x', 'theta_y', 'vel_x', 'vel_y', 'delta_theta_x', 'delta_theta_y', 'delta_vel_x', 'delta_vel_y']]
data_insp.head(200)

Unnamed: 0,time_begin,time_begin_traj,theta_x,theta_y,vel_x,vel_y,delta_theta_x,delta_theta_y,delta_vel_x,delta_vel_y
0,0.421,0.421,-4.94,-10.76,0.000000,0.000000,0.00,0.00,0.000000,0.000000
1,0.485,0.485,-4.94,-10.76,0.000000,0.000000,0.00,0.00,0.000000,0.000000
2,0.548,0.548,-4.94,-10.76,0.000000,0.000000,0.00,0.00,0.000000,0.000000
3,0.612,0.612,-4.94,-10.76,0.000000,0.000000,0.00,0.00,0.000000,0.000000
4,0.804,0.804,-4.94,-10.76,0.000000,0.000000,0.00,0.00,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...
195,37.281,7.110,-34.69,-49.57,-17.101449,-15.362319,-1.04,-1.49,2.244306,-5.923395
196,37.351,7.180,-35.73,-51.06,-14.857143,-21.285714,-0.72,-1.48,4.571429,0.142857
197,37.421,7.250,-36.45,-52.54,-10.285714,-21.142857,-0.92,-1.46,-3.047619,-0.016563
198,37.490,7.319,-37.37,-54.00,-13.333333,-21.159420,-1.26,-1.82,-4.666667,-4.840580


In [29]:
data_insp.loc[98600:98623]

Unnamed: 0,time_begin,time_begin_traj,theta_x,theta_y,vel_x,vel_y,delta_theta_x,delta_theta_y,delta_vel_x,delta_vel_y
98600,1875.539,8.589,26.3,87.24,11.470588,14.117647,0.91,1.04,2.111501,1.404741
98601,1875.606,8.656,27.21,88.28,13.58209,15.522388,0.81,0.82,-1.670325,-3.463565
98602,1875.674,8.724,28.02,89.1,11.911765,12.058824,0.0,0.77,-11.911765,-0.566286
98603,1875.741,8.791,28.02,89.87,0.0,11.492537,1.46,1.23,5.59387,-6.779894
98604,1876.002,9.052,29.48,91.1,5.59387,4.712644,2.52,1.74,17.315221,11.105538
98605,1876.112,9.162,32.0,92.84,22.909091,15.818182,2.79,1.59,18.120321,7.564171
98606,1876.18,9.23,34.79,94.43,41.029412,23.382353,1.83,1.11,-13.715979,-6.815189
98607,1876.247,9.297,36.62,95.54,27.313433,16.567164,1.21,0.94,-9.519315,-2.743635
98608,1876.315,9.365,37.83,96.48,17.794118,13.823529,1.05,0.86,-2.122476,-0.987709
98609,1876.382,9.432,38.88,97.34,15.671642,12.835821,1.14,1.21,1.093064,4.958297


In [30]:
seq_len = 50
pred_len = 2


In [31]:
input_data = data[['time_begin', 'time_begin_traj', 'theta_x', 'theta_y', 'X_throttle', 'Y_throttle']]
output_data = data[['delta_theta_x', 'delta_theta_y', 'delta_vel_x', 'delta_vel_y']]
input_data = input_data.values
output_data = output_data.values

In [32]:
# tiled_input_data = np.array([input_data[:seq_len]])
num_sequences = input_data.shape[0] - seq_len
tiled_input_data = np.array([input_data[i:i+seq_len] for i in range(num_sequences)])
tiled_input_data.shape

(111570, 50, 6)

In [33]:
output_data = output_data[seq_len-1:]
tiled_out_data = np.array([output_data[i:i+pred_len] for i in range(num_sequences)])

In [34]:
data.iloc[seq_len-1:2*seq_len]


Unnamed: 0,time_begin,time_begin_traj,theta_x,theta_y,vel_x,vel_y,X_throttle,Y_throttle,delta_theta_x,delta_theta_y,delta_vel_x,delta_vel_y
49,4.416,4.416,5.29,-14.6,0.0,0.0,2.0,0.0,0.0,0.52,0.0,8.125
50,4.48,4.48,5.29,-14.08,0.0,8.125,2.0,0.0,0.75,0.0,11.904762,-8.125
51,4.543,4.543,6.04,-14.08,11.904762,0.0,2.0,0.0,0.54,0.0,-3.333333,0.0
52,4.606,4.606,6.58,-14.08,8.571429,0.0,2.0,0.0,0.7,0.0,2.366071,0.0
53,4.67,4.67,7.28,-14.08,10.9375,0.0,2.0,0.0,1.58,0.0,-2.621711,0.0
54,4.86,4.86,8.86,-14.08,8.315789,0.0,1.0,0.0,1.7,0.0,7.721946,0.0
55,4.966,4.966,10.56,-14.08,16.037736,0.0,1.0,0.0,1.18,0.59,2.399764,9.21875
56,5.03,5.03,11.74,-13.49,18.4375,9.21875,1.0,0.0,0.85,0.0,-5.360577,-9.21875
57,5.095,5.095,12.59,-13.49,13.076923,0.0,1.0,0.0,0.82,0.0,-0.06105,0.0
58,5.158,5.158,13.41,-13.49,13.015873,0.0,1.0,0.0,0.79,0.0,-0.862027,0.0


In [7]:
from robo_limb_ml.utils.data_loader import DataLoader

data_loader = DataLoader('../../ml_data/train_data.csv',
                         batch_size=1024,
                         device='cpu',
                         pad=True)

for i in range(data_loader.n_batches):
    batch, lable, set_num = data_loader.get_batch()
    print(set_num)
    if set_num == 0 or set_num==1:
        print(batch[:, 0, 0])
    
    

Index(['time_begin', 'time_begin_traj', 'theta_x', 'theta_y', 'vel_x', 'vel_y',
       'X_throttle', 'Y_throttle', 'set_num', 'delta_theta_x', 'delta_theta_y',
       'delta_vel_x', 'delta_vel_y'],
      dtype='object')
0.0
tensor([  0.4210,   0.4850,   0.5480,  ..., 148.9370, 149.0040, 149.0700])
0.0
tensor([149.1370, 149.2030, 149.2700,  ..., 341.0060, 341.0720, 341.1390])
0.0
tensor([341.3690, 362.0210, 362.0860,  ..., 513.5480, 513.6170, 513.8580])
0.0
tensor([513.9710, 514.0410, 514.1120,  ..., 707.5980, 707.6660, 707.7350])
0.0
tensor([707.8040, 708.0510, 708.1600,  ..., 901.4290, 901.4970, 901.5660])
0.0
tensor([ 901.6360,  901.7050,  901.7750,  ..., 1097.0081, 1097.0760,
        1097.3350])
0.0
tensor([1097.4460, 1097.5150, 1097.5830,  ..., 1272.6040, 1272.6710,
        1272.7371])
0.0
tensor([1272.9890, 1273.0980, 1273.1639,  ..., 1468.9860, 1469.0560,
        1469.1250])
0.0
tensor([1469.3860, 1469.4980, 1469.5680,  ..., 1687.1730, 1687.2410,
        1687.5070])
0.0
tensor([1