In [53]:
import os
import pathlib

import numpy as np
import pandas as pd
import seaborn as sns
from collections import Counter
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from tqdm import tqdm


from matplotlib import pyplot as plt
from tqdm.contrib.concurrent import process_map

sns.set_theme()

In [54]:
def get_samples(files):
    dfs = process_map(pd.read_csv, files, max_workers=24, chunksize=100)
    df = pd.concat(dfs, ignore_index=True)
    return df, dfs

In [55]:
train_path = pathlib.Path("../data/AUDI_A3_3RD_GEN")
test_path = pathlib.Path("../data/test")

train_files = list(train_path.glob("*.csv"))
test_files = list(test_path.glob("*.csv"))

train_df, train_dfs = get_samples(train_files)
test_df, test_dfs = get_samples(test_files)

100%|██████████| 3500/3500 [00:03<00:00, 1040.55it/s]
100%|██████████| 20000/20000 [00:13<00:00, 1479.24it/s]


In [56]:
save_columns = ['roll', 'aEgo', 'vEgo', 'latAccelSteeringAngle', 'steeringAngleDeg']

for i in range(len(train_dfs)):
    train_dfs[i] = train_dfs[i][save_columns].rename(columns={
        'latAccelSteeringAngle': 'targetLateralAcceleration',
        'steeringAngleDeg': 'steerCommand'
    })
    
train_df = train_df[save_columns].rename(columns={
    'latAccelSteeringAngle': 'targetLateralAcceleration',
    'steeringAngleDeg': 'steerCommand'
})

train_dfs[0].head()

Unnamed: 0,roll,aEgo,vEgo,targetLateralAcceleration,steerCommand
0,-0.03576,-0.019322,21.293262,-0.283218,1.7
1,-0.035548,-0.018786,21.290297,-0.359889,2.29413
2,-0.035337,-0.00928,21.28811,-0.368352,2.399955
3,-0.035125,0.002241,21.287054,-0.352069,2.301402
4,-0.035013,0.017588,21.288178,-0.349136,2.3


In [57]:
scalers = {
    'aEgo': StandardScaler(),
    'vEgo': StandardScaler(),
    'roll': StandardScaler(),
    'targetLateralAcceleration': StandardScaler() # RobustScaler() 
}

def scale_steering_by_first_10_seconds(df, steering_col='steerCommand'):
    first_10 = df.iloc[:100]
    scaler = RobustScaler()
    scaler.fit(first_10[[steering_col]])
    df_scaled = df.copy()
    df_scaled[steering_col] = scaler.transform(df[[steering_col]])
    return df_scaled

for col, scaler in scalers.items():
    scalers[col].fit(train_df[[col]])
    
train_scaled = []
for df in train_dfs:
    df_scaled = df.copy()
    for col, scaler in scalers.items():
        df_scaled[col] = scaler.transform(df[[col]])
    df_scaled = scale_steering_by_first_10_seconds(df_scaled)
    train_scaled.append(df_scaled)

test_scaled = []
for df in test_dfs:
    df_scaled = df.copy()
    for col, scaler in scalers.items():
        df_scaled[col] = scaler.transform(df[[col]])
    df_scaled = scale_steering_by_first_10_seconds(df_scaled)
    test_scaled.append(df_scaled)

In [58]:
train_scaled[0].head()

Unnamed: 0,roll,aEgo,vEgo,targetLateralAcceleration,steerCommand
0,-1.541447,-0.044635,-0.146943,-0.506492,0.245244
1,-1.534822,-0.04329,-0.147293,-0.63802,0.730932
2,-1.528197,-0.019467,-0.14755,-0.652538,0.817443
3,-1.521573,0.009405,-0.147675,-0.624604,0.736877
4,-1.51805,0.047865,-0.147543,-0.619572,0.735731


In [59]:
test_scaled[0].head()

Unnamed: 0,t,vEgo,aEgo,roll,targetLateralAcceleration,steerCommand
0,0.0,1.324645,-0.039566,0.753604,1.701459,-0.545725
1,0.1,1.323852,-0.094122,0.739077,1.780057,-0.566037
2,0.2,1.322957,-0.166476,0.72455,1.791398,-0.557145
3,0.3,1.323238,-0.052038,0.725702,1.783606,-0.570563
4,0.4,1.322598,-0.12661,0.727978,1.797469,-0.633812


In [60]:
# !pip3 install torch torchvision torchaudio


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import random
import pickle

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Model ---
class LstmEncoderDecoder(nn.Module):
    def __init__(self, physics_input_size, control_input_size, hidden_size, num_layers, dropout=0.2):
        super().__init__()
        self.physics_encoder = nn.LSTM(physics_input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.control_encoder = nn.LSTM(control_input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.decoder = nn.LSTM(control_input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc_out = nn.Linear(hidden_size, 1)  # Predict targetLateralAcceleration

    def forward(self, input_physics, input_control_sequence, output_control_sequence):
        _, (hidden_phsc, cell_phsc) = self.physics_encoder(input_physics)
        _, (hidden_ctrl, cell_ctrl) = self.control_encoder(input_control_sequence)
        
        hidden_enc = (hidden_phsc + hidden_ctrl) / 2
        cell_enc = (cell_phsc + cell_ctrl) / 2
        
        hidden_enc = hidden_enc.clone()

        decoder_output, _ = self.decoder(output_control_sequence, (hidden_enc, cell_enc))

        output_localization = self.fc_out(decoder_output)
        
        return output_localization

In [None]:
class DrivingDataset(Dataset):
    def __init__(self, dfs, seq_len=20):
        self.samples = []
        for df in dfs:
            arr = df[['roll', 'aEgo', 'vEgo', 'targetLateralAcceleration', 'steerCommand']].values
            for i in range(len(arr) - seq_len - 1):
                physics_input = arr[i:i+seq_len, :3]
                control_input = arr[i:i+seq_len, 3:]
                y = arr[i+1:i+seq_len+1, 4]    # target: next steerCommand
                self.samples.append((physics_input, control_input, y))   
                
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        physics_input, control_input, y = self.samples[idx]
        return (torch.tensor(physics_input, dtype=torch.float32),
                torch.tensor(control_input, dtype=torch.float32),
                torch.tensor(y, dtype=torch.float32))

def train_val_split(dfs, val_ratio=0.2, seed=42):
    """Split list of dataframes into training and validation sets"""
    random.seed(seed)
    n_val = int(len(dfs) * val_ratio)
    
    # Shuffle the indices
    indices = list(range(len(dfs)))
    random.shuffle(indices)
    
    # Split into train and validation
    val_indices = indices[:n_val]
    train_indices = indices[n_val:]
    
    train_dfs = [dfs[i] for i in train_indices]
    val_dfs = [dfs[i] for i in val_indices]
    
    return train_dfs, val_dfs


In [None]:
def train_model(train_dfs, val_dfs=None, num_epochs=5, batch_size=64, seq_len=20,
                lr=1e-3, hidden_size=128, num_layers=4):
    train_dataset = DrivingDataset(train_dfs, seq_len=seq_len)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    if val_dfs:
        val_dataset = DrivingDataset(val_dfs, seq_len=seq_len)
        val_loader = DataLoader(val_dataset, batch_size=batch_size)
    
    model = LstmEncoderDecoder(
        physics_input_size=3,
        control_input_size=2,
        hidden_size=hidden_size,
        num_layers=num_layers
    ).to(DEVICE)
    
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    
    best_val_loss = float('inf')
    
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        
        for physics_input, control_input, y in tqdm(train_loader):
            physics_input = physics_input.to(DEVICE)
            control_input = control_input.to(DEVICE)
            y = y.to(DEVICE)
            
            optimizer.zero_grad()
            out = model(physics_input, control_input)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            
            total_train_loss += loss.item()
        
        avg_train_loss = total_train_loss / len(train_loader)
        
        if val_dfs:
            model.eval()
            total_val_loss = 0
            
            with torch.no_grad():
                for physics_input, control_input, y in val_loader:
                    physics_input = physics_input.to(DEVICE)
                    control_input = control_input.to(DEVICE)
                    y = y.to(DEVICE)
                    
                    out = model(physics_input, control_input)
                    loss = criterion(out, y)
                    total_val_loss += loss.item()
            
            avg_val_loss = total_val_loss / len(val_loader)
            
            # Save best model
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                torch.save(model.state_dict(), "../models/lstm_best_model.pt")
            
            print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
        else:
            print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_train_loss:.4f}")
    
    return model

In [None]:
train_split, val_split = train_val_split(train_scaled)
model = train_model(train_split, val_split, num_epochs=10)


 67%|██████▋   | 21272/31636 [5:56:23<1:08:38,  2.52it/s]     

In [None]:
torch.save(model.state_dict(), "../models/lstm_lataccel.pt")
