In [56]:
#all imports

import pandas as pd
import numpy as np
from os import rename
from sqlalchemy.testing.util import total_size

from telemetry import VehicleRaceRecord
from telemetry.raw.TelemetryDB import TelemetryDB
from matplotlib import pyplot as plt
import pandas as pd

db = TelemetryDB("postgresql+psycopg2://racer:changeme@100.120.36.75:5432/racing")

#Available telemetry signals: ['accx_can', 'accy_can',  'ath', 'gear', 'nmot', 'pbrake_f', 'pbrake_r', 'speed', 'Steering_Angle'



In [57]:
from numpy.ma.core import shape


#from car2 import model


def data_each_car_10s(db, vehicle_id):
    # ignoring ath for now
    list_all_dfs = []
    car = db.get_car_race(track="barber", race_number=2, vehicle_code=vehicle_id)

    if car:
        df_accx = car.get_telemetry_10s("accx_can")
        df_accy = car.get_telemetry_10s("accy_can")
        df_speed = car.get_telemetry_10s("speed")
        #df_ath = car.get_telemetry_10s("ath")
        df_gear = car.get_telemetry_10s("gear")
        df_aps = car.get_telemetry_10s("aps")
        df_nmotor = car.get_telemetry_10s("nmot")
        df_pbrake_f = car.get_telemetry_10s("pbrake_f")
        df_pbrake_r = car.get_telemetry_10s("pbrake_r")
        list_all_dfs = [df_accx, df_accy, df_speed, df_gear, df_aps, df_nmotor, df_pbrake_f, df_pbrake_r]
    return list_all_dfs


#gets common index, ensures timestamps are in datetime format.

def index(list_dfs):
    for i, df in enumerate(list_dfs):
        list_dfs[i] = df.copy()
        list_dfs[i]['timestamp'] = pd.to_datetime(list_dfs[i]['timestamp'], unit='ns')
        if 'telemetry_value' in list_dfs[i].columns:
            list_dfs[i].rename(columns={'telemetry_value': 'value'},
                               inplace=True)  #rename everything to values for easier access

    start_time = min(df['timestamp'].min() for df in list_dfs)
    end_time = max(df['timestamp'].max() for df in list_dfs)
    common_index = pd.date_range(start=start_time, end=end_time, freq='1ms')
    return common_index, list_dfs


#resample and interpolate data
def resample(df, common_index):
    df_resampled = df.copy()
    df = df[~df['timestamp'].duplicated()]
    df_new = df.set_index('timestamp', inplace=False)
    df_resampled['value'] = pd.to_numeric(df_resampled['value'], errors='coerce')

    df_resampled = df_new.reindex(common_index).interpolate(
        method='time')  #timeâ€™: Works on daily and higher resolution data to interpolate given length of interval.
    df_resampled['value'] = df_resampled['value'].ffill().bfill()
    df_resampled.drop(columns=['name'], inplace=True, errors='ignore')

    return df_resampled


telemetry_names = ['accx', 'accy', 'speed', 'gear', 'aps', 'nmot', 'pbrake_f', 'pbrake_r', 'latitude', 'longitude']


def combine_dfs_car(telemetry_names, common_index, all_dfs):
    combined_df = pd.DataFrame(index=common_index)

    for name, df in zip(telemetry_names, all_dfs):
        df_interp = resample(df, common_index)
        combined_df[name] = pd.to_numeric(df_interp['value'], errors='coerce').values

    return combined_df


In [58]:
#gps data directly from csv file, data is
import pandas as pd
vehicle_id = "GR86-022-13"
df_gps = pd.read_csv(r"C:\Users\sanar\PycharmProjects\hack_the_track\backend\R2_barber_telemetry_data.csv")
df_gps = df_gps[df_gps['original_vehicle_id'] == "GR86-022-13"]
df_lat = df_gps[df_gps['telemetry_name'] == "VBOX_Lat_Min"]
df_long = df_gps[df_gps['telemetry_name'] == "VBOX_Long_Minutes"]

In [59]:
import pandas as pd
import numpy as np

def extract_10s(df, start_ts, ts_col, sample_count):
    df = df.copy()
    # Convert timestamps safely, infer format, handle UTC
    df[ts_col] = pd.to_datetime(df[ts_col], utc=True, errors='coerce')
    start_ts = pd.to_datetime(start_ts, utc=True)

    # drop any rows where parsing failed
    df = df.dropna(subset=[ts_col]).sort_values(ts_col).reset_index(drop=True)
    if df.empty:
        raise ValueError("Dataframe has no valid timestamps after parsing.")

    # find nearest timestamp to start_ts
    diffs = (df[ts_col] - start_ts).abs()
    nearest_idx = diffs.idxmin()

    # slice sample_count rows starting from nearest index
    end_idx = nearest_idx + sample_count
    df_slice = df.iloc[nearest_idx:end_idx].reset_index(drop=True)

    return df_slice


In [60]:
df_long_10s = extract_10s(df_long, start_ts = "2025-09-05T04:10:20.078Z", ts_col="timestamp", sample_count=200)

In [61]:
df_lat_10s = extract_10s(df_lat, start_ts = "2025-09-05T04:10:20.078Z", ts_col="timestamp", sample_count=200)

In [62]:
df_lat_10s.head()

Unnamed: 0,expire_at,lap,meta_event,meta_session,meta_source,meta_time,original_vehicle_id,outing,telemetry_name,telemetry_value,timestamp,vehicle_id,vehicle_number
0,,2,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:48.996Z,GR86-022-13,0,VBOX_Lat_Min,33.532623,2025-09-05 04:10:20.078000+00:00,GR86-022-13,13
1,,3,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:48.996Z,GR86-022-13,0,VBOX_Lat_Min,33.532623,2025-09-05 04:10:20.121000+00:00,GR86-022-13,13
2,,3,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.055Z,GR86-022-13,0,VBOX_Lat_Min,33.532642,2025-09-05 04:10:20.166000+00:00,GR86-022-13,13
3,,3,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.055Z,GR86-022-13,0,VBOX_Lat_Min,33.532642,2025-09-05 04:10:20.208000+00:00,GR86-022-13,13
4,,3,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.055Z,GR86-022-13,0,VBOX_Lat_Min,33.532665,2025-09-05 04:10:20.251000+00:00,GR86-022-13,13


In [63]:
df_lat_10s.tail()

Unnamed: 0,expire_at,lap,meta_event,meta_session,meta_source,meta_time,original_vehicle_id,outing,telemetry_name,telemetry_value,timestamp,vehicle_id,vehicle_number
195,,3,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:56.028Z,GR86-022-13,0,VBOX_Lat_Min,33.534931,2025-09-05 04:10:28.521000+00:00,GR86-022-13,13
196,,3,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:56.028Z,GR86-022-13,0,VBOX_Lat_Min,33.534966,2025-09-05 04:10:28.566000+00:00,GR86-022-13,13
197,,3,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:56.028Z,GR86-022-13,0,VBOX_Lat_Min,33.534966,2025-09-05 04:10:28.608000+00:00,GR86-022-13,13
198,,3,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:56.028Z,GR86-022-13,0,VBOX_Lat_Min,33.535,2025-09-05 04:10:28.651000+00:00,GR86-022-13,13
199,,3,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:56.254Z,GR86-022-13,0,VBOX_Lat_Min,33.535,2025-09-05 04:10:28.694000+00:00,GR86-022-13,13


In [70]:
def data_each_car(db, vehicle_id):
    # ignoring ath for now
    list_all_dfs = []
    car = db.get_car_race(track="barber", race_number=2, vehicle_code=vehicle_id)

    if car:
        df_accx = car.get_telemetry("accx_can")
        df_accy = car.get_telemetry("accy_can")
        df_speed = car.get_telemetry("speed")
        df_ath = car.get_telemetry("ath")
        df_gear = car.get_telemetry("gear")
        df_aps = car.get_telemetry("aps")
        df_nmotor = car.get_telemetry("nmot")

        df_pbrake_f = car.get_telemetry("pbrake_f")
        df_pbrake_r = car.get_telemetry("pbrake_r")
        list_all_dfs = [df_accx, df_accy, df_speed, df_gear, df_aps, df_nmotor, df_pbrake_f, df_pbrake_r]
    return list_all_dfs


In [75]:
import pandas as pd
import numpy as np

def extract_10s_flexible(df, start_ts, ts_col="timestamp", sample_count=None):
    """
    Extract 10 seconds of data starting from start_ts.
    Works whether timestamps are in a column or in the index.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe.
    start_ts : str or pd.Timestamp
        Start timestamp for extraction.
    ts_col : str, optional
        Name of the timestamp column (ignored if index is datetime), by default "timestamp".
    sample_count : int, optional
        Number of rows to extract. If None, extracts all rows within 10 seconds.

    Returns
    -------
    pd.DataFrame
        Dataframe sliced to 10 seconds (or sample_count if provided).
    """
    df = df.copy()

    # Convert start timestamp
    start_ts = pd.to_datetime(start_ts, utc=True)

    # Determine timestamps
    if isinstance(df.index, pd.DatetimeIndex):
        timestamps = df.index
    elif ts_col in df.columns:
        df[ts_col] = pd.to_datetime(df[ts_col], utc=True)
        timestamps = df[ts_col]
    else:
        raise ValueError("No datetime index or timestamp column found.")

    # Slice by nearest timestamp
    if sample_count is not None:
        # Compute absolute differences safely
        diffs = np.abs((timestamps - start_ts).total_seconds())
        nearest_idx = diffs.argmin()
        end_idx = nearest_idx + sample_count
        df_slice = df.iloc[nearest_idx:end_idx]
    else:
        end_ts = start_ts + pd.Timedelta(seconds=10)
        if isinstance(df.index, pd.DatetimeIndex):
            df_slice = df.loc[start_ts:end_ts]
        else:
            df_slice = df[(timestamps >= start_ts) & (timestamps < end_ts)]

    return df_slice.reset_index(drop=False)


In [71]:
telemetry_list = data_each_car(db, "GR86-022-13")
telemetry_list.append(df_lat)
telemetry_list.append(df_long)

common_index, list_dfs = index(telemetry_list)

final_df_car13 = combine_dfs_car(telemetry_names, common_index, list_dfs)


  df_resampled = df_new.reindex(common_index).interpolate(
  df_resampled = df_new.reindex(common_index).interpolate(
  df_resampled = df_new.reindex(common_index).interpolate(
  df_resampled = df_new.reindex(common_index).interpolate(
  df_resampled = df_new.reindex(common_index).interpolate(
  df_resampled = df_new.reindex(common_index).interpolate(
  df_resampled = df_new.reindex(common_index).interpolate(
  df_resampled = df_new.reindex(common_index).interpolate(
  df_resampled = df_new.reindex(common_index).interpolate(
  df_resampled = df_new.reindex(common_index).interpolate(


In [72]:
final_df_car13.head()

Unnamed: 0,accx,accy,speed,gear,aps,nmot,pbrake_f,pbrake_r,latitude,longitude
2025-09-05 04:09:31.838000+00:00,0.27,0.017,92.87,2.0,100.0,5493.0,0.0,0.0,33.530674,-86.619843
2025-09-05 04:09:31.839000+00:00,0.27031,0.016476,92.887453,2.0,100.0,5493.0,0.0,0.0,33.530674,-86.619843
2025-09-05 04:09:31.840000+00:00,0.270619,0.015952,92.904905,2.0,100.0,5493.0,0.0,0.0,33.530674,-86.619843
2025-09-05 04:09:31.841000+00:00,0.270929,0.015429,92.922358,2.0,100.0,5493.0,0.0,0.0,33.530674,-86.619843
2025-09-05 04:09:31.842000+00:00,0.271238,0.014905,92.93981,2.0,100.0,5493.0,0.0,0.0,33.530674,-86.619843


In [76]:
final_df_car13_10s = extract_10s_flexible(final_df_car13, "2025-09-05 04:09:31.838000+00:00")

In [77]:
final_df_car13_10s.head()

Unnamed: 0,index,accx,accy,speed,gear,aps,nmot,pbrake_f,pbrake_r,latitude,longitude
0,2025-09-05 04:09:31.838000+00:00,0.27,0.017,92.87,2.0,100.0,5493.0,0.0,0.0,33.530674,-86.619843
1,2025-09-05 04:09:31.839000+00:00,0.27031,0.016476,92.887453,2.0,100.0,5493.0,0.0,0.0,33.530674,-86.619843
2,2025-09-05 04:09:31.840000+00:00,0.270619,0.015952,92.904905,2.0,100.0,5493.0,0.0,0.0,33.530674,-86.619843
3,2025-09-05 04:09:31.841000+00:00,0.270929,0.015429,92.922358,2.0,100.0,5493.0,0.0,0.0,33.530674,-86.619843
4,2025-09-05 04:09:31.842000+00:00,0.271238,0.014905,92.93981,2.0,100.0,5493.0,0.0,0.0,33.530674,-86.619843


In [78]:
final_df_car13_10s.tail()

Unnamed: 0,index,accx,accy,speed,gear,aps,nmot,pbrake_f,pbrake_r,latitude,longitude
9996,2025-09-05 04:09:41.834000+00:00,0.187191,-0.807833,87.171111,2.0,13.94976,4863.853075,0.0,0.0,33.529341,-86.622058
9997,2025-09-05 04:09:41.835000+00:00,0.184428,-0.809,87.173334,2.0,13.975717,4864.596336,0.0,0.0,33.529341,-86.622058
9998,2025-09-05 04:09:41.836000+00:00,0.181667,-0.810167,87.175556,2.0,14.001668,4865.339408,0.0,0.0,33.529341,-86.622059
9999,2025-09-05 04:09:41.837000+00:00,0.178905,-0.811333,87.177778,2.0,14.027618,4866.082479,0.0,0.0,33.52934,-86.622059
10000,2025-09-05 04:09:41.838000+00:00,0.176142,-0.8125,87.18,2.0,14.053576,4866.82574,0.0,0.0,33.52934,-86.62206


In [79]:
#feedin gin data for every 10 seconds:
import os
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")
#define state inputs:

state = ['accx', 'accy', 'speed', 'nmot', 'latitude', 'longitude']
control = ['gear', 'aps', 'pbrake_f', 'pbrake_r']

#each df - convert to tensors - tensor dataset - dataloader - feed to NN









Using cpu device


In [84]:
#model class to declare RNN and defining a forward pass of the model


class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, seq_length):
        #inherits from nn.Module
        super(RNN, self).__init__()
        self.hidden_size = hidden_size  #dim of memory inside lstm
        self.num_layers = num_layers  #stacked lstm layers
        #lstm: long short term memory - looks at lng term dependencies in sequential data

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)  #correspond to input data shape
        self.seq_length = seq_length  #no of timestamps to look at to predict the next control output

        #num classes is the no of outputs predicted by the model

        #to convert memory vector to outputs (shaping constraints)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        #inital hidden, cell states - these are internal memory vectors
        #hidden = short term memory, current output of LSTM at a given time
        hidden_state = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        #cell state = long term memory, stores trends (remmebers info over many time steps)

        cell_states = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        #forward propagate lstm
        out, _ = self.lstm(x)
        #out, _ = self.lstm(x, (hidden_state,
        #                        cell_states))  #out; tensor of shape(batch_soze, seq_length, hidden_size) - at the final time step
        #decode the hidden state of t
        out = self.fc(out[:, -1, :])
        return out

In [80]:
#preprocess data for nn
#create sequences from data (seq_len timestamps as input - the next timestamp is the target)

class CarSequenceDataset(torch.utils.data.Dataset):
    def __init__(self, df, state_cols, control_cols, seq_len, stride):
        self.seq_len = seq_len #length of input sequences
        # Convert directly to tensors
        self.states = torch.tensor(df[state_cols].values, dtype=torch.float32)
        self.controls = torch.tensor(df[control_cols].values, dtype=torch.float32)
        self.stride = stride #the step between the start o consecutive sequences - to reduce overlapping between sequences being fed to the network.
        self.total_size = self.states.size(0) #total number of timestamps

        #compute all possible start indices
        self.indices = list(range(0, self.total_size - self.seq_len, self.stride)) #first seq starts at t0, second at t0+stride, next at t0 + 2*stride, etc

        #the target timestamp is: i+seq_len, so the input is from i:i+seq_len, so i<total_size - seq_len
    def __len__(self):
        return len(self.indices)

    def __getitem__(self, index):
        idx = self.indices[index] #index of the first timestep of seq

        # Input: current states + controls
        x_seq = torch.cat([
            self.states[idx:idx + self.seq_len],
            self.controls[idx:idx + self.seq_len]
        ], dim=1)  # concatenate along feature dimension

        # Output: next states
        y_seq = self.states[idx + self.seq_len]
        return x_seq, y_seq


input_size = len(state) + len(control)  # 6 + 4 = 10
output_size = len(state)  # 6



In [82]:
from sklearn.preprocessing import StandardScaler

# Choose columns to scale (all states + controls)
cols_to_scale = state + control

#separate scalers for training/testing

scaler_in = StandardScaler()
scaler_out = StandardScaler()


# Fit scaler on the training portion only
train_len = int(0.8 * len(final_df_car13_10s))
df_train_raw = final_df_car13_10s.iloc[:train_len].reset_index(drop=True)
df_test_raw = final_df_car13_10s.iloc[train_len:].reset_index(drop=True)

scaler_in.fit(df_train_raw[cols_to_scale])  # fit only on train
scaler_out.fit(df_train_raw[state]) #outputs only
# Transform both train and test
df_train = df_train_raw.copy()
df_test = df_test_raw.copy()

#inputs:state+control
df_train[cols_to_scale] = scaler_in.transform(df_train_raw[cols_to_scale])
df_test[cols_to_scale] = scaler_in.transform(df_test_raw[cols_to_scale])

#now transform outputs
df_train[state] = scaler_out.transform(df_train_raw[state])
df_test[state] = scaler_out.transform(df_test_raw[state])


In [83]:
seq_length = 10
train_dataset = CarSequenceDataset(df_train, state, control, seq_length, stride = seq_length*2)
test_dataset = CarSequenceDataset(df_test, state, control, seq_length, stride = seq_length*2)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle = False)  #dont need to shuffle time series data??
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [85]:

model = RNN(input_size=10, hidden_size=64, num_layers=2, seq_length=20).to(device)

#regression based
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 10
step = 0
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        step+=1
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        print(f"Epoch {epoch + 1}, Step {step}, Loss: {loss.item():.4f}")

    avg_loss = epoch_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {avg_loss:.4f}")


    model.eval()
    test_loss = 0
    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            test_loss += loss.item()
    avg_test_loss = test_loss / len(test_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Test Loss: {avg_test_loss:.4f}")

torch.save(model.state_dict(), "car13_10s.pt")

Epoch 1, Step 1, Loss: 0.9711
Epoch 1, Step 2, Loss: 1.7542
Epoch 1, Step 3, Loss: 0.2795
Epoch 1, Step 4, Loss: 0.7415
Epoch 1, Step 5, Loss: 0.9324
Epoch 1, Step 6, Loss: 1.0260
Epoch 1, Step 7, Loss: 1.9950
Epoch 1/10, Train Loss: 1.1000
Epoch 1/10, Test Loss: 4.5845
Epoch 2, Step 8, Loss: 0.8499
Epoch 2, Step 9, Loss: 1.5986
Epoch 2, Step 10, Loss: 0.2605
Epoch 2, Step 11, Loss: 0.7012
Epoch 2, Step 12, Loss: 0.8809
Epoch 2, Step 13, Loss: 0.9486
Epoch 2, Step 14, Loss: 1.8629
Epoch 2/10, Train Loss: 1.0147
Epoch 2/10, Test Loss: 4.3413
Epoch 3, Step 15, Loss: 0.7554
Epoch 3, Step 16, Loss: 1.4389
Epoch 3, Step 17, Loss: 0.2421
Epoch 3, Step 18, Loss: 0.6533
Epoch 3, Step 19, Loss: 0.7980
Epoch 3, Step 20, Loss: 0.8318
Epoch 3, Step 21, Loss: 1.6818
Epoch 3/10, Train Loss: 0.9145
Epoch 3/10, Test Loss: 4.0198
Epoch 4, Step 22, Loss: 0.5944
Epoch 4, Step 23, Loss: 1.1869
Epoch 4, Step 24, Loss: 0.2193
Epoch 4, Step 25, Loss: 0.5875
Epoch 4, Step 26, Loss: 0.6653
Epoch 4, Step 27, Lo