# Preprocessing

In [48]:
import pandas as pd
import numpy as np
import plotly.express as px

In [38]:
import awswrangler as wr

# Where the raw telemetry lives and which month to pull.
# Edit these three values to load a different slice; nothing else in this cell
# needs to change.
RAW_TELEMETRY_PATH = "s3://redline-datalake-590184144848-us-east-1/raw/telemetry"
TARGET_YEAR = "2026"
TARGET_MONTH = "01"


def _in_target_month(partition):
    """Return True when a partition belongs to TARGET_YEAR / TARGET_MONTH.

    awswrangler calls this once per partition with a dict mapping partition
    names to their values as strings, hence the string comparisons.
    """
    return partition["year"] == TARGET_YEAR and partition["month"] == TARGET_MONTH


# dataset=True is required for partition_filter to be applied, so only the
# matching partitions are read.
df_raw = wr.s3.read_parquet(
    path=RAW_TELEMETRY_PATH,
    dataset=True,
    partition_filter=_in_target_month,
)

In [45]:
# Work on an independent copy so df_raw stays untouched and later cells can be
# re-derived without re-reading from S3. DataFrame.copy() is deep by default.
df = df_raw.copy()

In [40]:
# Inspect column names, dtypes and non-null counts of the working copy.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100571 entries, 0 to 100570
Data columns (total 23 columns):
 #   Column                 Non-Null Count   Dtype   
---  ------                 --------------   -----   
 0   vehicle_id             100571 non-null  string  
 1   timestamp              100571 non-null  Int64   
 2   session_id             100571 non-null  string  
 3   brake_disc_temp_fl     100571 non-null  float64 
 4   brake_disc_temp_fr     100571 non-null  float64 
 5   brake_disc_temp_rl     100571 non-null  float64 
 6   brake_disc_temp_rr     100571 non-null  float64 
 7   brake_fluid_pressure   100571 non-null  float64 
 8   brake_pad_wear_fl      100571 non-null  float64 
 9   brake_pad_wear_fr      100571 non-null  float64 
 10  brake_pad_wear_rl      100571 non-null  float64 
 11  brake_pad_wear_rr      100571 non-null  float64 
 12  engine_rpm             100571 non-null  Int32   
 13  engine_oil_temp        100571 non-null  float64 
 14  engine_oil_pressure 

In [12]:
# Collect the distinct vehicle IDs present in this slice of telemetry.
unique_vehicle = df['vehicle_id'].unique()

In [13]:
# Show the distinct vehicle IDs found above.
print(unique_vehicle)

<StringArray>
['GT3-RACER-01']
Length: 1, dtype: string


In [41]:
# Columns to remove before analysis. vehicle_id holds a single value in this
# slice, and year/month are the partition keys used to select the data.
# NOTE(review): day/hour look like partition keys too, and session_id is assumed
# not to be needed downstream -- confirm both.
droped_columns = ['vehicle_id','session_id','year','month','day','hour']

In [42]:
# Remove the identifier/partition columns listed in droped_columns.
# errors="ignore" keeps this cell safe to re-run: df is rebound in place, so on
# a second execution the columns are already gone and a plain drop would raise
# KeyError.
df = df.drop(columns=droped_columns, errors="ignore")

In [44]:
# Re-check the schema after the column drop.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100571 entries, 0 to 100570
Data columns (total 17 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   timestamp              100571 non-null  Int64  
 1   brake_disc_temp_fl     100571 non-null  float64
 2   brake_disc_temp_fr     100571 non-null  float64
 3   brake_disc_temp_rl     100571 non-null  float64
 4   brake_disc_temp_rr     100571 non-null  float64
 5   brake_fluid_pressure   100571 non-null  float64
 6   brake_pad_wear_fl      100571 non-null  float64
 7   brake_pad_wear_fr      100571 non-null  float64
 8   brake_pad_wear_rl      100571 non-null  float64
 9   brake_pad_wear_rr      100571 non-null  float64
 10  engine_rpm             100571 non-null  Int32  
 11  engine_oil_temp        100571 non-null  float64
 12  engine_oil_pressure    100571 non-null  float64
 13  engine_coolant_temp    100571 non-null  float64
 14  boost_pressure         100571 non-nu

In [47]:
# Build a chronologically sorted DatetimeIndex from the integer timestamp column.
# NOTE(review): unit='s' assumes `timestamp` holds epoch seconds -- confirm
# against the data producer.
df = (
    df.assign(datetime=pd.to_datetime(df['timestamp'], unit='s'))
    .set_index('datetime')
    .sort_index()
)

In [None]:
# The four per-wheel brake disc temperature columns to plot.
brake_temp_columns = [
    'brake_disc_temp_fl', 'brake_disc_temp_fr',
    'brake_disc_temp_rl', 'brake_disc_temp_rr',
]

# Average the readings into 10-second bins to cut down the number of points
# sent to the browser; the datetime index is turned back into a column so
# plotly can use it as the x axis.
ten_second_bins = df.resample('10s')
df_resampled = ten_second_bins.mean(numeric_only=True).reset_index()

# Interactive line chart with one trace per wheel.
fig = px.line(
    df_resampled,
    x='datetime',
    y=brake_temp_columns,
    title='Brake Disc Temperature Evolution',
)
fig.show()