In [7]:
import pandas as pd

# Signal columns that each get mean / max / std summaries per driver.
_SIGNAL_COLUMNS = (
    'Speed(m/s)',
    'Acceleration(m/s^2)',
    'Heading_Change(degrees)',
    'Jerk(m/s^3)',
    'Braking_Intensity',
)
_SIGNAL_STATS = ('mean', 'max', 'std')


def driver_id_from_trip_id(trip_id):
    """Return the integer after the first '-' in *trip_id* (e.g. 'T-1' -> 1)."""
    return int(trip_id.split('-')[1])


def _first_mode(values):
    """Return the most common value in *values*, or NaN if there is none.

    ``Series.mode()`` drops missing values, so a group whose entries are all
    missing yields an empty result; indexing it with ``[0]`` would raise
    ``KeyError`` and abort the whole aggregation.
    """
    modes = values.mode()
    if modes.empty:
        return float('nan')
    return modes.iloc[0]


def aggregate_by_driver(df):
    """Aggregate per-observation rows into one summary row per driver.

    Parameters:
        df: DataFrame with a 'DriverId' column plus the signal columns,
            'SASV', 'Speed_Violation', 'Time_Step' and 'Driving_Category'.

    Returns:
        DataFrame with 'DriverId' first, followed by
        '<signal>_mean' / '<signal>_max' / '<signal>_std' for every signal
        column, then 'SASV_mean', 'Speed_Violation_mean',
        'Total_Observations' and 'Driving_Category'.
    """
    # Named aggregation ties every output column name to its source column
    # and statistic, so labels cannot drift out of step with the spec the
    # way a positional rename can.
    named_aggs = {
        f'{column}_{stat}': pd.NamedAgg(column=column, aggfunc=stat)
        for column in _SIGNAL_COLUMNS
        for stat in _SIGNAL_STATS
    }
    named_aggs['SASV_mean'] = pd.NamedAgg(column='SASV', aggfunc='mean')
    named_aggs['Speed_Violation_mean'] = pd.NamedAgg(
        column='Speed_Violation', aggfunc='mean')
    # 'count' counts the non-null Time_Step values recorded for the driver.
    named_aggs['Total_Observations'] = pd.NamedAgg(
        column='Time_Step', aggfunc='count')
    # Most common driving category for the driver (NaN when none is present).
    named_aggs['Driving_Category'] = pd.NamedAgg(
        column='Driving_Category', aggfunc=_first_mode)

    # The keys are not valid Python identifiers, hence the dict unpacking.
    return df.groupby('DriverId').agg(**named_aggs).reset_index()


# True both when run as a script and when executed as a notebook cell, so the
# names below are still defined at top level for later cells.
if __name__ == "__main__":
    # Load the processed data file (replace with your actual file path)
    processed_data_path = '../data/processed_data.csv'
    df = pd.read_csv(processed_data_path)

    # Assign DriverId based on TripId (e.g., T-1 -> DriverId 1, T-2 -> DriverId 2)
    df['DriverId'] = df['TripId'].apply(driver_id_from_trip_id)

    # Group by DriverId and aggregate data for each driver
    driver_aggregated_data = aggregate_by_driver(df)

    # Save aggregated data for future bulk processing
    aggregated_output_path = '../data/bulk_driver_aggregated_data.csv'
    driver_aggregated_data.to_csv(aggregated_output_path, index=False)

    print(f"Aggregated data by driver saved to {aggregated_output_path}")


Aggregated data by driver saved to ../data/bulk_driver_aggregated_data.csv
