In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset (path is relative to the current working directory)
data = pd.read_csv('../data/driving_data.csv')

# Display the first few rows of the dataset.
# A bare `data.head()` is only rendered when it is the last expression of a
# notebook cell; anywhere else its result is discarded, so print it explicitly.
print(data.head())

# Feature Engineering
# Collapse the rows into one record per TripId holding the mean speed, the
# maximum acceleration, and the summed heading change of that trip.
trip_features = data.groupby('TripId').agg(
    avg_speed=('Speed(km/h)', 'mean'),
    max_acceleration=('Acceleration(m/s^2)', 'max'),
    total_heading_change=('Heading_Change(degrees)', 'sum')
).reset_index()  # turn the TripId group key back into a regular column

# Label Encoding - Categorizing drivers into 4 classes based on behavior
def categorize_behavior(row):
    """Assign a rule-based driving-behavior class to one trip.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by the keys
            'avg_speed', 'max_acceleration' and 'total_heading_change'.

    Returns:
        'Aggressive', 'Average' or 'Cautious' when all three features satisfy
        that class's thresholds; otherwise 'Moderate'. The rules are tried in
        that order and the first match wins.
    """
    def all_above(speed_limit, accel_limit, heading_limit):
        # Strictly-greater test on every feature; `and` keeps the lookups lazy.
        return (
            row['avg_speed'] > speed_limit
            and row['max_acceleration'] > accel_limit
            and row['total_heading_change'] > heading_limit
        )

    if all_above(60, 0.5, 30):
        return 'Aggressive'
    if all_above(50, 0.3, 20):
        return 'Average'

    # The cautious rule uses strict upper bounds, so values sitting exactly on
    # a threshold fall through to the catch-all class.
    is_cautious = (
        row['avg_speed'] < 50
        and row['max_acceleration'] < 0.3
        and row['total_heading_change'] < 10
    )
    return 'Cautious' if is_cautious else 'Moderate'

# Attach the rule-based behavior class to every trip (one call per row).
trip_features['driving_behavior'] = trip_features.apply(categorize_behavior, axis=1)

# Mapping behavior to numeric labels
behavior_mapping = {'Aggressive': 3, 'Average': 2, 'Cautious': 1, 'Moderate': 0}
trip_features['behavior_label'] = trip_features['driving_behavior'].map(behavior_mapping)

# Splitting features and labels
feature_columns = ['avg_speed', 'max_acceleration', 'total_heading_change']
X = trip_features[feature_columns]
y = trip_features['behavior_label']

# Splitting data into training and testing sets.
# The split happens BEFORE scaling so the held-out rows never contribute to
# the scaler's mean/variance (fitting on the full matrix leaks test data).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features: learn the statistics from the training rows only,
# then apply that same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-only statistics; kept under its
# original name in case code outside this section still refers to it.
X_scaled = scaler.transform(X)

# Saving preprocessed data for model training
# fit_transform/transform return plain arrays, so restore the column names.
X_train_df = pd.DataFrame(X_train, columns=feature_columns)
X_test_df = pd.DataFrame(X_test, columns=feature_columns)

X_train_df.to_csv('../data/X_train.csv', index=False)
X_test_df.to_csv('../data/X_test.csv', index=False)
y_train.to_csv('../data/y_train.csv', index=False)
y_test.to_csv('../data/y_test.csv', index=False)

print("Data Preprocessing Completed.")


Data Preprocessing Completed.
