In [0]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read the merged feature table written by notebook 01.
# The "timestamp" column is parsed into datetimes while loading.
df = pd.read_csv(
    filepath_or_buffer="data/processed/merged_features.csv",
    parse_dates=["timestamp"],
)

In [0]:
# ---- Feature Distribution Plots ----

# Create the folder that receives the saved figures (no error if it is already there).
os.makedirs("output", exist_ok=True)

# Histogram of the speed_kph column: 40 bins, slightly transparent bars.
fig, ax = plt.subplots(figsize=(8, 4))
df["speed_kph"].plot(kind="hist", bins=40, alpha=0.7, ax=ax)
ax.set_title("Speed Distribution (km/h)")
ax.set_xlabel("Speed (km/h)")
ax.set_ylabel("Count")
ax.grid(True)
fig.tight_layout()

# Persist the figure as a PNG, then render it in the notebook.
fig.savefig("output/speed_distribution.png")
plt.show()

In [0]:
# Histogram of the acc_mag column: 40 bins, slightly transparent bars.
fig, ax = plt.subplots(figsize=(8, 4))
df["acc_mag"].plot(kind="hist", bins=40, alpha=0.7, ax=ax)
ax.set_title("Acceleration Magnitude Distribution")
ax.set_xlabel("Acceleration (m/s^2)")
ax.set_ylabel("Count")
ax.grid(True)
fig.tight_layout()

# Persist the figure as a PNG, then render it in the notebook.
fig.savefig("output/acc_mag_distribution.png")
plt.show()

In [0]:
# Event flag columns to total up: harsh braking, harsh acceleration,
# harsh turns, speeding, and the "risky" flag.
events = [
    "harsh_brake",
    "harsh_accel",
    "harsh_turn",
    "speeding",
    "risky",
]

# Column-wise sum gives one total per event column.
event_counts = df[events].sum()

# Bar chart with one bar per event column.
fig, ax = plt.subplots(figsize=(7, 4))
event_counts.plot(kind="bar", ax=ax)
ax.set_title("Count of Detected Driving Events")
ax.set_ylabel("Count")
fig.tight_layout()

# Persist the figure as a PNG, then render it in the notebook.
fig.savefig("output/driving_events_counts.png")
plt.show()

In [0]:
# Correlation matrix of features.
# seaborn (sns) is imported with the other dependencies in the notebook's
# import cell rather than in the middle of this analysis cell.

# Compute the pairwise correlations first, so that a missing column raises
# before any figure is created (no empty figure is left behind on failure).
corr = df[
    [
        "speed_kph",
        "acc_mag",
        "acc_north",
        "acc_east",
        "yaw_rate",
        "harsh_brake",
        "harsh_accel",
        "harsh_turn",
        "sharp_turn",
        "risky",
    ]
].corr()

# Annotated heatmap: each cell shows the coefficient with two decimals.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", square=True, ax=ax)
ax.set_title("Feature Correlation Matrix")
fig.tight_layout()

# Persist the figure as a PNG, then render it in the notebook.
fig.savefig("output/feature_correlation_matrix.png")
plt.show()

In [0]:
# Write the DataFrame back to disk for downstream use.
# index=False keeps the row index out of the CSV.
df.to_csv(
    path_or_buf="data/processed/feature_engineered.csv",
    index=False,
)

print("Feature engineering and EDA complete.")