# Module 5: Driving Violations Detection (Classification)

This notebook covers:
- Detecting violations from telematics features
- Handling class imbalance
- Training Random Forest and XGBoost classifiers
- Precision/Recall evaluation

In [None]:
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.model_selection import train_test_split

# Read the telematics table from the data folder one level above the notebook
data_dir = Path('../data')
telematics = pd.read_csv(data_dir / 'telematics.csv')

# Binary target: a row is a violation (1) when at least one of the three
# event flags equals 1, otherwise it is 0.
is_hard_brake = telematics['hard_brake'].eq(1)
is_overspeed = telematics['overspeed'].eq(1)
is_harsh_turn = telematics['harsh_turn'].eq(1)
telematics['violation'] = (is_hard_brake | is_overspeed | is_harsh_turn).astype(int)

# Class balance check: per-class counts, then the share of rows flagged 1
violation_flags = telematics['violation']
print("Violation Distribution:")
print(violation_flags.value_counts())
print(f"Violation Rate: {violation_flags.mean():.2%}")

In [None]:
# Feature matrix (telematics signals plus trip-level columns) and binary target
feature_cols = [
    'speed', 'throttle', 'brake', 'steering_angle',
    'accel_x', 'accel_y',
    'trip_duration_sec', 'distance_km',
]
X, y = telematics[feature_cols], telematics['violation']

# Hold out 20% of rows for testing; stratify=y keeps the violation rate
# the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Random Forest with class_weight='balanced' to counter the class imbalance
rf_model = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    class_weight='balanced',
    random_state=42,
)
rf_model.fit(X_train, y_train)

# Hard labels feed the report; the second probability column is kept for the
# ROC curve (presumably P(violation), given the 0/1 target -- confirm via rf_model.classes_).
y_pred = rf_model.predict(X_test)
y_pred_proba = rf_model.predict_proba(X_test)[:, 1]

print("=== Classification Report ===")
print(
    classification_report(
        y_test,
        y_pred,
        target_names=['No Violation', 'Violation'],
    )
)

In [None]:
# Confusion matrix (left panel) and ROC curve (right panel) for the test split
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left: counts of actual (rows) vs. predicted (columns) classes
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[0],
            xticklabels=['No Violation', 'Violation'], yticklabels=['No Violation', 'Violation'])
axes[0].set_xlabel('Predicted')
axes[0].set_ylabel('Actual')
axes[0].set_title('Confusion Matrix')

# Right: ROC curve built from the predicted probabilities. The thresholds
# array is bound to a named throwaway rather than `_`, which IPython uses
# for the last cell output.
fpr, tpr, _thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)
axes[1].plot(fpr, tpr, label=f'AUC = {roc_auc:.3f}')
axes[1].plot([0, 1], [0, 1], 'k--')  # diagonal = chance-level reference
axes[1].set_xlabel('False Positive Rate')
axes[1].set_ylabel('True Positive Rate')
axes[1].set_title('ROC Curve')
axes[1].legend()

plt.tight_layout()
plt.show()

# Save model
model_path = Path('../src/violation_model.joblib')
# Create the target folder first so the dump does not fail on a fresh checkout
# where ../src has not been created yet.
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(rf_model, model_path)
print("✓ Saved violation_model.joblib")