# 🚗 NYC Motor Vehicle Collisions - Advanced AI/ML Analysis

## Overview
This notebook provides comprehensive machine learning and deep learning analysis of NYC motor vehicle collision data.

**Key Features:**
- 📊 Exploratory Data Analysis (EDA)
- 🤖 Multiple ML Models (Random Forest, Gradient Boosting, XGBoost, LightGBM)
- 🧠 Deep Learning with TensorFlow/Keras
- 📈 Advanced Visualizations
- 🎯 Feature Importance Analysis
- 🔮 Predictive Insights

---

## 1. Library Imports and Setup

In [None]:
# Core Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
from datetime import datetime

warnings.filterwarnings('ignore')

# Set plotting style.
# The seaborn style sheets were renamed to 'seaborn-v0_8*' in matplotlib 3.6;
# older releases only know the sheet as 'seaborn'. Pick whichever name this
# installation provides instead of letting an OSError abort the setup cell.
# If neither exists, matplotlib's default style is kept.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("husl")

# Optional dependencies.
# Every optional library gets a module-level *_AVAILABLE flag that later cells
# consult before using it. Flags start out False and are flipped only when the
# corresponding import succeeds.

# Machine Learning Libraries
ML_AVAILABLE = False
try:
    from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
    from sklearn.model_selection import train_test_split, cross_val_score
    from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix, accuracy_score
    from sklearn.preprocessing import StandardScaler
except ImportError as e:
    print(f"❌ Scikit-learn not available: {e}")
else:
    ML_AVAILABLE = True
    print("✅ Scikit-learn imported successfully")

# Advanced ML Libraries
XGB_AVAILABLE = False
try:
    import xgboost as xgb
except ImportError:
    print("❌ XGBoost not available")
else:
    XGB_AVAILABLE = True
    print("✅ XGBoost imported successfully")

LGB_AVAILABLE = False
try:
    import lightgbm as lgb
except ImportError:
    print("❌ LightGBM not available")
else:
    LGB_AVAILABLE = True
    print("✅ LightGBM imported successfully")

# Deep Learning Libraries
DL_AVAILABLE = False
try:
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
except ImportError:
    print("❌ TensorFlow not available")
else:
    DL_AVAILABLE = True
    print(f"✅ TensorFlow {tf.__version__} imported successfully")

# Summary of what the rest of the notebook will be able to run.
print("\n🚀 Setup Complete!")
for _label, _flag in (
    ("ML", ML_AVAILABLE),
    ("XGBoost", XGB_AVAILABLE),
    ("LightGBM", LGB_AVAILABLE),
    ("Deep Learning", DL_AVAILABLE),
):
    print(f"{_label} Available: {_flag}")

## 2. Data Loading and Initial Exploration

In [None]:
# Load the dataset from the current working directory.
# `df` is set to None when the file is missing; every later cell checks
# `df is not None` before doing any work.
try:
    df = pd.read_csv('Motor_Vehicle_Collisions_-_Crashes.csv')
except FileNotFoundError:
    print("❌ Data file not found. Please ensure 'Motor_Vehicle_Collisions_-_Crashes.csv' is in the current directory.")
    print("📥 You can download it from: https://data.cityofnewyork.us/Public-Safety/Motor-Vehicle-Collisions-Crashes/h9gi-nx95")
    df = None
else:
    # Success path kept out of the try body so only read_csv is guarded.
    print(f"✅ Dataset loaded successfully!")
    print(f"📊 Shape: {df.shape}")
    print(f"💾 Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")

if df is not None:
    # Display basic info
    print("\n📋 Column Information:")
    # DataFrame.info() writes its report straight to stdout and returns None,
    # so wrapping it in print() appended a stray "None" line to the output.
    df.info()
    
    print("\n🔍 First 5 rows:")
    display(df.head())

## 3. Data Preprocessing and Feature Engineering

In [None]:
if df is not None:
    print("🔧 PREPROCESSING DATA...")

    # Work on a copy so the raw frame in `df` stays untouched.
    data = df.copy()

    # The NYC Open Data export uses space-separated headers ("CRASH DATE",
    # "NUMBER OF PERSONS INJURED", ...). Normalise them to UPPER_SNAKE_CASE so
    # the lookups below work for the raw download as well as for files whose
    # headers were already cleaned up.
    data.columns = (
        data.columns.str.strip().str.upper().str.replace(' ', '_', regex=False)
    )

    # Convert datetime columns; unparseable values become NaT.
    data['CRASH_DATE'] = pd.to_datetime(data['CRASH_DATE'], errors='coerce')
    data['CRASH_TIME'] = pd.to_datetime(data['CRASH_TIME'], format='%H:%M', errors='coerce')

    # Remove rows with missing critical data *before* deriving features.
    # A missing CRASH_TIME is exactly a missing hour, and dropping first keeps
    # the calendar fields below as integers instead of NaN-carrying floats.
    initial_rows = len(data)
    data = data.dropna(subset=['CRASH_DATE', 'CRASH_TIME'])
    final_rows = len(data)

    # Extract enhanced time features
    data['year'] = data['CRASH_DATE'].dt.year
    data['month'] = data['CRASH_DATE'].dt.month
    data['day_of_week'] = data['CRASH_DATE'].dt.dayofweek  # Monday=0 ... Sunday=6
    data['hour'] = data['CRASH_TIME'].dt.hour
    data['day_name'] = data['CRASH_DATE'].dt.day_name()
    data['month_name'] = data['CRASH_DATE'].dt.month_name()
    data['quarter'] = data['CRASH_DATE'].dt.quarter

    # Enhanced binary features
    data['is_weekend'] = data['day_of_week'].isin([5, 6]).astype(int)
    data['is_rush_hour'] = ((data['hour'].between(7, 9)) | (data['hour'].between(17, 19))).astype(int)
    data['is_night'] = ((data['hour'] >= 22) | (data['hour'] <= 5)).astype(int)
    data['is_holiday_season'] = data['month'].isin([11, 12, 1]).astype(int)

    def _sum_casualties(frame, outcome):
        """Return the per-crash casualty count for *outcome* ('INJURED' or 'KILLED').

        In the NYC collisions schema the NUMBER_OF_PERSONS_* column is already
        the total of the pedestrian, cyclist and motorist columns (see the
        dataset's data dictionary), so summing every matching column would
        count each casualty twice. The persons column is used when present;
        the per-category sum only fills its gaps or replaces it when absent.
        """
        matching = [col for col in frame.columns if outcome in col]
        counts = frame[matching].apply(pd.to_numeric, errors='coerce')
        persons_col = f'NUMBER_OF_PERSONS_{outcome}'
        if persons_col not in counts.columns:
            return counts.sum(axis=1, skipna=True)
        by_category = counts.drop(columns=persons_col).sum(axis=1, skipna=True)
        return counts[persons_col].fillna(by_category)

    # Calculate casualties
    data['total_injured'] = _sum_casualties(data, 'INJURED')
    data['total_killed'] = _sum_casualties(data, 'KILLED')
    data['total_casualties'] = data['total_injured'] + data['total_killed']

    # Enhanced severity classification: any fatality, or two or more injuries.
    data['is_serious'] = ((data['total_killed'] > 0) | (data['total_injured'] >= 2)).astype(int)

    # Enhanced risk scoring: weighted blend of outcome counts and time flags.
    data['risk_score'] = (
        data['total_killed'] * 10 +
        data['total_injured'] * 3 +
        data['is_rush_hour'] * 2 +
        data['is_night'] * 1.5 +
        data['is_weekend'] * 1.2 +
        data['is_holiday_season'] * 1.3
    )

    print(f"✅ Preprocessing complete!")
    print(f"📊 Removed {initial_rows - final_rows:,} rows with missing critical data")
    print(f"📊 Final dataset: {final_rows:,} rows")
    print(f"📈 Serious Accidents: {data['is_serious'].sum():,} ({data['is_serious'].mean():.1%})")

## 4. Exploratory Data Analysis

In [None]:
if df is not None:
    print("📊 EXPLORATORY DATA ANALYSIS")

    # One 2x3 grid: temporal panels on the top row, severity panels below.
    fig, axes = plt.subplots(2, 3, figsize=(20, 12))
    fig.suptitle('NYC Motor Vehicle Collisions - Temporal Analysis', fontsize=16, fontweight='bold')

    # 1. Hourly Distribution
    hourly_data = data.groupby('hour').size()
    axes[0, 0].bar(hourly_data.index, hourly_data.values, color='skyblue', edgecolor='navy', alpha=0.7)
    axes[0, 0].set_title('Accidents by Hour of Day', fontweight='bold')
    axes[0, 0].set_xlabel('Hour')
    axes[0, 0].set_ylabel('Number of Accidents')
    axes[0, 0].grid(True, alpha=0.3)

    # 2. Daily Distribution (reindexed so bars run Monday -> Sunday)
    daily_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    daily_data = data['day_name'].value_counts().reindex(daily_order)
    axes[0, 1].bar(range(len(daily_data)), daily_data.values, color='lightcoral', edgecolor='darkred', alpha=0.7)
    axes[0, 1].set_title('Accidents by Day of Week', fontweight='bold')
    axes[0, 1].set_xlabel('Day of Week')
    axes[0, 1].set_ylabel('Number of Accidents')
    axes[0, 1].set_xticks(range(len(daily_data)))
    axes[0, 1].set_xticklabels([day[:3] for day in daily_data.index], rotation=45)
    axes[0, 1].grid(True, alpha=0.3)

    # 3. Monthly Distribution
    monthly_data = data.groupby('month').size()
    axes[0, 2].plot(monthly_data.index, monthly_data.values, marker='o', linewidth=3, markersize=8, color='green')
    axes[0, 2].set_title('Accidents by Month', fontweight='bold')
    axes[0, 2].set_xlabel('Month')
    axes[0, 2].set_ylabel('Number of Accidents')
    axes[0, 2].grid(True, alpha=0.3)
    axes[0, 2].set_xticks(range(1, 13))

    # 4. Risk Score Distribution
    axes[1, 0].hist(data['risk_score'], bins=50, color='orange', alpha=0.7, edgecolor='black')
    axes[1, 0].set_title('Risk Score Distribution', fontweight='bold')
    axes[1, 0].set_xlabel('Risk Score')
    axes[1, 0].set_ylabel('Frequency')
    axes[1, 0].grid(True, alpha=0.3)

    # 5. Casualty Analysis
    # Keep the ten most common casualty counts, then order the bars by the
    # count itself so the x axis reads as a numeric scale rather than in
    # frequency order.
    casualty_data = data['total_casualties'].value_counts().head(10).sort_index()
    axes[1, 1].bar(range(len(casualty_data)), casualty_data.values, color='red', alpha=0.7)
    axes[1, 1].set_title('Casualty Distribution', fontweight='bold')
    axes[1, 1].set_xlabel('Number of Casualties')
    axes[1, 1].set_ylabel('Frequency')
    axes[1, 1].set_xticks(range(len(casualty_data)))
    # Casualty totals are whole numbers stored as floats; label them as ints.
    axes[1, 1].set_xticklabels([int(value) for value in casualty_data.index])
    axes[1, 1].grid(True, alpha=0.3)

    # 6. Serious vs Non-serious
    # value_counts() orders by frequency and omits absent classes, so pin the
    # order to [0, 1] to guarantee each wedge gets the matching label/colour.
    serious_counts = data['is_serious'].value_counts().reindex([0, 1], fill_value=0)
    axes[1, 2].pie(serious_counts.values, labels=['Non-serious', 'Serious'], autopct='%1.1f%%',
                   colors=['lightgreen', 'red'], startangle=90)
    axes[1, 2].set_title('Accident Severity Distribution', fontweight='bold')

    plt.tight_layout()
    plt.show()

    # Summary insights
    # int() guards against a float-typed hour column printing as "16.0:00".
    print("\n🔍 KEY INSIGHTS:")
    print(f"• Peak accident hour: {int(hourly_data.idxmax())}:00 with {hourly_data.max():,} accidents")
    print(f"• Safest hour: {int(hourly_data.idxmin())}:00 with {hourly_data.min():,} accidents")
    print(f"• Average risk score: {data['risk_score'].mean():.2f}")
    print(f"• Total casualties: {data['total_casualties'].sum():,}")

## 5. Machine Learning Model Training

In [None]:
def _train_and_register(label, banner, estimator_cls, **params):
    """Fit one classifier on the training split and record its test ROC-AUC.

    Prints *banner*, builds ``estimator_cls(**params)``, fits it on
    ``X_train``/``y_train``, scores the positive-class probabilities on
    ``X_test`` and stores the model and score in the notebook-level ``models``
    and ``model_scores`` dictionaries under *label*.

    Returns the fitted estimator, its test probabilities and its AUC.
    """
    print(banner)
    estimator = estimator_cls(**params)
    estimator.fit(X_train, y_train)
    positive_proba = estimator.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, positive_proba)
    models[label] = estimator
    model_scores[label] = auc
    print(f"   ✅ AUC Score: {auc:.4f}")
    return estimator, positive_proba, auc


if df is None or not ML_AVAILABLE:
    print("❌ ML libraries not available or no data")
else:
    print("🤖 TRAINING MACHINE LEARNING MODELS")

    # Time-derived predictors; coordinates are appended below when present.
    feature_columns = [
        'hour',
        'day_of_week',
        'month',
        'is_weekend',
        'is_rush_hour',
        'is_night',
        'is_holiday_season',
    ]

    # Add coordinates if available (coerced in place to numeric first).
    coordinate_columns = ['LATITUDE', 'LONGITUDE']
    if set(coordinate_columns).issubset(data.columns):
        for coordinate in coordinate_columns:
            data[coordinate] = pd.to_numeric(data[coordinate], errors='coerce')
        feature_columns += coordinate_columns

    # Keep only rows where every feature and the target are present.
    ml_data = data[[*feature_columns, 'is_serious']].dropna()
    print(f"📊 ML Dataset shape: {ml_data.shape}")

    if len(ml_data) < 1000:
        print("❌ Insufficient data for ML training")
    else:
        X = ml_data[feature_columns]
        y = ml_data['is_serious']

        # Stratified 80/20 split keeps the class ratio equal in both parts.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, test_size=0.2, random_state=42
        )

        print(f"Training set: {X_train.shape[0]:,} samples")
        print(f"Test set: {X_test.shape[0]:,} samples")

        # Registries filled by _train_and_register and read by later cells.
        models = {}
        model_scores = {}

        # 1. Random Forest
        rf_model, rf_pred_proba, rf_score = _train_and_register(
            'Random Forest',
            "\n🌲 Training Random Forest...",
            RandomForestClassifier,
            n_estimators=100,
            max_depth=15,
            random_state=42,
            n_jobs=-1,
            class_weight='balanced',
        )

        # 2. Gradient Boosting
        gb_model, gb_pred_proba, gb_score = _train_and_register(
            'Gradient Boosting',
            "🚀 Training Gradient Boosting...",
            GradientBoostingClassifier,
            n_estimators=100,
            learning_rate=0.1,
            max_depth=8,
            random_state=42,
        )

        # 3. XGBoost (if available)
        if XGB_AVAILABLE:
            xgb_model, xgb_pred_proba, xgb_score = _train_and_register(
                'XGBoost',
                "⚡ Training XGBoost...",
                xgb.XGBClassifier,
                n_estimators=100,
                max_depth=8,
                learning_rate=0.1,
                random_state=42,
                n_jobs=-1,
                eval_metric='logloss',
            )

        # 4. LightGBM (if available)
        if LGB_AVAILABLE:
            lgb_model, lgb_pred_proba, lgb_score = _train_and_register(
                'LightGBM',
                "💡 Training LightGBM...",
                lgb.LGBMClassifier,
                n_estimators=100,
                max_depth=8,
                learning_rate=0.1,
                random_state=42,
                n_jobs=-1,
                verbosity=-1,
            )

        # Best model = highest test AUC (first one wins on ties).
        best_model_name = max(model_scores, key=model_scores.get)
        best_model = models[best_model_name]
        print(f"\n🏆 Best Model: {best_model_name} (AUC: {model_scores[best_model_name]:.4f})")

## 6. Deep Learning Model (Optional)

In [None]:
# Runs only when TensorFlow imported and the ML cell produced a train/test
# split (X_train exists in the notebook namespace).
if DL_AVAILABLE and 'X_train' in locals():
    print("🧠 TRAINING DEEP LEARNING MODEL")

    # Every other model in this notebook is seeded with random_state=42; seed
    # TensorFlow as well so the network's score is comparable between runs.
    # NOTE(review): some GPU kernels may still be non-deterministic.
    tf.random.set_seed(42)

    # Scale features for neural network: fit on the training split only and
    # reuse the same statistics for the test split to avoid leakage.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Create neural network model: three ReLU blocks with dropout, followed by
    # a single sigmoid unit that outputs the probability of a serious crash.
    dl_model = keras.Sequential([
        layers.Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
        layers.Dropout(0.3),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(32, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(1, activation='sigmoid')
    ])

    # Compile model
    dl_model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    print("🏗️ Model Architecture:")
    dl_model.summary()

    # Train model; 20% of the training split is held out for validation and
    # early stopping restores the best weights seen during training.
    print("\n🚀 Training Neural Network...")
    history = dl_model.fit(
        X_train_scaled, y_train,
        batch_size=256,
        epochs=30,
        validation_split=0.2,
        verbose=1,
        callbacks=[
            keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
        ]
    )

    # Evaluate model on the untouched test split with the same metric (ROC-AUC)
    # used for the classical models.
    dl_pred_proba = dl_model.predict(X_test_scaled).flatten()
    dl_score = roc_auc_score(y_test, dl_pred_proba)

    print(f"\n🎯 Deep Learning Model AUC Score: {dl_score:.4f}")

    # Add to model comparison
    if 'model_scores' in locals():
        model_scores['Deep Learning'] = dl_score
        models['Deep Learning'] = dl_model

else:
    print("❌ Deep Learning not available or ML training failed")

## 7. Model Comparison and Feature Importance

In [None]:
# Compare every trained model by test AUC and show feature importances.
if 'model_scores' in locals() and model_scores:
    print("📊 MODEL COMPARISON")

    # Create comparison chart
    plt.figure(figsize=(12, 6))

    model_names = list(model_scores.keys())
    scores = list(model_scores.values())
    colors = ['skyblue', 'lightcoral', 'lightgreen', 'orange', 'purple'][:len(model_names)]

    bars = plt.bar(model_names, scores, color=colors, alpha=0.8, edgecolor='black')
    plt.title('Model Performance Comparison (AUC Scores)', fontsize=16, fontweight='bold')
    plt.xlabel('Models', fontsize=12)
    plt.ylabel('AUC Score', fontsize=12)
    # Start at 0.5 (chance level) unless a model scored below it; a fixed
    # lower limit would hide that model's bar entirely.
    plt.ylim(min(0.5, min(scores) - 0.05), 1.0)
    plt.grid(True, alpha=0.3, axis='y')

    # Add value labels on bars
    for bar, score in zip(bars, scores):
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                f'{score:.4f}', ha='center', va='bottom', fontweight='bold')

    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # Final model ranking (highest AUC first)
    print("\n🏆 FINAL MODEL RANKING:")
    ranked_models = sorted(model_scores.items(), key=lambda x: x[1], reverse=True)
    for i, (model_name, score) in enumerate(ranked_models, 1):
        medal = ['🥇', '🥈', '🥉'][i-1] if i <= 3 else f'{i}.'
        print(f"   {medal} {model_name}: {score:.4f}")

    # Overall winner, kept in the notebook namespace for later cells.
    best_model_name = ranked_models[0][0]
    best_model = models[best_model_name]

    # Feature importance for the best tree-based model: take the highest
    # ranked model that exposes feature_importances_. The overall winner may
    # be the neural network, which has none, and would otherwise leave this
    # section empty.
    importance_model_name = next(
        (name for name, _ in ranked_models
         if hasattr(models[name], 'feature_importances_')),
        None,
    )

    if importance_model_name is not None:
        print(f"\n🔍 FEATURE IMPORTANCE ({importance_model_name}):")

        feature_importance = models[importance_model_name].feature_importances_

        # Create DataFrame for easy manipulation, most important first.
        importance_df = pd.DataFrame({
            'Feature': feature_columns,
            'Importance': feature_importance
        }).sort_values('Importance', ascending=False)

        # Display feature importance
        ranked_features = zip(importance_df['Feature'], importance_df['Importance'])
        for i, (feature, importance) in enumerate(ranked_features, 1):
            print(f"   {i:2d}. {feature:15s}: {importance:.4f}")

        # Visualize feature importance
        plt.figure(figsize=(10, 6))
        plt.barh(range(len(importance_df)), importance_df['Importance'], color='steelblue', alpha=0.8)
        plt.yticks(range(len(importance_df)), importance_df['Feature'])
        plt.xlabel('Feature Importance')
        plt.title(f'Feature Importance - {importance_model_name}')
        plt.grid(True, alpha=0.3, axis='x')
        plt.tight_layout()
        plt.show()

else:
    print("❌ No models were successfully trained")

## 8. Risk Analysis and Insights

In [None]:
if df is not None:
    print("⚠️ RISK ANALYSIS AND INSIGHTS")

    # Risk analysis by hour: crash count, share of serious crashes and mean
    # risk score for each hour of the day.
    hourly_risk = data.groupby('hour').agg({
        'is_serious': ['count', 'mean'],
        'risk_score': 'mean'
    }).round(4)

    hourly_risk.columns = ['Total_Accidents', 'Serious_Rate', 'Avg_Risk_Score']
    hourly_risk = hourly_risk.reset_index()

    # Identify high-risk hours: serious rate in the top quartile.
    high_risk_threshold = hourly_risk['Serious_Rate'].quantile(0.75)
    high_risk_rows = hourly_risk[hourly_risk['Serious_Rate'] >= high_risk_threshold].sort_values('hour')
    high_risk_hours = high_risk_rows['hour'].tolist()

    print(f"\n⚠️ HIGH-RISK HOURS (Top 25% by serious rate):")
    for row in high_risk_rows.itertuples(index=False):
        # `hour` is float whenever the column ever held NaN, and the 'd'
        # format code rejects floats, so cast explicitly before formatting.
        print(f"   🕐 {int(row.hour):02d}:00 - Serious Rate: {row.Serious_Rate:.1%}, Total: {int(row.Total_Accidents):,}")

    # Risk by conditions: serious rate with the flag set vs. unset.
    print(f"\n📊 RISK BY CONDITIONS:")
    conditions = {
        'Weekend vs Weekday': (data['is_weekend'] == 1, data['is_weekend'] == 0),
        'Rush Hour vs Normal': (data['is_rush_hour'] == 1, data['is_rush_hour'] == 0),
        'Night vs Day': (data['is_night'] == 1, data['is_night'] == 0),
        'Holiday Season vs Normal': (data['is_holiday_season'] == 1, data['is_holiday_season'] == 0)
    }

    for condition_name, (high_risk_mask, low_risk_mask) in conditions.items():
        high_risk_rate = data[high_risk_mask]['is_serious'].mean()
        low_risk_rate = data[low_risk_mask]['is_serious'].mean()
        # Avoid dividing by a zero baseline rate.
        ratio = high_risk_rate / low_risk_rate if low_risk_rate > 0 else float('inf')
        print(f"   {condition_name}: {high_risk_rate:.1%} vs {low_risk_rate:.1%} (Ratio: {ratio:.2f}x)")

    # Top risk factors visualization
    plt.figure(figsize=(15, 5))

    # Subplot 1: Hourly serious rate
    plt.subplot(1, 3, 1)
    plt.plot(hourly_risk['hour'], hourly_risk['Serious_Rate'], marker='o', linewidth=2, markersize=6)
    plt.title('Serious Accident Rate by Hour')
    plt.xlabel('Hour of Day')
    plt.ylabel('Serious Rate')
    plt.grid(True, alpha=0.3)

    # Subplot 2: Risk by day of week (Monday -> Sunday)
    plt.subplot(1, 3, 2)
    daily_risk = data.groupby('day_name')['is_serious'].mean().reindex(
        ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    )
    plt.bar(range(len(daily_risk)), daily_risk.values, color='coral', alpha=0.7)
    plt.title('Serious Accident Rate by Day')
    plt.xlabel('Day of Week')
    plt.ylabel('Serious Rate')
    plt.xticks(range(len(daily_risk)), [day[:3] for day in daily_risk.index], rotation=45)
    plt.grid(True, alpha=0.3)

    # Subplot 3: Risk by month
    plt.subplot(1, 3, 3)
    monthly_risk = data.groupby('month')['is_serious'].mean()
    plt.plot(monthly_risk.index, monthly_risk.values, marker='s', linewidth=2, markersize=6, color='green')
    plt.title('Serious Accident Rate by Month')
    plt.xlabel('Month')
    plt.ylabel('Serious Rate')
    plt.xticks(range(1, 13))
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Key insights summary. All extremes below are taken over the *serious
    # rate*, so the day line is labelled as most dangerous rather than as a
    # peak in accident volume. int() keeps float-typed hours/months from
    # printing as "3.0".
    most_dangerous_hour = hourly_risk.loc[hourly_risk['Serious_Rate'].idxmax(), 'hour']
    safest_hour = hourly_risk.loc[hourly_risk['Serious_Rate'].idxmin(), 'hour']
    print(f"\n🔍 KEY INSIGHTS SUMMARY:")
    print(f"   • Most dangerous hour: {int(most_dangerous_hour)}:00")
    print(f"   • Safest hour: {int(safest_hour)}:00")
    print(f"   • Most dangerous day: {daily_risk.idxmax()}")
    print(f"   • Safest day: {daily_risk.idxmin()}")
    print(f"   • Most dangerous month: {int(monthly_risk.idxmax())}")
    print(f"   • Safest month: {int(monthly_risk.idxmin())}")

## 9. Conclusions and Recommendations

In [None]:
# Final report: dataset summary, best-model summary, then a fixed list of
# recommendations and next steps.
print("📋 CONCLUSIONS AND RECOMMENDATIONS")
print("=" * 50)

# Dataset-level figures (only when the data was loaded).
if df is not None:
    _summary_lines = (
        f"\n📊 ANALYSIS SUMMARY:",
        f"   • Total Records Analyzed: {len(data):,}",
        f"   • Total Casualties: {data['total_casualties'].sum():,}",
        f"   • Serious Accident Rate: {data['is_serious'].mean():.1%}",
        f"   • Average Risk Score: {data['risk_score'].mean():.2f}",
    )
    for _line in _summary_lines:
        print(_line)

# Model-level figures (only when at least one model was trained).
if 'model_scores' in locals() and model_scores:
    best_model_name = max(model_scores, key=model_scores.get)
    best_score = model_scores[best_model_name]
    print(f"\n🤖 MODEL PERFORMANCE:")
    print(f"   • Best Model: {best_model_name}")
    print(f"   • Best AUC Score: {best_score:.4f}")
    print(f"   • Models Trained: {len(model_scores)}")

# Static closing text, printed line by line in its original order.
_closing_lines = (
    "\n🎯 KEY RECOMMENDATIONS:",
    "   1. 🚦 Enhanced Traffic Control:",
    "      - Deploy additional enforcement during peak risk hours",
    "      - Implement adaptive traffic signal timing",
    "      - Increase police presence in high-risk areas",
    "\n   2. 📱 Public Safety Campaigns:",
    "      - Target awareness campaigns during high-risk times",
    "      - Promote alternative transportation during peak hours",
    "      - Educate drivers about night-time driving risks",
    "\n   3. 🏗️ Infrastructure Improvements:",
    "      - Improve lighting in accident-prone areas",
    "      - Install additional safety barriers",
    "      - Optimize road design in high-risk zones",
    "\n   4. 📊 Data-Driven Decisions:",
    "      - Use ML predictions for resource allocation",
    "      - Implement real-time risk monitoring",
    "      - Develop early warning systems",
    "\n✅ NEXT STEPS:",
    "   • Deploy best model in production for real-time predictions",
    "   • Integrate with city traffic management systems",
    "   • Continuously retrain models with new data",
    "   • Validate predictions against actual outcomes",
    "\n🎉 ANALYSIS COMPLETE!",
    "This analysis provides actionable insights for improving traffic safety in NYC.",
)
for _line in _closing_lines:
    print(_line)