<a href="https://colab.research.google.com/github/aishwarya-shekar-babu/Road-Accident-Prediction/blob/main/Road_Accident_Prediction_finalmodels_app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Learning Prediction of Road Accident Severity with Urban factors and Weather conditions

## Research question


1.   **Research Question 1:** Which machine learning approach is optimal for accurately predicting the severity of UK road accidents using environmental (e.g. weather), temporal (e.g. time of day, season), vehicle, and spatial (e.g. urban/rural) data, and which features contribute most to severe outcomes?



In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# LOADING DATASET FROM GITHUB

# IMPORTING LIBRARIES

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import folium
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.metrics import precision_recall_curve


In [None]:

import joblib
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import warnings
# Ignore every warning whose category is UserWarning from this point on.
# NOTE(review): presumably meant to hide library warnings emitted while the
# pickled model/encoders are loaded and used -- confirm this is still wanted.
warnings.filterwarnings("ignore", category=UserWarning)

# === LOAD MODEL & ENCODERS ===
# The four artifacts are read, in this order, from paths relative to the
# current working directory.
# NOTE: joblib.load unpickles its input; only load files from a trusted source.
best_xgb, label_encoders, onehot_encode_cols, X_columns = (
    joblib.load(artifact_path)
    for artifact_path in (
        "xgboost_model.pkl",
        "label_encoders.pkl",
        "onehot_cols.pkl",
        "X_columns.pkl",
    )
)
print("✅ Model, encoders, and column metadata loaded.")

# === OPTIONS ===
# Choices offered by the dropdown widgets; list order is the order used in
# the UI.
# NOTE(review): these strings presumably have to match the categories the
# saved encoders were fitted on -- confirm before renaming any of them.

# Temporal / spatial context.
season_opts = ["Autumn", "Spring", "Summer", "Winter"]
time_opts = [
    "Daytime",
    "Evening_Peak",
    "Late_Night",
    "Morning_Peak",
    "Night",
]
urban_opts = ["Rural", "Unknown", "Urban"]
speed_opts = ["High_70", "Low_30", "Mid_50"]

# Environment and road conditions.
weather_opts = [
    "Fine",
    "Raining",
    "Snowing_Ice",
    "Fine+high winds",
    "Other_Adverse",
    "Unknown",
    "Fog",
]
light_opts = [
    "Darkness_Lights_On",
    "Daylight",
    "Dark_No_Lighting",
    "Darkness_Lights_Off",
    "Unknown",
]
surface_opts = ["Dry", "Wet_or_Damp", "Snow_Ice", "Flood", "Unknown"]
road_type_opts = [
    "Dual_Carriageway",
    "Single_Carriageway",
    "One_Way_Street",
    "Roundabout",
    "Slip_Road",
    "Unknown",
]
day_opts = [
    "Wednesday",
    "Thursday",
    "Friday",
    "Monday",
    "Tuesday",
    "Sunday",
    "Saturday",
]

# Driver attributes.
age_band_opts = [
    "36 - 45",
    "26 - 35",
    "46 - 55",
    "66 - 75",
    "16 - 20",
    "56 - 65",
    "Unknown",
    "21 - 25",
    "Over 75",
    "11 - 15",
    "6 - 10",
    "0 - 5",
]
sex_opts = ["Male", "Female", "Unknown"]

# Vehicle attributes.
vehicle_type_opts = [
    "Bus or coach (17 or more pass seats)",
    "Car",
    "Motorcycle 125cc and under",
    "Motorcycle over 500cc",
    "Motorcycle over 125cc and up to 500cc",
    "Taxi/Private hire car",
    "Goods 7.5 tonnes mgw and over",
    "Van / Goods 3.5 tonnes mgw or under",
    "Motorcycle 50cc and under",
    "Goods over 3.5t. and under 7.5t",
    "Minibus (8 - 16 passenger seats)",
    "Other vehicle",
    "Agricultural vehicle",
    "Pedal cycle",
    "Tram",
    "Ridden horse",
    "Unknown",
    "Motorcycle - unknown cc",
    "Mobility scooter",
    "Goods vehicle - unknown weight",
    "Electric motorcycle",
]
left_drive_opts = ["No", "Yes", "Unknown"]

# === WIDGETS ===
# One input control per model feature exposed in the form, plus the trigger
# button and the output area the prediction is rendered into.

# Accident-level counts and timing.
casualties = widgets.IntSlider(value=1, min=1, max=10, description='Casualties')
vehicles = widgets.IntSlider(value=1, min=1, max=10, description='Vehicles')
hour = widgets.IntSlider(value=12, min=0, max=23, description='Hour')
month = widgets.Dropdown(
    options=list(range(1, 13)),
    value=6,
    description='Month',
)
year = widgets.Dropdown(
    options=list(range(2005, 2026)),
    value=2023,
    description='Year',
)
day = widgets.Dropdown(options=day_opts, description='Day of Week')

# Road and environment.
road_type = widgets.Dropdown(options=road_type_opts, description='Road Type')
speed_bin = widgets.Dropdown(options=speed_opts, description='Speed Bin')
urban = widgets.Dropdown(options=urban_opts, description='Urban/Rural')
weather = widgets.Dropdown(options=weather_opts, description='Weather')
light = widgets.Dropdown(options=light_opts, description='Lighting')
surface = widgets.Dropdown(options=surface_opts, description='Surface')
season = widgets.Dropdown(options=season_opts, description='Season')
time_of_day = widgets.Dropdown(options=time_opts, description='Time of Day')

# Driver.
age_band = widgets.Dropdown(options=age_band_opts, description='Driver Age')
sex = widgets.Dropdown(options=sex_opts, description='Sex of Driver')
imd = widgets.FloatSlider(
    value=5.0,
    min=1,
    max=10,
    step=1,
    description='IMD Decile',
)

# Vehicle.
vehicle_type = widgets.Dropdown(
    options=vehicle_type_opts,
    description='Vehicle Type',
)
left_drive = widgets.Dropdown(
    options=left_drive_opts,
    description='Left-Hand Drive',
)
vehicle_age = widgets.IntSlider(value=5, min=0, max=30, description='Vehicle Age')
engine_cc = widgets.IntSlider(
    value=1600,
    min=500,
    max=5000,
    step=100,
    description='Engine CC',
)

# Action button and the area that receives the rendered prediction.
predict_btn = widgets.Button(button_style='success', description="🚦 Predict")
output_box = widgets.Output()

# === LAYOUT ===
# Single vertical column: every input in form order, then the button and the
# output area beneath it.
form_widgets = widgets.VBox(
    children=[
        casualties,
        vehicles,
        hour,
        month,
        year,
        day,
        road_type,
        speed_bin,
        urban,
        weather,
        light,
        surface,
        season,
        time_of_day,
        age_band,
        sex,
        imd,
        vehicle_type,
        left_drive,
        vehicle_age,
        engine_cc,
        predict_btn,
        output_box,
    ]
)

# === CALLBACK ===
def on_predict_clicked(b, threshold=0.6):
    """Predict severity from the current form values and render the result.

    Used as the click handler of ``predict_btn``. The widget values are
    collected into a single-row DataFrame, encoded with the loaded
    ``label_encoders`` and one-hot encoded over ``onehot_encode_cols``, then
    aligned to ``X_columns`` before being passed to ``best_xgb``. The value in
    column 1 of ``predict_proba`` is reported in ``output_box`` as the
    probability of a Severe/Fatal outcome.

    If encoding fails (for example a selected category the label encoder does
    not know, or a configured one-hot column that is absent from the input),
    the error is shown in ``output_box`` instead of escaping the callback,
    where the user would get no feedback in the form.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).
        threshold: Probability at or above which the result is labelled
            "Severe/Fatal"; below it the label is "Slight".
    """
    from html import escape  # local import: only needed to render error text

    input_data = {
        'Number_of_Casualties': casualties.value,
        'Number_of_Vehicles': vehicles.value,
        'Hour': hour.value,
        'Month': month.value,
        'Year': year.value,
        'Season': season.value,
        'Time_of_Day': time_of_day.value,
        'Urban_Rural_Label': urban.value,
        'Speed_Bin': speed_bin.value,
        'Weather_Label': weather.value,
        'Light_Label': light.value,
        'Road_Surface_Label': surface.value,
        'Road_Type_Label': road_type.value,
        'Day_of_Week_Label': day.value,
        'Age_Band_of_Driver': age_band.value,
        'Sex_of_Driver': sex.value,
        'Vehicle_Type': vehicle_type.value,
        'Was_Vehicle_Left_Hand_Drive': left_drive.value,
        'Age_of_Vehicle': vehicle_age.value,
        'Driver_IMD_Decile': imd.value,
        'Engine_Capacity_.CC.': engine_cc.value,
        # NOTE(review): fixed placeholder value; the form has no input for
        # this feature -- confirm it is appropriate for the trained model.
        'Vehicle_Reference': 12345,
    }

    df = pd.DataFrame([input_data])

    # === Apply Encoding (same as training) ===
    try:
        for col in label_encoders:
            if col in df.columns:
                df[col] = label_encoders[col].transform(df[col])
        df = pd.get_dummies(df, columns=onehot_encode_cols)
    except (KeyError, ValueError) as err:
        # Report the problem in the form rather than failing silently.
        with output_box:
            clear_output(wait=True)
            display(HTML(
                '<p style="color:red; font-family: Arial, sans-serif;">'
                '<b>Could not encode the selected inputs:</b> '
                f'{escape(str(err))}</p>'
            ))
        return

    # Align to the training layout in one step: columns absent from this
    # single-row frame are created with 0, extras are dropped, and the order
    # follows X_columns. This avoids inserting columns one at a time.
    df = df.reindex(columns=X_columns, fill_value=0)

    # === Prediction ===
    proba = best_xgb.predict_proba(df)[0][1]
    is_severe = proba >= threshold
    label = "Severe/Fatal" if is_severe else "Slight"
    emoji = "🔴" if is_severe else "🟢"
    color = "#ffdddd" if is_severe else "#ddffdd"

    with output_box:
        # wait=True replaces the previous card only once the new one is
        # ready, so the output area does not flicker between predictions.
        clear_output(wait=True)
        display(HTML(f"""
            <div style="
                border: 2px solid #333;
                border-radius: 10px;
                padding: 15px;
                background-color: {color};
                font-family: Arial, sans-serif;
                width: 420px;
            ">
                <h3 style="margin-top:0; color:#333;">📊 Prediction Summary</h3>
                <p style="font-size:18px; font-weight:bold;">
                    Label: {emoji} <span style="color:{'red' if is_severe else 'green'};">{label}</span>
                </p>
                <p style="font-size:16px;">
                    Probability of Severe/Fatal: <b>{round(proba * 100, 2)}%</b>
                </p>
            </div>
        """))

# Wire the button to its handler before the form is shown.
predict_btn.on_click(on_predict_clicked)

# === DISPLAY ===
# Heading first, then the form; a single display() call renders its
# arguments in order.
display(
    HTML("<h2>🚧 Accident Severity Prediction Tool</h2>"),
    form_widgets,
)

