# **Install and Import Libraries**

In [46]:
!pip install pandas numpy scikit-learn folium matplotlib
import pandas as pd
import numpy as np
import folium
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report



# Mount Google Drive

In [24]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Upload The Dataset

In [25]:
file_path = '/content/drive/MyDrive/US_Accidents_March23.csv'
data = pd.read_csv(file_path, nrows=100000)
data.head()


Unnamed: 0,ID,Source,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,Source2,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,,,0.01,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,Source2,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,,,0.01,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,Source2,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,,,0.01,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,Source2,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,,,0.01,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,Source2,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,,,0.01,...,False,False,False,False,True,False,Day,Day,Day,Day


# Data Cleaning (Simplify & Prepare)

In [47]:
df = data[['Severity', 'Start_Lat', 'Start_Lng', 'Temperature(F)',
           'Visibility(mi)', 'Wind_Speed(mph)', 'Distance(mi)', 'Weather_Condition']].dropna()

df['Weather_Condition'] = df['Weather_Condition'].str.extract('(Rain|Snow|Fog|Clear|Cloud|Storm)', expand=False)
df['Weather_Condition'].fillna('Clear', inplace=True)

print(" Cleaned Data:")
df.head()


 Cleaned Data:


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Weather_Condition'].fillna('Clear', inplace=True)


Unnamed: 0,Severity,Start_Lat,Start_Lng,Temperature(F),Visibility(mi),Wind_Speed(mph),Distance(mi),Weather_Condition
2,2,39.063148,-84.032608,36.0,10.0,3.5,0.01,Clear
3,3,39.747753,-84.205582,35.1,9.0,4.6,0.01,Cloud
4,2,39.627781,-84.188354,36.0,6.0,3.5,0.01,Cloud
5,3,40.10059,-82.925194,37.9,7.0,3.5,0.01,Rain
6,2,39.758274,-84.230507,34.0,7.0,3.5,0.0,Clear


# Convert Categorical Data → Numbers

In [28]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['Weather_Condition'] = le.fit_transform(df['Weather_Condition'])


# Split into Features and Target

In [30]:
X = df[['Start_Lat','Start_Lng','Temperature(F)','Visibility(mi)',
        'Wind_Speed(mph)','Distance(mi)','Weather_Condition']]
y = df['Severity']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Train ML Model (Random Forest)

In [48]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(" Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


 Accuracy: 0.8713516388537996

Classification Report:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00        11
           2       0.90      0.87      0.88      8278
           3       0.84      0.88      0.86      6745
           4       0.00      0.00      0.00         7

    accuracy                           0.87     15041
   macro avg       0.43      0.44      0.44     15041
weighted avg       0.87      0.87      0.87     15041



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Visualize Accident Hotspots on Map (WOW Factor )

In [49]:
import folium

sample_df = df.sample(300)

center_lat = sample_df['Start_Lat'].mean()
center_lng = sample_df['Start_Lng'].mean()
m = folium.Map(location=[center_lat, center_lng], zoom_start=6)

def color(severity):
    if severity == 1:
        return 'green'
    elif severity == 2:
        return 'orange'
    elif severity == 3:
        return 'red'
    else:
        return 'darkred'

for _, row in sample_df.iterrows():
    folium.CircleMarker(
        location=[row['Start_Lat'], row['Start_Lng']],
        radius=3,
        color=color(row['Severity']),
        fill=True,
        fill_opacity=0.7
    ).add_to(m)

m


# Visualizing Accident Hotspots on an Interactive Map

In [35]:
import folium
from IPython.display import IFrame

sample_df = df.sample(300)
sample_df['Start_Lat'] = pd.to_numeric(sample_df['Start_Lat'], errors='coerce')
sample_df['Start_Lng'] = pd.to_numeric(sample_df['Start_Lng'], errors='coerce')
sample_df = sample_df.dropna(subset=['Start_Lat', 'Start_Lng'])

center_lat = sample_df['Start_Lat'].mean()
center_lng = sample_df['Start_Lng'].mean()
m = folium.Map(location=[center_lat, center_lng], zoom_start=6, tiles='CartoDB positron')

def color(severity):
    if severity == 1:
        return 'green'
    elif severity == 2:
        return 'orange'
    elif severity == 3:
        return 'red'
    else:
        return 'darkred'

for _, row in sample_df.iterrows():
    folium.CircleMarker(
        location=[row['Start_Lat'], row['Start_Lng']],
        radius=3,
        color=color(int(row['Severity'])),
        fill=True,
        fill_opacity=0.7
    ).add_to(m)
m




# Encoding Weather Conditions for Model Input

In [40]:
from sklearn.preprocessing import LabelEncoder
import folium
import numpy as np
import pandas as pd

possible_weather = ['Clear', 'Rain', 'Snow', 'Fog', 'Cloud', 'Storm']

le = LabelEncoder()
le.fit(possible_weather)
print("Encoder classes:", le.classes_)


Encoder classes: ['Clear' 'Cloud' 'Fog' 'Rain' 'Snow' 'Storm']


Smart Route Risk Prediction (Delhi → Agra)
This section simulates a real driving route between Delhi and Agra.
The trained ML model predicts the accident risk for each route segment.
Each point is color-coded according to predicted risk:
- 🟢 Low risk  
- 🟠 Moderate risk  
- 🔴 High risk  
- ⚫ Very high risk

In [41]:
route_points = [
    (28.6139, 77.2090),
    (28.3000, 77.3500),
    (27.9000, 77.5500),
    (27.6000, 77.7000),
    (27.2000, 77.9500)
]

route_df = pd.DataFrame(route_points, columns=['Start_Lat', 'Start_Lng'])
route_df['Temperature(F)'] = np.random.randint(75, 95, len(route_points))
route_df['Visibility(mi)'] = np.random.randint(5, 10, len(route_points))
route_df['Wind_Speed(mph)'] = np.random.randint(1, 8, len(route_points))
route_df['Distance(mi)'] = np.random.uniform(1.0, 5.0, len(route_points))

clear_code = le.transform(['Clear'])[0]
route_df['Weather_Condition'] = [clear_code] * len(route_points)

route_df['Predicted_Risk'] = model.predict(route_df)

m_route = folium.Map(location=[27.9, 77.5], zoom_start=7, tiles='CartoDB positron')

def risk_color(r):
    if r == 1:
        return 'green'
    elif r == 2:
        return 'orange'
    elif r == 3:
        return 'red'
    else:
        return 'darkred'

for _, row in route_df.iterrows():
    folium.CircleMarker(
        location=[row['Start_Lat'], row['Start_Lng']],
        radius=6,
        color=risk_color(int(row['Predicted_Risk'])),
        fill=True,
        fill_opacity=0.8,
        popup=f"Risk Level: {int(row['Predicted_Risk'])}"
    ).add_to(m_route)

folium.PolyLine(route_points, color='blue', weight=2, opacity=0.5).add_to(m_route)

m_route
