## Load and Inspect the Dataset ##

In [None]:
!pip install opendatasets
!pip install folium


In [None]:
import pandas as pd

# Load first 100,000 rows to save memory
df = pd.read_csv("US_Accidents_March23.csv", nrows=100000)

# Preview data. The shape and column names are printed as text, and the frame
# is left as the last expression of the cell so the notebook renders it as a
# table (wrapped in a tuple it would only be shown as plain text).
print(df.shape)
print(df.columns.tolist())
df.head()


## Basic Cleaning + Column Selection ##

In [None]:
# Keep only the columns used by the analysis and discard any row in which one
# of them is missing.
key_columns = ['Severity', 'Start_Time', 'Weather_Condition', 'State', 'Start_Lat', 'Start_Lng']

df = (
    df.loc[:, key_columns]
    .dropna()
    .assign(
        # Parse the timestamp column into datetimes
        Start_Time=lambda frame: pd.to_datetime(frame['Start_Time']),
        # Hour component of the parsed timestamp (sees the converted column above)
        Hour=lambda frame: frame['Start_Time'].dt.hour,
    )
)
df.head()


## Visualize Severity Distribution ##

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One bar per severity level, drawn on an explicit Axes.
# The palette is attached through `hue` because seaborn 0.13 deprecated passing
# `palette` without `hue`; dodge=False keeps each bar at full width on its own
# x position instead of splitting the slot between hue levels.
fig, ax = plt.subplots()
sns.countplot(x='Severity', hue='Severity', data=df, palette='viridis', dodge=False, ax=ax)

# A hue legend would only repeat the x-axis labels, so remove it if one was drawn.
if ax.get_legend() is not None:
    ax.get_legend().remove()

ax.set_title("Accident Severity Distribution")
ax.set_xlabel("Severity (1 = Least, 4 = Most)")
ax.set_ylabel("Number of Accidents")
plt.show()


## Top Weather Conditions ##

In [None]:
# Ten most frequently recorded weather conditions, drawn as horizontal bars
top_conditions = df['Weather_Condition'].value_counts().head(10)

ax = top_conditions.plot(kind='barh', color='skyblue')
ax.set_title("Top 10 Weather Conditions during Accidents")
ax.set_xlabel("Number of Accidents")
ax.set_ylabel("Weather Condition")
plt.show()


## Accidents by Hour of Day ##

In [None]:
# Hour holds whole-number values, so treat it as discrete: each bar is one unit
# wide and centred on its hour, lining up with the integer ticks set below.
# (bins=24 would instead cut the observed min..max range into 24 equal slices,
# whose edges do not fall on whole hours.)
fig, ax = plt.subplots()
sns.histplot(df['Hour'], discrete=True, kde=False, color='orange', ax=ax)
ax.set_title("Accidents by Hour of Day")
ax.set_xlabel("Hour")
ax.set_ylabel("Number of Accidents")
ax.set_xticks(range(0, 24))
ax.grid(True)
plt.show()


## Accident Location Heatmap ##

In [None]:
import folium
from folium.plugins import HeatMap

# Sample for performance. The sample size is capped at the number of available
# rows so sampling cannot fail on a small frame, and the seed is fixed so the
# same points are drawn on every run.
sample_df = df.sample(n=min(5000, len(df)), random_state=42)

# Create base map
map_ = folium.Map(location=[39.5, -98.35], zoom_start=4)

# Add heatmap: build the [lat, lng] pairs in one vectorised step rather than
# iterating over the rows one by one.
heat_data = sample_df[['Start_Lat', 'Start_Lng']].values.tolist()
HeatMap(heat_data).add_to(map_)

# Last expression of the cell, so the notebook renders the map
map_
