Import libraries

In [1]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
warnings.filterwarnings("ignore")


Load dataset

In [2]:
# Read the accidents CSV (path is relative to the working directory) into
# memory, then preview the first five rows to sanity-check the parse.
csv_name = "US_Accidents_March23.csv"
df = pd.read_csv(filepath_or_buffer=csv_name)
df.head(n=5)

Unnamed: 0,ID,Source,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,Source2,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,,,0.01,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,Source2,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,,,0.01,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,Source2,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,,,0.01,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,Source2,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,,,0.01,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,Source2,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,,,0.01,...,False,False,False,False,True,False,Day,Day,Day,Day


Overview

In [None]:
# Overview of the loaded frame: size, schema, missing values, summary statistics.
# Only the leading columns are previewed to keep the output short, so the
# expensive summaries are computed on that column slice instead of on every
# column and then trimmed afterwards.

# Shape and columns
print("Shape of dataset:", df.shape)
print("\nColumns:", df.columns.tolist())

# Data types (first 20 columns)
print("\nData types:")
print(df.dtypes.head(20))

# Missing values (first 20 columns)
print("\nMissing values:")
print(df.iloc[:, :20].isnull().sum())

# Basic statistics (first 15 columns, one row per column).
# NOTE(review): the statistic columns match the full-frame describe() as long
# as the slice contains both numeric and non-numeric columns, which it does
# for the column list printed above.
print("\nBasic statistics:")
df.iloc[:, :15].describe(include="all").transpose()


Shape of dataset: (7728394, 46)

Columns: ['ID', 'Source', 'Severity', 'Start_Time', 'End_Time', 'Start_Lat', 'Start_Lng', 'End_Lat', 'End_Lng', 'Distance(mi)', 'Description', 'Street', 'City', 'County', 'State', 'Zipcode', 'Country', 'Timezone', 'Airport_Code', 'Weather_Timestamp', 'Temperature(F)', 'Wind_Chill(F)', 'Humidity(%)', 'Pressure(in)', 'Visibility(mi)', 'Wind_Direction', 'Wind_Speed(mph)', 'Precipitation(in)', 'Weather_Condition', 'Amenity', 'Bump', 'Crossing', 'Give_Way', 'Junction', 'No_Exit', 'Railway', 'Roundabout', 'Station', 'Stop', 'Traffic_Calming', 'Traffic_Signal', 'Turning_Loop', 'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight', 'Astronomical_Twilight']

Data types:
ID                    object
Source                object
Severity               int64
Start_Time            object
End_Time              object
Start_Lat            float64
Start_Lng            float64
End_Lat              float64
End_Lng              float64
Distance(mi)         float64
Descr

Parse Start_Time and derive calendar features (year, month, day, hour, day of week) for the charts below

In [None]:
# Parse the raw timestamp strings; entries that cannot be parsed become NaT
# and the corresponding rows are discarded.
df["Start_Time"] = pd.to_datetime(df["Start_Time"], errors="coerce", format="mixed")
df = df.dropna(subset=["Start_Time"])

# Derive the calendar columns from the parsed timestamp via the .dt accessor.
start_dt = df["Start_Time"].dt
for column, attribute in (
    ("Year", "year"),
    ("Month", "month"),
    ("Day", "day"),
    ("Hour", "hour"),
):
    df[column] = getattr(start_dt, attribute)
df["DayOfWeek"] = start_dt.day_name()

# Preview the timestamp next to its derived columns.
preview_columns = ["Start_Time", "Year", "Month", "Day", "Hour", "DayOfWeek"]
df[preview_columns].head()



Number of accidents per year

In [None]:
# Yearly accident totals: one bar per distinct value of df["Year"].
plt.figure(figsize=(10, 5))
ax = sns.countplot(x="Year", data=df, palette="viridis")
ax.set_title("Number of Accidents per Year")
ax.tick_params(axis="x", labelrotation=45)  # slant the year labels
plt.show()

Number of accidents per month

In [None]:
# Accident totals by month number, taken from df["Month"].
plt.figure(figsize=(10, 5))
ax = sns.countplot(x="Month", data=df, palette="coolwarm")
ax.set_title("Number of Accidents per Month")
plt.show()

Number of accidents per hour

In [None]:
# Accident totals by hour of day, taken from df["Hour"].
plt.figure(figsize=(10, 5))
ax = sns.countplot(x="Hour", data=df, palette="magma")
ax.set_title("Number of Accidents by Hour of the Day")
plt.show()

Accidents by state

In [None]:
# Count accidents per state and keep the ten most frequent
# (value_counts sorts in descending order).
top_states = df["State"].value_counts().iloc[:10]

plt.figure(figsize=(12, 6))
ax = sns.barplot(x=top_states.index, y=top_states.values, palette="viridis")
ax.set_title("Top 10 States with Most Accidents")
ax.set_ylabel("Number of Accidents")
plt.show()

Accidents on map

In [None]:
# Plot a random subset only, to keep the interactive figure small.
# A fixed random_state makes the sample, and therefore the map, reproducible
# across kernel restarts.
df_sample = df.sample(n=5000, random_state=42)

fig = px.scatter_mapbox(
    df_sample,
    lat="Start_Lat",
    lon="Start_Lng",
    hover_name="City",
    hover_data=["State", "Severity"],
    color="Severity",
    zoom=3,
    height=600,
    # Severity is an ordered scale, not a cycle. A cyclical palette such as
    # IceFire starts and ends on the same colour, so the lowest and highest
    # severities would look alike; use a sequential palette instead.
    color_continuous_scale=px.colors.sequential.Viridis,
)

# The open-street-map tile style works without a Mapbox access token.
fig.update_layout(
    mapbox_style="open-street-map",
    title="US Accidents Map (Sample 5k Rows)",
)
fig.show()


Weather condition analysis

In [None]:
# Count accidents per reported weather condition and keep the fifteen most
# frequent (value_counts sorts in descending order).
top_weather = df["Weather_Condition"].value_counts().iloc[:15]

# Horizontal bars: condition names on the y-axis, counts on the x-axis.
plt.figure(figsize=(12, 6))
ax = sns.barplot(x=top_weather.values, y=top_weather.index, palette="coolwarm")
ax.set_title("Top 15 Weather Conditions During Accidents")
ax.set_xlabel("Number of Accidents")
plt.show()

In [None]:
# Show the kernel's current working directory, i.e. where the relative CSV
# path used when loading the dataset is resolved from.
# `os` is imported in the notebook's top import cell.
os.getcwd()
