In [5]:
import pandas as pd

# Location of the cleaned crash dataset (update this path as needed)
file_path = 'cleaned_crash_data.csv'
df = pd.read_csv(file_path)

# Parse both timestamp columns so datetime accessors can be used on them
for time_col in ('start_time', 'end_time'):
    df[time_col] = pd.to_datetime(df[time_col])

# Derive calendar fields from the accident start time
start_dt = df['start_time'].dt
df['Year'] = start_dt.year
df['Month'] = start_dt.month
df['Day'] = start_dt.day
df['Hour'] = start_dt.hour
df['Weekday'] = start_dt.day_name()

# Define a function to classify season based on month
def get_season(month):
    """Classify a month number into a season name.

    Parameters
    ----------
    month : int or float or None
        Month number. Floats are accepted (12.0 compares equal to 12), which
        matters when the month column holds missing values.

    Returns
    -------
    str or None
        'Winter' for 12, 1, 2; 'Spring' for 3, 4, 5; 'Summer' for 6, 7, 8;
        'Autumn' for any other non-missing value; None when the month is
        missing (None / NaN).
    """
    # A missing month previously fell through to the final branch and was
    # silently counted as 'Autumn'; report it as missing instead.
    if pd.isna(month):
        return None
    if month in (12, 1, 2):
        return 'Winter'
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    return 'Autumn'

# Label every record with the season of its start month
df['Season'] = df['Month'].apply(get_season)


def _count_by(frame, keys):
    """Return one row per distinct `keys` combination with its row count."""
    return frame.groupby(keys).size().reset_index(name='Accident_Count')


# Accidents per state, ordered by descending count
summary_by_state = (
    df['state']
    .value_counts()
    .rename_axis('State')
    .reset_index(name='Accident_Count')
)

# Accidents per (state, city) pair
summary_by_city = _count_by(df, ['state', 'city'])

# Accidents per calendar year
yearly_summary = _count_by(df, 'Year')

# Accidents per (year, month) pair
monthly_summary = _count_by(df, ['Year', 'Month'])

# Accidents per season
seasonal_summary = _count_by(df, 'Season')

# Print the tables as a quick sanity check
for heading, table in (
    ("Summary by State:\n", summary_by_state),
    ("\nYearly Summary:\n", yearly_summary),
    ("\nMonthly Summary:\n", monthly_summary),
    ("\nSeasonal Summary:\n", seasonal_summary),
):
    print(heading, table)

Summary by State:
    State  Accident_Count
0     CA         1741433
1     FL          880192
2     TX          582837
3     SC          382557
4     NY          347960
5     NC          338199
6     VA          303301
7     PA          296620
8     MN          192084
9     OR          179660
10    AZ          170609
11    GA          169234
12    IL          168958
13    TN          167388
14    MI          162191
15    LA          149701
16    NJ          140719
17    MD          140417
18    OH          118115
19    WA          108221
20    AL          101044
21    UT           97079
22    CO           90885
23    OK           83647
24    MO           77323
25    CT           71005
26    IN           67224
27    MA           61996
28    WI           34688
29    KY           32254
30    NE           28870
31    MT           28496
32    IA           26307
33    AR           22780
34    NV           21665
35    KS           20992
36    DC           18630
37    RI           16971
38    

In [4]:
# List every column name available in the loaded frame
print(list(df.columns))


['id', 'source', 'severity', 'start_time', 'end_time', 'start_lat', 'start_lng', 'end_lat', 'end_lng', 'distance(mi)', 'description', 'street', 'city', 'county', 'state', 'zipcode', 'country', 'timezone', 'airport_code', 'weather_timestamp', 'temperature(f)', 'wind_chill(f)', 'humidity(%)', 'pressure(in)', 'visibility(mi)', 'wind_direction', 'wind_speed(mph)', 'precipitation(in)', 'weather_condition', 'amenity', 'bump', 'crossing', 'give_way', 'junction', 'no_exit', 'railway', 'roundabout', 'station', 'stop', 'traffic_calming', 'traffic_signal', 'turning_loop', 'sunrise_sunset', 'civil_twilight', 'nautical_twilight', 'astronomical_twilight']


In [7]:
import pandas as pd
import streamlit as st

# Load the CSV file
file_path = 'cleaned_crash_data.csv'  # Update this path as needed


@st.cache_data
def load_crash_data(path):
    """Read the crash CSV and add calendar columns derived from `start_time`.

    Streamlit re-executes the whole script on every widget interaction, so
    the file read and datetime parsing are cached and only repeated when
    `path` changes. The cache is keyed on the argument, not on the file
    contents: clear the cache after editing the file on disk.

    Parameters
    ----------
    path : str
        Location of the CSV file; it must contain `start_time` and
        `end_time` columns.

    Returns
    -------
    pandas.DataFrame
        The file contents with `start_time` / `end_time` parsed to datetimes
        and `Year`, `Month`, `Day`, `Hour`, `Weekday` columns added.
    """
    frame = pd.read_csv(path)

    # Convert date columns to datetime for analysis
    frame['start_time'] = pd.to_datetime(frame['start_time'])
    frame['end_time'] = pd.to_datetime(frame['end_time'])

    # Extract time components
    frame['Year'] = frame['start_time'].dt.year
    frame['Month'] = frame['start_time'].dt.month
    frame['Day'] = frame['start_time'].dt.day
    frame['Hour'] = frame['start_time'].dt.hour
    frame['Weekday'] = frame['start_time'].dt.day_name()
    return frame


# st.cache_data hands back a copy, so adding columns to `df` later does not
# alter the cached frame.
df = load_crash_data(file_path)

# Define a function to classify season based on month
def get_season(month):
    """Classify a month number into a season name.

    Parameters
    ----------
    month : int or float or None
        Month number. Floats are accepted (12.0 compares equal to 12), which
        matters when the month column holds missing values.

    Returns
    -------
    str or None
        'Winter' for 12, 1, 2; 'Spring' for 3, 4, 5; 'Summer' for 6, 7, 8;
        'Autumn' for any other non-missing value; None when the month is
        missing (None / NaN).
    """
    # A missing month previously fell through to the final branch and was
    # silently counted as 'Autumn'; report it as missing instead.
    if pd.isna(month):
        return None
    if month in (12, 1, 2):
        return 'Winter'
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    return 'Autumn'

# Label every record with the season of its start month
df['Season'] = df['Month'].apply(get_season)

# Streamlit App
st.title("US Accident Data Analysis")

# State selection (rows without a state are left out of the dropdown)
state_list = sorted(df['state'].dropna().unique())
selected_state = st.selectbox("Select a State", state_list)

# City selection, limited to cities recorded for the chosen state
filtered_df = df[df['state'] == selected_state]
city_list = sorted(filtered_df['city'].dropna().unique())
selected_city = st.selectbox("Select a City", city_list)

# Filtered data for selected city
city_data = filtered_df[filtered_df['city'] == selected_city]

st.subheader(f"Analytics for {selected_city}, {selected_state}")

# Day of the week analysis
weekday_counts = city_data['Weekday'].value_counts().reset_index()
weekday_counts.columns = ['Weekday', 'count']
st.bar_chart(data=weekday_counts, x='Weekday', y='count', use_container_width=True)

# Hour of the day analysis
# Reindex over all 24 hours so an hour without accidents is plotted as 0
# instead of being skipped (which makes the line bridge the gap).
hourly_counts = (
    city_data['Hour']
    .value_counts()
    .reindex(range(24), fill_value=0)
    .reset_index()
)
hourly_counts.columns = ['Hour', 'count']
st.line_chart(data=hourly_counts, x='Hour', y='count', use_container_width=True)

# Street-level analysis
if 'street' in city_data.columns:
    top_streets = city_data['street'].value_counts().head(10).reset_index()
    top_streets.columns = ['Street', 'count']
    st.write("Top 10 Streets with Most Accidents")
    st.bar_chart(data=top_streets, x='Street', y='count', use_container_width=True)

# Yearly trend
yearly_summary = city_data.groupby('Year').size().reset_index(name='Accident_Count')
st.line_chart(data=yearly_summary, x='Year', y='Accident_Count', use_container_width=True)

# Monthly trend (all years pooled)
# Reindex over months 1-12 so a month without accidents is plotted as 0.
monthly_summary = (
    city_data.groupby('Month')
    .size()
    .reindex(range(1, 13), fill_value=0)
    .rename_axis('Month')
    .reset_index(name='Accident_Count')
)
st.line_chart(data=monthly_summary, x='Month', y='Accident_Count', use_container_width=True)

# Seasonal trend
seasonal_summary = city_data['Season'].value_counts().reset_index()
seasonal_summary.columns = ['Season', 'count']
st.bar_chart(data=seasonal_summary, x='Season', y='count', use_container_width=True)


DeltaGenerator()