In [31]:
from jupyter_dash import JupyterDash
from dash import dcc
from dash import html
import plotly.express as px
import pandas as pd
from dash.dependencies import Output,Input,State
import numpy as np
import plotly.graph_objects as go


In [2]:
# Load the raw accident records and the state lookup table.
# Both paths are raw strings so the backslash stays a literal path separator;
# the non-raw form "data\States.csv" only worked because "\S" is not a
# recognised escape, which newer Python versions warn about.
acc_df = pd.read_csv(r'data\US_Accidents.csv')
state = pd.read_csv(r'data\States.csv')


In [3]:
# Print a summary of the raw frame: row count, column names, non-null counts and dtypes.
acc_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1516064 entries, 0 to 1516063
Data columns (total 47 columns):
 #   Column                 Non-Null Count    Dtype  
---  ------                 --------------    -----  
 0   ID                     1516064 non-null  object 
 1   Severity               1516064 non-null  int64  
 2   Start_Time             1516064 non-null  object 
 3   End_Time               1516064 non-null  object 
 4   Start_Lat              1516064 non-null  float64
 5   Start_Lng              1516064 non-null  float64
 6   End_Lat                1516064 non-null  float64
 7   End_Lng                1516064 non-null  float64
 8   Distance(mi)           1516064 non-null  float64
 9   Description            1516064 non-null  object 
 10  Number                 469969 non-null   float64
 11  Street                 1516064 non-null  object 
 12  Side                   1516064 non-null  object 
 13  City                   1515981 non-null  object 
 14  County            

In [4]:
# Restrict the frame to the fields that the rest of the notebook reads.
columns = [
    'Severity',
    'Start_Lat',
    'Start_Lng',
    'Start_Time',
    'End_Time',
    'Distance(mi)',
    'City',
    'County',
    'State',
    'Weather_Timestamp',
    'Weather_Condition',
    'Sunrise_Sunset',
]
acc_df = acc_df.loc[:, columns]

In [5]:
# Draw a fixed-seed random subset of 200000 rows so later cells stay fast and repeatable.
acc_df = acc_df.sample(n=200000, axis=0, random_state=0)

In [6]:
# Discard every row that has a missing value in any of the kept columns.
acc_df = acc_df.dropna(axis=0, how='any')

In [7]:
# Persist the sampled, cleaned data for future use.
# index=False keeps the row index out of the file; otherwise reading the CSV
# back yields an extra unnamed column holding the old index values.
acc_df.to_csv(r'data\sampled.csv', index=False)

# If Sampled File Exists

In [35]:
# Reload the preprocessed sample saved by the cell above, replacing acc_df.
# If the file was saved together with its row index, that index comes back
# as an extra unnamed column; the later cells only address columns by name.
acc_df = pd.read_csv(r'data\sampled.csv')

In [36]:
# Parse the timestamp columns. pd.to_datetime is used because casting to the
# unit-less "datetime64" dtype is rejected by pandas 2.x.
acc_df["Start_Time"] = pd.to_datetime(acc_df["Start_Time"])
acc_df["End_Time"] = pd.to_datetime(acc_df["End_Time"])

# Calendar features derived from the start of each accident.
start_time = acc_df["Start_Time"].dt
acc_df["year"] = start_time.year.astype("int")
acc_df["month"] = start_time.month.astype("int")
acc_df["hour"] = start_time.hour.astype("int")
acc_df["year_month"] = start_time.strftime("%Y-%m")

# Accident duration as a timedelta plus float views in several units.
# total_seconds() works on every pandas version, whereas casting to
# timedelta64[m]/[h]/[D] is not supported by pandas 2.x. The values are
# fractional (e.g. 90 seconds -> 1.5 minutes) rather than whole units.
acc_df["duration"] = acc_df["End_Time"] - acc_df["Start_Time"]
duration_seconds = acc_df["duration"].dt.total_seconds()
acc_df["duration_sec"] = duration_seconds
acc_df["duration_mins"] = duration_seconds / 60
acc_df["duration_hours"] = duration_seconds / 3600
acc_df["duration_days"] = duration_seconds / 86400


In [37]:
# Attach the state lookup columns to each accident by matching the accident's
# "State" value to the lookup's "Code"; rows without a match are dropped (inner join).
acc_df = pd.merge(
    acc_df,
    state,
    left_on="State",
    right_on="Code",
    how="inner",
)

In [38]:
# Filter out accidents with a very long duration (more than 10 days).
# The "year_month" column created when the timestamps were parsed survives
# this row filter, so it is not recomputed here (re-assigning it on the
# filtered slice would only trigger a SettingWithCopyWarning).
acc_df = acc_df[~(acc_df["duration_days"] > 10)]

# Aggregate per state on the selected column only: taking the mean of the
# whole frame fails on pandas 2.x because of the text/datetime columns.
accidents_by_state = acc_df.groupby('State_Name')
mean_wait_by_state = accidents_by_state['duration_mins'].mean().reset_index()
mean_count_by_state = accidents_by_state['Severity'].count().reset_index()
mean_by_state = mean_wait_by_state.merge(mean_count_by_state, how='inner', on="State_Name")

# The per-state mean of duration_mins already is the average waiting time per
# accident. It used to be divided by the accident count a second time, which
# shrank the figure for every state in proportion to its number of accidents.
mean_by_state["mins_by_acc"] = mean_by_state["duration_mins"]
mean_acc_by_state_mins = (
    mean_by_state[["State_Name", "mins_by_acc"]].sort_values("mins_by_acc")
)

In [39]:
# Bar chart of the per-state waiting-time metric on a logarithmic y axis.
# The user-facing label and title previously misspelled "Average".
mean_wait_fig = px.bar(
    mean_acc_by_state_mins,
    x="State_Name",
    y="mins_by_acc",
    labels={'State_Name': 'State', 'mins_by_acc': 'Average Waiting in Minutes by Accident'},
    log_y=True,
    title='Average waiting time in mins per accident for each State',
)

In [40]:
# Scatter of accident coordinates coloured by severity; this is the initial
# figure handed to the 'Severity_map_fig' graph in the layout.
# The title used to read 'Top cities in accidents', copied from the city bar
# chart, which does not describe this plot.
Severity_map_fig = px.scatter(
    acc_df,
    x='Start_Lng',
    y='Start_Lat',
    opacity=0.5,
    width=600,
    height=400,
    color='Severity',
    color_continuous_scale='deep',
    hover_name='State_Name',
    title='US car crash distribution by Severity',
)

In [41]:
# Count accidents per city and keep the 30 cities with the most accidents,
# ordered from most to fewest.
Top_cities = (
    acc_df.groupby('City')['Severity']
    .count()
    .sort_values(ascending=False)
    .head(30)
)

# Bar chart of those counts; the city names come from the Series index.
Top_cities_fig = px.bar(
    x=Top_cities.index,
    y=Top_cities.values,
    title='Top cities in accidents',
    labels={'x': 'Top cities', 'y': 'Number of accidents'},
)

In [42]:
# Number of accidents per "YYYY-MM" bucket, in chronological order.
accidents_per_month = acc_df.groupby('year_month')['Severity'].count().sort_index()

# Line chart on a logarithmic y axis with a marker at every month.
# `markers` is a boolean switch; the string 'date' passed before only
# worked because any non-empty string is truthy.
accidents_month_fig = px.line(
    x=accidents_per_month.index,
    y=accidents_per_month.values,
    log_y=True,
    markers=True,
    labels={'x': 'Date', 'y': 'Number of accidents'},
    title='Accidents Per Month',
)

In [43]:
# Filter out accidents that affected a very long stretch of road (more than 10 miles).
acc_df = acc_df[~(acc_df["Distance(mi)"] > 10)]

# Aggregate per state on the selected column only: taking the mean of the
# whole frame fails on pandas 2.x because of the text/datetime columns.
accidents_by_state = acc_df.groupby('State_Name')
mean_dist_by_state = accidents_by_state['Distance(mi)'].mean().reset_index()
mean_count_by_state = accidents_by_state['Severity'].count().reset_index()
mean_by_state = mean_dist_by_state.merge(mean_count_by_state, how='inner', on="State_Name")

# The per-state mean of Distance(mi) already is the average distance per
# accident. It used to be divided by the accident count a second time, which
# shrank the figure for every state in proportion to its number of accidents.
mean_by_state["miles_by_acc"] = mean_by_state["Distance(mi)"]
mean_acc_by_state_miles = (
    mean_by_state[["State_Name", "miles_by_acc"]].sort_values("miles_by_acc")
)

In [44]:
# Bar chart of the per-state distance metric on a logarithmic y axis.
# The user-facing label and title previously misspelled "Average".
mean_distance_by_state_fig = px.bar(
    mean_acc_by_state_miles,
    x="State_Name",
    y="miles_by_acc",
    labels={'State_Name': 'State', 'miles_by_acc': 'Average Distance Per Accident'},
    log_y=True,
    title='Average Distance in miles affected per accident for each State',
)

In [45]:
# Mean affected distance for each severity level. Only the distance column is
# averaged: a mean over the whole frame fails on pandas 2.x because of the
# text/datetime columns.
mean_wait_bysevety = acc_df.groupby('Severity')['Distance(mi)'].mean().reset_index()

# Line chart of mean distance against severity.
# - The axis labels were swapped before: the Severity axis carried the
#   distance caption. Each column now gets the caption that describes it.
# - `markers` is a boolean switch; the column name passed before only worked
#   because a non-empty string is truthy.
mean_wait_bysevety_fig = px.line(
    mean_wait_bysevety,
    x="Severity",
    y="Distance(mi)",
    markers=True,
    labels={'Distance(mi)': 'Average Distance in Miles', 'Severity': 'Severity'},
    title='Severity Effect of Traffic',
)

# Overlay the same values as bars. The trailing semicolon suppresses the
# figure echo without assigning to `__`, which IPython uses for its own
# output history.
mean_wait_bysevety_fig.add_bar(x=mean_wait_bysevety["Severity"], y=mean_wait_bysevety["Distance(mi)"]);

In [46]:
# Accident counts per Sunrise_Sunset value, plus each value's share of the total.
count_by_sun = acc_df.groupby('Sunrise_Sunset')['Severity'].count().reset_index()
sun_total = count_by_sun["Severity"].sum()
count_by_sun["Percent"] = count_by_sun["Severity"] / sun_total

labels = count_by_sun["Sunrise_Sunset"]
values = count_by_sun["Severity"]

# Donut chart (hole=0.4) with the first slice pulled out slightly.
day_night_pie = go.Pie(labels=labels, values=values, pull=[0.1, 0], hole=0.4)
day_night_fig = go.Figure(data=[day_night_pie])

# Centre the title near the top of the figure; the returned figure is bound
# to a throwaway name so the cell does not echo it.
day_night_title = dict(
    text="Percentage Of Accidents According to Day Light",
    x=0.5,
    y=0.9,
    xanchor='center',
    yanchor='top',
)
____ = day_night_fig.update_layout(title=day_night_title)


In [47]:
# Accident counts per weather condition, most frequent first, plus each
# condition's share of all accidents.
count_byweather_sev = (
    acc_df.groupby('Weather_Condition')['Severity']
    .count()
    .reset_index()
    .sort_values("Severity", ascending=False)
)
count_byweather_sev["Percent"] = count_byweather_sev["Severity"] / count_byweather_sev["Severity"].sum()

# Plot only the most frequent conditions. Labels and values are taken from
# the same slice so they always line up; previously the labels were cut to
# seven rows while the values still held every condition.
# NOTE: the pie percentages are therefore relative to these top conditions,
# not to all accidents (that share is in the "Percent" column).
top_weather = count_byweather_sev.head(7)
labels = top_weather["Weather_Condition"]
values = top_weather["Severity"]

# Pull out the first (largest) slice only; one pull entry per plotted slice.
pull = [0.1] + [0] * (len(top_weather) - 1)

count_byweather_fig = go.Figure(data=[go.Pie(labels=labels, values=values, pull=pull, hole=0.4)])
__ = count_byweather_fig.update_layout(
    title={
        'text': "Percentage Of Accidents Per Weather condition",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'})


In [48]:
# Create the Dash application, styled by a single external CSS stylesheet.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = JupyterDash(external_stylesheets=external_stylesheets)

In [49]:
def _banner(headline, caption, background):
    """Build one half-width banner: a large headline figure above a caption.

    headline   -- text shown in the large H2 (e.g. a percentage)
    caption    -- explanatory sentence shown in the H4 below it
    background -- CSS colour used as the banner's background
    """
    text_style = {'textAlign': 'center', 'color': 'black', 'fontFamily': 'Arial Italic'}
    return html.Div(
        [
            html.H2(headline, style={**text_style, 'fontSize': 60}),
            html.H4(caption, style={**text_style, 'fontSize': 30}),
        ],
        className='six columns',
        style={'textAlign': 'center', 'backgroundColor': background},
    )


def _half_width(children):
    """Wrap a component (or list of components) in a 'six columns' container."""
    return html.Div(children, className='six columns')


# Page layout: title, two headline banners, then four rows of paired charts.
app.layout = html.Div(children=[
    html.H1("US Crash Severity Analysis", style={'textAlign': 'center', 'color': 'gray'}, className='twelve columns'),
    html.Br(),

    # Headline banners (captions previously contained spelling mistakes).
    # NOTE(review): the two percentages are hard-coded and are not recomputed
    # from the data loaded above -- confirm they still match the current sample.
    html.Div([
        _banner('60 %', 'Of accidents in the US occur in daylight', '#42a5f5'),
        _banner('38 %', 'Of accidents in the US occur in fair weather', '#ef5350'),
    ], className='row'),

    # Severity map with its month/hour filters, next to the top-cities chart
    # with its city-count selector.
    html.Div([
        _half_width([
            dcc.Graph(id='Severity_map_fig', figure=Severity_map_fig),
            dcc.Dropdown(
                id='month_dropdown',
                options=[{'label': str(month), 'value': str(month)} for month in sorted(acc_df['month'].unique())],
                placeholder="choose the month"),
            dcc.Dropdown(
                id='hour_dropdown',
                options=[{'label': str(hour), 'value': str(hour)} for hour in sorted(acc_df['hour'].unique())],
                placeholder="choose the hour"),
        ]),
        _half_width([
            dcc.Graph(id='Top_cities_fig', figure=Top_cities_fig),
            dcc.Dropdown(
                id='cities_dropdown',
                options=[{'label': str(i), 'value': i} for i in range(1, 31)],
                placeholder="choose number of cities"),
        ]),
    ]),

    # Accidents per month next to distance by severity.
    html.Div(html.Div([
        _half_width(dcc.Graph(id='accidents_month_fig', figure=accidents_month_fig)),
        _half_width(dcc.Graph(id='mean_wait_bysevety_fig', figure=mean_wait_bysevety_fig)),
    ])),

    # Per-state distance next to per-state waiting time.
    html.Div([
        _half_width(dcc.Graph(id='mean_distance_by_state_fig', figure=mean_distance_by_state_fig)),
        _half_width(dcc.Graph(id='mean_wait_fig', figure=mean_wait_fig)),
    ]),

    # The two pie charts on a coloured background.
    html.Div([
        _half_width(dcc.Graph(id='day_night_fig', figure=day_night_fig)),
        _half_width(dcc.Graph(id='count_byweather_fig', figure=count_byweather_fig)),
    ], style={'textAlign': 'center', 'backgroundColor': '#F5C64A'}),
])
    


@app.callback(
    Output(component_id='Severity_map_fig', component_property='figure'),
    Input(component_id='month_dropdown', component_property='value'),
    Input(component_id='hour_dropdown', component_property='value')
)
def update_graph(monDropdown_value, hourDropdown_value):
    """Redraw the severity map for the selected month and/or hour.

    Parameters
    ----------
    monDropdown_value : str or None
        Month chosen in 'month_dropdown'; None when nothing is selected.
    hourDropdown_value : str or None
        Hour chosen in 'hour_dropdown'; None when nothing is selected.

    Returns
    -------
    A Plotly scatter figure of accident coordinates coloured by severity.

    Each selected filter is applied on its own and both are combined with AND.
    Previously the map was filtered only when both were set, and then showed
    accidents matching the month OR the hour.
    """
    max_points = 100000  # upper bound on plotted points to keep the scatter responsive

    filtered_df = acc_df
    if monDropdown_value is not None:
        filtered_df = filtered_df[filtered_df['month'] == int(monDropdown_value)]
    if hourDropdown_value is not None:
        filtered_df = filtered_df[filtered_df['hour'] == int(hourDropdown_value)]

    # Down-sample only when there are more rows than the cap: asking sample()
    # for more rows than exist raises a ValueError.
    if len(filtered_df) > max_points:
        filtered_df = filtered_df.sample(n=max_points, axis=0, random_state=42)

    # The same title and empty axis labels are used whether or not a filter is active.
    return px.scatter(
        filtered_df,
        x='Start_Lng',
        y='Start_Lat',
        opacity=0.2,
        width=600,
        height=400,
        hover_name='State_Name',
        color='Severity',
        color_continuous_scale='deep',
        labels={"Start_Lng": "", "Start_Lat": ""},
        title="US car crash distribution by Severity",
    )
    
    
@app.callback(
    Output(component_id='Top_cities_fig', component_property='figure'),
    Input(component_id='cities_dropdown', component_property='value')
)
def change_cities(citiesDropdown_value):
    """Redraw the top-cities bar chart with the requested number of cities.

    Parameters
    ----------
    citiesDropdown_value : int or None
        Number of cities chosen in 'cities_dropdown'; None when nothing is
        selected, in which case the precomputed full chart is returned.

    Returns
    -------
    A Plotly bar figure of accident counts for the leading cities.
    """
    if citiesDropdown_value is None:
        return Top_cities_fig

    # Top_cities is indexed by city name, so take the leading rows by position
    # explicitly instead of relying on how a plain integer slice is interpreted.
    selected_cities = Top_cities.iloc[:citiesDropdown_value]
    return px.bar(
        x=selected_cities.index,
        y=selected_cities.values,
        labels={'x': 'Top cities', 'y': 'Number of accidents'},
        title='Top cities in accidents',
    )

In [50]:
# Start serving the dashboard with Dash's debug mode enabled; the recorded
# output below shows it listening on http://127.0.0.1:8050/.
app.run_server(debug=True)

Dash app running on http://127.0.0.1:8050/
