In [1]:
import pandas as pd
from dash import Dash 
from dash import html, dcc 
from dash import callback, Output, Input
import plotly.express as px
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the accident data from the raw GitHub CSV and order the rows by crash date.
# NOTE(review): read_csv is called without date parsing, so if 'crash_date' holds
# date strings the ordering is lexicographic rather than chronological — confirm.
df = pd.read_csv('https://raw.githubusercontent.com/arbash-malik/ceutest/main/NYC%20Accident%20Databasev2.csv')
df = df.sort_values(by='crash_date').reset_index(drop=True)

# Sorted, distinct 'location' values; these feed the checklist options in the layout.
# Missing values are dropped before sorting: a NaN mixed in with text labels cannot be
# ordered against them (float vs. str) and would otherwise surface as a filter option.
boroughs = df['location'].dropna().sort_values().unique()

# Data for fig 2 (Top 5 Contributing factors).
# The five factors are picked by how often they occur (row count); the value kept for
# plotting is the sum of 'persons_killed' per factor and location.
top_5_factors = df['contributing_factor'].value_counts().nlargest(5).index
df_top_5_factors = (
    df[df['contributing_factor'].isin(top_5_factors)]
    .groupby(['contributing_factor', 'location'])['persons_killed']
    .sum()
    .reset_index()
)


# Initiating Dash
app = Dash(__name__)

# Thin light-grey frame drawn around every graph so the panels are easier to tell apart.
_GRAPH_BORDER = '0.2px solid #D3D3D3'

# Dashboard layout: two centred headings, the borough filter, one full-width graph
# (graph3) and two graphs placed side by side beneath it (graph1, graph2).
app.layout = html.Div([

    # Titles
    html.H1('NYC Boroughs Accidents', style={'textAlign': 'center'}),
    html.H2('Arbash Malik - 2202071 ', style={'textAlign': 'center'}),

    # Borough filter. Style keys are camelCased (fontWeight, not font-weight), the
    # form Dash documents for style dictionaries and the one used for the headings.
    html.Label('Select Borough', style={'fontWeight': 'bold', 'textAlign': 'center', 'fontSize': '16px'}),
    dcc.Checklist(id='input1', inline=True, options=boroughs, style={'width': '50%'}),

    # Graphs: graph3 spans the row; graph1 and graph2 share the next row as
    # inline-blocks of 49% width each.
    dcc.Graph(id='graph3', style={'width': '98.1%', 'border': _GRAPH_BORDER}),
    dcc.Graph(id='graph1', style={'display': 'inline-block', 'width': '49%', 'border': _GRAPH_BORDER}),
    dcc.Graph(id='graph2', style={'display': 'inline-block', 'width': '49%', 'border': _GRAPH_BORDER})
])

# Callback for 1 input - "boroughs" to affect 3 graphs.
# The work is split into small private helpers (one for filtering, one per figure);
# update1 only wires them together.

def _filter_by_borough(selected):
    """Return ``(accident rows, top-5-factor rows, title suffix)`` for a selection.

    ``selected`` is the checklist value: falsy (None or an empty list) when nothing
    is ticked, otherwise a list of location names.
    """
    # Nothing ticked: show every borough.
    if not selected:
        return df, df_top_5_factors, '- All Boroughs'

    # One or more boroughs ticked: keep only rows whose location is in the selection.
    accidents = df.query("location == @selected")                 # feeds fig1 and fig3
    top_factors = df_top_5_factors.query("location == @selected")  # feeds fig2
    # Joining the names keeps the Python list brackets out of the chart titles.
    title_suffix = f'<b>for:</b> {", ".join(selected)}'
    return accidents, top_factors, title_suffix


def _accidents_by_hour_figure(accidents, location_title):
    """Build fig1: histogram of accident counts per hour (row 2, left)."""
    # The column is referenced by name, in line with the other two figures.
    fig = px.histogram(
        accidents,
        x='hour',
        nbins=24,
        title=f'<b>No of Accidents </b> {location_title} <br>By Hour',
        color_discrete_sequence=['#599e94'],
    )
    fig.update_layout(xaxis_title='Hour', yaxis_title='Number of Accidents', plot_bgcolor='white')
    # Data labels above each bar plus a thin white outline separating the bins.
    fig.update_traces(
        texttemplate='%{value:.2s}',
        textposition='outside',
        textfont_size=11,
        textangle=0,
        marker_line_color='white',
        marker_line_width=0.1,
    )
    # The labels carry the values, so the y axis itself is hidden.
    fig.update_yaxes(visible=False)
    return fig


def _top_factors_figure(top_factors, location_title):
    """Build fig2: horizontal bars of persons killed per top-5 factor (row 2, right)."""
    fig = px.histogram(
        top_factors,
        x='persons_killed',
        y='contributing_factor',
        orientation='h',
        title=f'<b>Top 5 Contributing Factors</b> {location_title}',
    )
    fig.update_layout(yaxis_title='', plot_bgcolor='white')
    fig.update_traces(
        marker_color='#599e94',
        texttemplate='%{value:d}',
        textposition='outside',
        textfont_size=15,
        textangle=0,
    )
    fig.update_xaxes(visible=False)
    # 'category descending' orders the bars by factor name, not by their value.
    fig.update_yaxes(tickfont_size=16, categoryorder='category descending')
    return fig


def _persons_killed_figure(accidents, location_title):
    """Build fig3: grouped bars of persons killed per year in 3 categories (row 1)."""
    fig = px.histogram(
        accidents,
        title=f'<b>Number of Persons Killed</b> {location_title}<br>By Year',
        x='year',
        y=['pedestrians_killed', 'cyclist_killed', 'motorist_killed'],
        color_discrete_sequence=['#466964', '#a4a2a8', '#599e94'],
    )

    # barmode='group' places the three categories side by side for each year.
    fig.update_layout(xaxis_title='Year', barmode='group', plot_bgcolor='white')
    fig.update_layout(legend_title=None, legend=dict(font=dict(size=16)))

    # Replace the raw column names in the legend with readable labels.
    newnames = {'pedestrians_killed': 'Pedestrians', 'cyclist_killed': 'Cyclists', 'motorist_killed': 'Motorists'}
    fig.for_each_trace(lambda t: t.update(name=newnames[t.name], legendgroup=newnames[t.name]))

    # Data labels on the bars, years treated as categories, y axis hidden.
    fig.update_traces(texttemplate='%{value:d}', textposition='auto', textfont_size=15, textangle=0)
    fig.update_xaxes(type='category', tickfont_size=12)
    fig.update_yaxes(visible=False)
    return fig


@app.callback(
    Output('graph1', 'figure'),
    Output('graph2', 'figure'),
    Output('graph3', 'figure'),
    Input('input1', 'value')
)
def update1(_input1):
    """Rebuild all three figures whenever the borough checklist changes.

    Parameters
    ----------
    _input1 : list of str or None
        Value of the 'input1' checklist. A falsy value means "all boroughs".

    Returns
    -------
    tuple
        ``(fig1, fig2, fig3)`` in the order of the declared outputs:
        graph1 (accidents by hour), graph2 (top 5 contributing factors),
        graph3 (persons killed by year).
    """
    accidents, top_factors, location_title = _filter_by_borough(_input1)

    fig1 = _accidents_by_hour_figure(accidents, location_title)
    fig2 = _top_factors_figure(top_factors, location_title)
    fig3 = _persons_killed_figure(accidents, location_title)

    return fig1, fig2, fig3

# Run Dashboard
if __name__ == '__main__':
    # `run_server` is the legacy entry point and is not available in current Dash
    # releases; `run` is its replacement. Fall back to the old name only when the
    # installed Dash does not provide `run` yet.
    _start_server = app.run if hasattr(app, 'run') else app.run_server
    _start_server(debug=True)
