In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
from datetime import datetime
from datetime import timedelta
# sns.set(style="darkgrid")
import requests
import pprint
import re
pd.options.display.max_columns = 999

In [2]:
times1 = datetime.now()
df = pd.read_csv('../Car_Crash/Data/Crash.csv',skipinitialspace=True)
times2 = datetime.now()
print('Time spent: '+ str(times2-times1)) # print the time cost

  interactivity=interactivity, compiler=compiler, result=result)


Time spent: 0:00:23.278527


In [3]:
df = df[~df['Postal'].isnull()]

In [4]:
# convert zip code into strings
df.Postal = df.Postal.astype(int).astype(str)
df.Postal = df.Postal.apply(lambda x: x if len(x) == 5 else '0'+x)

In [5]:
# preprocess illumination
df.Illumination = df.Illumination.str.replace('– ', '', regex=False)
df.Illumination = df.Illumination.str.replace('Unknown (expired)', 'Other',
                                              regex=False)
df.Illumination = df.Illumination.fillna('Other')

# preprocess weather
df.Weather = df.Weather.str.replace('Sleet (hail)', 'Sleet(hail)', regex=False)
df.Weather = df.Weather.str.replace('Unknown ', 'Other', regex=False)
df.Weather = df.Weather.fillna('Other')

# preprocess road condition
df.Road_Condition = df.Road_Condition.str.replace('Sand/ mud/ dirt/ oil/ or '
                                                  'gravel',
                                                  'Sand/mud/dirt/oil/or gravel'
                                                  , regex=False)
df.Road_Condition = df.Road_Condition.str.replace('Unknown (expired)',
                                                  'Other', regex=False)
df.Road_Condition = df.Road_Condition.fillna('Other')

# delete the rows with hour of day 99
df = df[~(df['Hour_of_Day'] == 99)]

# crash type pie chart

In [14]:
crash_type_df = df.groupby('Collision_Type').Crash_Record_Number.count().reset_index()
crash_type_df.Crash_Record_Number = crash_type_df.Crash_Record_Number/ crash_type_df.Crash_Record_Number.sum() * 100
crash_type_df.rename(columns={'Crash_Record_Number':'Percentage'}, inplace=True)

In [15]:
labels = crash_type_df.Collision_Type.values.tolist()
values = crash_type_df.Percentage.values.tolist()

In [16]:
import plotly.plotly as py
import plotly.graph_objs as go

trace = go.Pie(labels=labels, values=values,
               hoverinfo='label+percent', 
               textfont=dict(size=20),
               marker=dict(line=dict(color='#000000', width=2)))

py.iplot([trace], filename='styled_pie_chart')


Consider using IPython.display.IFrame instead



# Crash type interactive viz

In [8]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.graph_objs as go

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

available_indicators = ['Crash Month', 'Day of Week', 'Hour of Day', 'Illumination', 'Weather', 'Road Condition']


app.layout = html.Div([
    html.Div([

        html.Div([
            dcc.Dropdown(
                id='crash_type_by',
                options=[{'label': i, 'value': i} for i in available_indicators],
                value='Crash Month'
            )
        ],
        style={'width': '15%', 'display': 'inline-block'})
    ]),

    dcc.Graph(id='indicator-graphic')
])


@app.callback(
    dash.dependencies.Output('indicator-graphic', 'figure'),
    [dash.dependencies.Input('crash_type_by', 'value')])
def update_graph(x_name_1):
    x_name = '_'.join(x_name_1.split(' '))
    plot_df = df.groupby([x_name,'Collision_Type']).Crash_Record_Number.count().unstack()
    plot_df['Sum'] = plot_df.sum(axis=1)
    
    for i in range(len(plot_df.columns.tolist())-1):
        plot_df.iloc[:,i] = plot_df.iloc[:,i]/plot_df.Sum*100
    plot_df = plot_df.iloc[:,:-1]

    index_list = plot_df.index.tolist()

    y_name_list = plot_df.columns.tolist()

    data = []
    for y_name in y_name_list:
        data.append(go.Bar(
            x=index_list,
            y=plot_df[y_name].values.tolist(),
            name=y_name
        ))

    layout = go.Layout(
        barmode='stack',
        title=f'Crash Type Percentage by {x_name_1}'
    )

    return {
        'data': data,
        'layout': layout
    }
if __name__ == '__main__':
    app.run_server(debug=False,port=8050)

 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [12/Dec/2018 22:45:20] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [12/Dec/2018 22:45:21] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [12/Dec/2018 22:45:21] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [12/Dec/2018 22:45:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [12/Dec/2018 22:45:25] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


# crash type on map

In [13]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.graph_objs as go

mapbox_access_token = 'pk.eyJ1IjoidWlyc2VpdGEiLCJhIjoiY2pwaGx4eXQ0MDAwdTNxcXdwMGo0cGpxdiJ9.ux2pBATNhOgnghsvMFbQvw'
column_list = ['Crash_Record_Number',
               'County_Name',
               'Crash_Year',
               'Weather',
               'Road_Condition',
               'Collision_Type',
               'Latitude_(Decimal)',
               'Longitude_(Decimal)',
               'Postal']
df1 = df[column_list]
df1['Collision_Type'] = df1['Collision_Type'].astype('category')
df1['Collision_Type_color'] = df1['Collision_Type'].cat.codes
df1['Collision_Type_color'] = df1['Collision_Type_color']/(len(df1['Collision_Type_color'].unique().tolist())-1)

# .sample(n=40000, replace=False,random_state=1)
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

crash_type_list = df1.Collision_Type.unique().tolist()
year_list = df1.Crash_Year.sort_values().unique().tolist()


app.layout = html.Div([
    html.Div([
        html.Div([
            dcc.Dropdown(
                id='crash_type',
                options=[{'label': i, 'value': i} for i in crash_type_list],
                value=crash_type_list,
                multi=True
            ),
            dcc.Dropdown(
                id='year',
                options=[{'label': i, 'value': i} for i in year_list],
                value=year_list,
                multi=True
            )
        ],
        style={'width': '20%', 'display': 'inline-block'})
    ]),

    dcc.Graph(id='indicator-graphic')
])

@app.callback(
    dash.dependencies.Output('indicator-graphic', 'figure'),
    [dash.dependencies.Input('crash_type', 'value'),
    dash.dependencies.Input('year', 'value')])
def update_graph(crash_type, year):
    scl = [ [0,"rgb(229, 0, 14)"],[1/9,"rgb(231, 63, 2)"],[2/9,"rgb(233, 142, 5)"],\
    [3/9,"rgb(235, 220, 8)"],[4/9,"rgb(177, 237, 11)"],[5/9,"rgb(104, 239, 14)"],\
      [6/9, "rgb(0, 0, 0)"], [7/9, "rgb(32, 241, 17)"], [8/9, "rgb(23, 245, 156)"], [1, "rgb(27, 248, 232)"]]
    
    plot_df = df1
    plot_df = plot_df[plot_df['Collision_Type'].isin(crash_type)]
    plot_df = plot_df[plot_df['Crash_Year'].isin(year)]
    
    if plot_df.shape[0] > 40000:
        plot_df = plot_df.sample(n=40000, replace=False,random_state=1)
        
    data = [
        go.Scattermapbox(
            lon = plot_df['Longitude_(Decimal)'],
            lat = plot_df['Latitude_(Decimal)'],
            text = plot_df['Collision_Type'],
            mode='markers',
            marker=dict(
                size=5,
                opacity = 0.7,
                autocolorscale = False,
                colorscale = scl,
                color = plot_df['Collision_Type_color'],
            ),
        )
    ]
    
    layout = go.Layout(
        autosize=True,
        hovermode='closest',
        mapbox=dict(
            accesstoken=mapbox_access_token,
            bearing=0,
            center=dict(
                lat=41,
                lon=-77
            ),
            
            pitch=0,
            zoom=6
        ),
    )

    return go.Figure(data=data, layout=layout)
if __name__ == '__main__':
    app.run_server(debug=False,port=8050)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [12/Dec/2018 22:49:11] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [12/Dec/2018 22:49:11] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [12/Dec/2018 22:49:11] "[37m