In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from bokeh.models import ColumnDataSource, FactorRange, Legend
from bokeh.palettes import Category20, Viridis
from bokeh.plotting import figure, show, save, output_file
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import folium
from folium import plugins
from folium.plugins import HeatMap
import plotly.io as pio
from bokeh.io import output_notebook, show
output_notebook()
import bokeh
from bokeh.models.widgets import Panel, Tabs, RadioGroup, Button, RadioButtonGroup, Select, Slider
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.models import SingleIntervalTicker, LinearAxis
from bokeh.models import ColumnDataSource, FactorRange, Legend, HoverTool, GeoJSONDataSource, \
                        LinearColorMapper, ColorBar, NumeralTickFormatter, Div, Select, TableColumn, \
                        DataTable, CheckboxGroup, Tabs, Panel, CheckboxButtonGroup, RadioButtonGroup, \
                        Label, LabelSet, Range1d
from bokeh.layouts import widgetbox, column 
import calendar
import datetime
from bokeh.models import FixedTicker
import math
from bokeh.models import DatetimeTickFormatter, LinearAxis, Range1d

# Notebook-wide configuration: plotly renderer, pandas display options and
# matplotlib/seaborn styling.
pio.renderers.default = 'notebook'

# pandas: turn off the chained-assignment check and show every column of wide frames
pd.options.mode.chained_assignment = None
pd.options.display.max_columns = None

# matplotlib / seaborn look and feel
plt.rcParams['font.size'] = 10
sns.set_style("white")
plt_color = 'cadetblue'


In [2]:
# Load the merged collision/weather table from the data folder; the first CSV
# column is used as the row index.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which is what triggers the
# "Columns (4) have mixed types" DtypeWarning on this file.
df = pd.read_csv('Data/collision_weather.csv', index_col=0, low_memory=False)


Columns (4) have mixed types.Specify dtype option on import or set low_memory=False.


elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison



In [236]:
# Display the column labels of the freshly loaded collision/weather frame.
df.columns

Index(['CRASH DATE', 'CRASH TIME', 'BOROUGH', 'ZIP CODE', 'LATITUDE',
       'LONGITUDE', 'LOCATION', 'ON STREET NAME', 'CROSS STREET NAME',
       'OFF STREET NAME', 'NUMBER OF PERSONS INJURED',
       'NUMBER OF PERSONS KILLED', 'NUMBER OF PEDESTRIANS INJURED',
       'NUMBER OF PEDESTRIANS KILLED', 'NUMBER OF CYCLIST INJURED',
       'NUMBER OF CYCLIST KILLED', 'NUMBER OF MOTORIST INJURED',
       'NUMBER OF MOTORIST KILLED', 'CONTRIBUTING FACTOR VEHICLE 1',
       'CONTRIBUTING FACTOR VEHICLE 2', 'CONTRIBUTING FACTOR VEHICLE 3',
       'CONTRIBUTING FACTOR VEHICLE 4', 'CONTRIBUTING FACTOR VEHICLE 5',
       'COLLISION_ID', 'VEHICLE TYPE CODE 1', 'VEHICLE TYPE CODE 2',
       'VEHICLE TYPE CODE 3', 'VEHICLE TYPE CODE 4', 'VEHICLE TYPE CODE 5',
       'HOUR', 'MINUTE', 'YEAR', 'MONTH', 'DAY', 'time', 'temp', 'dwpt',
       'rhum', 'prcp', 'snow', 'wdir', 'wspd', 'wpgt', 'pres', 'tsun', 'coco'],
      dtype='object')

In [260]:
# Derive time features from the crash date and crash time columns:
#   WEEKDAY - day of week from CRASH DATE (0 = Monday ... 6 = Sunday)
#   clock   - the CRASH TIME value as loaded, kept before it is parsed
#   HOUR    - hour of day (0-23) from CRASH TIME
#   HoW     - hour of week: WEEKDAY * 24 + (hour + 1)
#   HoD     - hour of day shifted to 1-24
df['CRASH DATE'] = pd.to_datetime(df['CRASH DATE'])
df['WEEKDAY'] = df['CRASH DATE'].dt.weekday

# Capture the raw clock value only while CRASH TIME is still unparsed (or if
# `clock` does not exist yet), so re-running this cell does not replace the
# raw value with already-parsed timestamps.
if 'clock' not in df.columns or not pd.api.types.is_datetime64_any_dtype(df['CRASH TIME']):
    df['clock'] = df['CRASH TIME']
df['CRASH TIME'] = pd.to_datetime(df['CRASH TIME'])

# The hour is read from CRASH TIME, the same source HoD and HoW use.
# NOTE(review): CRASH DATE looks like a date-only column, in which case its
# .dt.hour would be 0 for every row - confirm against the raw data.
crash_hour = df['CRASH TIME'].dt.hour
df['HOUR'] = crash_hour
df['HoW'] = df['WEEKDAY'] * 24 + (crash_hour + 1).astype(int)
df['HoD'] = crash_hour + 1

In [241]:
# Expose the row index as an ordinary `id` column.
row_ids = df.index
df['id'] = row_ids

In [242]:
# Label every collision with a severity class:
#   FATAL      - at least one person killed
#   SEVERE     - nobody killed, at least one person injured
#   NON-SEVERE - nobody killed and nobody injured
# Rows matching none of the masks are left untouched.
persons_killed = df['NUMBER OF PERSONS KILLED']
persons_injured = df['NUMBER OF PERSONS INJURED']
nobody_killed = persons_killed == 0

severity_rules = (
    ('FATAL', persons_killed >= 1),
    ('SEVERE', (persons_injured >= 1) & nobody_killed),
    ('NON-SEVERE', (persons_injured == 0) & nobody_killed),
)
for severity_label, row_mask in severity_rules:
    df.loc[row_mask, 'SEVERITY'] = severity_label

In [243]:
# For each road-user group, add a 1/0 column that is 1 when the collision
# injured or killed at least one person of that group.
casualty_columns = {
    'PEDESTRIAN': ('NUMBER OF PEDESTRIANS INJURED', 'NUMBER OF PEDESTRIANS KILLED'),
    'CYCLIST': ('NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED'),
    'MOTORIST': ('NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED'),
}
for flag_name, (injured_col, killed_col) in casualty_columns.items():
    any_injured = df[injured_col] > 0
    any_killed = df[killed_col] > 0
    df[flag_name] = np.where(any_injured | any_killed, 1, 0)

In [244]:
# Count collisions per (YEAR, MONTH) and severity for each road-user group.
# Combinations with no collisions become 0, and YEAR/MONTH end up as ordinary
# columns.
df_pedestrian_plot = (
    df.loc[df['PEDESTRIAN'] == 1]
    .pivot_table(index=['YEAR', 'MONTH'], columns='SEVERITY', values='id', aggfunc='count')
    .fillna(0)
    .reset_index()
)

df_cyclist_plot = (
    df.loc[df['CYCLIST'] == 1]
    .pivot_table(index=['YEAR', 'MONTH'], columns='SEVERITY', values='id', aggfunc='count')
    .fillna(0)
    .reset_index()
)

df_motorist_plot = (
    df.loc[df['MOTORIST'] == 1]
    .pivot_table(index=['YEAR', 'MONTH'], columns='SEVERITY', values='id', aggfunc='count')
    .fillna(0)
    .reset_index()
)

In [245]:
# Build one table per severity level with a count column for each road-user
# group.
#
# The three pivots are computed independently, so a (YEAR, MONTH) pair that is
# absent from one of them would shift every later row if the columns were
# combined by row position. Joining on the (YEAR, MONTH) keys keeps each count
# attached to its own month; missing combinations or severity columns become 0.
monthly_pivots = {
    'Pedestrian': df_pedestrian_plot,
    'Cyclist': df_cyclist_plot,
    'Motorist': df_motorist_plot,
}


def combine_monthly_counts(severity):
    """Return the monthly `severity` counts of every road-user group.

    Parameters
    ----------
    severity : str
        Severity column to extract from each pivot
        ('FATAL', 'SEVERE' or 'NON-SEVERE').

    Returns
    -------
    pandas.DataFrame
        Columns 'Year', 'Month', 'Pedestrian', 'Cyclist', 'Motorist', sorted
        by year and month, with a fresh 0..n-1 index.
    """
    per_group = {
        # reindex() yields an all-NaN column when this severity never occurred
        group: pivot.set_index(['YEAR', 'MONTH']).reindex(columns=[severity])[severity]
        for group, pivot in monthly_pivots.items()
    }
    combined = (
        pd.DataFrame(per_group)      # aligns the three series on (YEAR, MONTH)
        .fillna(0)
        .sort_index()
        .rename_axis(index=['Year', 'Month'])
        .reset_index()
    )
    return combined[['Year', 'Month', *monthly_pivots]]


df_fatal = combine_monthly_counts('FATAL')
df_severe = combine_monthly_counts('SEVERE')
df_non_severe = combine_monthly_counts('NON-SEVERE')

In [246]:
# The road-user groups that get their own line in the plots.
plot_types = {'Pedestrian', 'Cyclist', 'Motorist'}

In [348]:
# Monthly time series of collision counts per road-user group, with one tab per
# severity level (Fatal, Severe).
#
# The output path uses forward slashes on purpose: in a normal string literal
# "\t" is a tab character, so a backslash-separated "...\time_series..." path
# would be written with a tab in the file name.
output_file("docs/_includes/time_series_plot1.html")
colors_list = ["#008080", "#DA70D6", "#4B0082", "blue", "red"]

# Two-level categorical x values: (year, month name)
factors_fatal = [(str(year), str(calendar.month_name[month]))
                 for year, month in zip(df_fatal.Year, df_fatal.Month)]
factors_severe = [(str(year), str(calendar.month_name[month]))
                  for year, month in zip(df_severe.Year, df_severe.Month)]

# Hover tool showing the (year, month) factor and the count under the cursor
hover = HoverTool(tooltips=[('Year, Month', '@x'), ('Count', '@y')])

figures, panels = [], []
for severity_title, table, factors in (("Fatal", df_fatal, factors_fatal),
                                       ("Severe", df_severe, factors_severe)):
    fig = figure(plot_width=1000, plot_height=300, title=severity_title,
                 x_axis_label='Year, Month', y_axis_label='Count',
                 x_range=FactorRange(*factors))

    # One line per road-user column (everything after Year and Month). Only the
    # Pedestrian line starts un-muted; the others can be toggled in the legend.
    for color, road_user in zip(colors_list, table.columns[2:]):
        fig.line(factors, table[road_user], line_width=2, color=color,
                 legend_label=road_user, muted_alpha=0.2,
                 muted=(road_user != 'Pedestrian'))

    fig.legend.location = (675, 125)
    fig.legend.orientation = "horizontal"
    fig.legend.click_policy = "mute"  # clicking a legend entry mutes/un-mutes its line
    fig.legend.label_text_font_size = "8pt"
    fig.min_border_top = 50
    fig.xaxis.major_label_orientation = math.pi/2
    fig.xaxis.major_label_text_font_size = "5pt"
    fig.add_tools(hover)

    figures.append(fig)
    panels.append(Panel(child=fig, title=severity_title))

# Keep the individual names available for later cells
p1, p2 = figures
tab1, tab2 = panels
tabs = Tabs(tabs=panels)

# show the results
show(tabs)

In [262]:
# Count collisions per (WEEKDAY, HoD) and severity for each road-user group.
# Empty combinations become 0, and WEEKDAY/HoD end up as ordinary columns.
hourly_pivot_args = dict(index=['WEEKDAY', 'HoD'], columns='SEVERITY', values='id', aggfunc='count')

pedestrian_rows = df[df['PEDESTRIAN'] == 1]
df_pedestrian_plot2 = pedestrian_rows.pivot_table(**hourly_pivot_args).fillna(0).reset_index()

cyclist_rows = df[df['CYCLIST'] == 1]
df_cyclist_plot2 = cyclist_rows.pivot_table(**hourly_pivot_args).fillna(0).reset_index()

motorist_rows = df[df['MOTORIST'] == 1]
df_motorist_plot2 = motorist_rows.pivot_table(**hourly_pivot_args).fillna(0).reset_index()

In [263]:
# Build one table per severity level with a count column for each road-user
# group, keyed by weekday and hour of day.
#
# The three pivots are computed independently, so a (WEEKDAY, HoD) pair that is
# absent from one of them would shift every later row if the columns were
# combined by row position. Joining on the keys keeps each count attached to
# its own weekday/hour; missing combinations or severity columns become 0.
# All three tables use the same 'WeekDay' column name.
hourly_pivots = {
    'Pedestrian': df_pedestrian_plot2,
    'Cyclist': df_cyclist_plot2,
    'Motorist': df_motorist_plot2,
}


def combine_hourly_counts(severity):
    """Return the weekday/hour `severity` counts of every road-user group.

    Parameters
    ----------
    severity : str
        Severity column to extract from each pivot
        ('FATAL', 'SEVERE' or 'NON-SEVERE').

    Returns
    -------
    pandas.DataFrame
        Columns 'WeekDay', 'HoD', 'Pedestrian', 'Cyclist', 'Motorist', sorted
        by weekday and hour, with a fresh 0..n-1 index.
    """
    per_group = {
        # reindex() yields an all-NaN column when this severity never occurred
        group: pivot.set_index(['WEEKDAY', 'HoD']).reindex(columns=[severity])[severity]
        for group, pivot in hourly_pivots.items()
    }
    combined = (
        pd.DataFrame(per_group)      # aligns the three series on (WEEKDAY, HoD)
        .fillna(0)
        .sort_index()
        .rename_axis(index=['WeekDay', 'HoD'])
        .reset_index()
    )
    return combined[['WeekDay', 'HoD', *hourly_pivots]]


df_fatal2 = combine_hourly_counts('FATAL')
df_severe2 = combine_hourly_counts('SEVERE')
df_non_severe2 = combine_hourly_counts('NON-SEVERE')

In [264]:
# Swap the numeric weekday codes (0-6) for day names in the two plotted tables.
weekday_names = {
    0: 'Monday',
    1: 'Tuesday',
    2: 'Wednesday',
    3: 'Thursday',
    4: 'Friday',
    5: 'Saturday',
    6: 'Sunday',
}
for severity_table in (df_fatal2, df_severe2):
    severity_table['WeekDay'] = severity_table['WeekDay'].replace(weekday_names)


In [349]:
# Weekday / hour-of-day profile of collision counts per road-user group, with
# one tab per severity level (Fatal, Severe).
output_file("time_series_plot2.html")
colors_list = ["#008080", "#DA70D6", "#4B0082", "blue", "red"]

# Two-level categorical x values: (weekday name, hour of day)
factors_fatal = [(str(day), str(hour)) for day, hour in zip(df_fatal2.WeekDay, df_fatal2.HoD)]
factors_severe = [(str(day), str(hour)) for day, hour in zip(df_severe2.WeekDay, df_severe2.HoD)]

# Hover tool showing the (weekday, hour) factor and the count under the cursor
hover = HoverTool(tooltips=[('WeekDay, Hour', '@x'), ('Count', '@y')])

figures, panels = [], []
plot_specs = (
    # title, table, x factors, extra figure options
    ("Fatal", df_fatal2, factors_fatal, {'y_range': (0, 18)}),  # fatal tab keeps its fixed 0-18 y-range
    ("Severe", df_severe2, factors_severe, {}),
)
for severity_title, table, factors, figure_options in plot_specs:
    # Both tabs share the same weekday/hour x-axis, so both are labelled 'Day, Hour'
    fig = figure(plot_width=1000, plot_height=300, title=severity_title,
                 x_axis_label='Day, Hour', y_axis_label='Count',
                 x_range=FactorRange(*factors), **figure_options)

    # One line per road-user column (everything after WeekDay and HoD). Only the
    # Pedestrian line starts un-muted; the others can be toggled in the legend.
    for color, road_user in zip(colors_list, table.columns[2:]):
        fig.line(factors, table[road_user], line_width=2, color=color,
                 muted_alpha=0.2, muted=(road_user != 'Pedestrian'),
                 legend_label=road_user)

    fig.xaxis.major_label_orientation = math.pi/2
    fig.xaxis.major_label_text_font_size = "5pt"
    fig.legend.label_text_font_size = "8pt"
    fig.legend.click_policy = "mute"  # clicking a legend entry mutes/un-mutes its line
    fig.legend.location = (675, 150)
    fig.legend.orientation = "horizontal"
    fig.min_border_top = 50
    fig.add_tools(hover)

    figures.append(fig)
    panels.append(Panel(child=fig, title=severity_title))

# Keep the individual names available for later cells
p1, p2 = figures
tab1, tab2 = panels
tabs = Tabs(tabs=panels)

# show the results
show(tabs)

## Looking at weather data

In [340]:
# Display the column labels again, now including the derived columns added
# above (WEEKDAY, clock, HoW, id, SEVERITY, PEDESTRIAN, CYCLIST, ...).
df.columns

Index(['CRASH DATE', 'CRASH TIME', 'BOROUGH', 'ZIP CODE', 'LATITUDE',
       'LONGITUDE', 'LOCATION', 'ON STREET NAME', 'CROSS STREET NAME',
       'OFF STREET NAME', 'NUMBER OF PERSONS INJURED',
       'NUMBER OF PERSONS KILLED', 'NUMBER OF PEDESTRIANS INJURED',
       'NUMBER OF PEDESTRIANS KILLED', 'NUMBER OF CYCLIST INJURED',
       'NUMBER OF CYCLIST KILLED', 'NUMBER OF MOTORIST INJURED',
       'NUMBER OF MOTORIST KILLED', 'CONTRIBUTING FACTOR VEHICLE 1',
       'CONTRIBUTING FACTOR VEHICLE 2', 'CONTRIBUTING FACTOR VEHICLE 3',
       'CONTRIBUTING FACTOR VEHICLE 4', 'CONTRIBUTING FACTOR VEHICLE 5',
       'COLLISION_ID', 'VEHICLE TYPE CODE 1', 'VEHICLE TYPE CODE 2',
       'VEHICLE TYPE CODE 3', 'VEHICLE TYPE CODE 4', 'VEHICLE TYPE CODE 5',
       'HOUR', 'MINUTE', 'YEAR', 'MONTH', 'DAY', 'time', 'temp', 'dwpt',
       'rhum', 'prcp', 'snow', 'wdir', 'wspd', 'wpgt', 'pres', 'tsun', 'coco',
       'WEEKDAY', 'clock', 'HoW', 'id', 'SEVERITY', 'PEDESTRIAN', 'CYCLIST',
       'MOTORI

In [346]:
# TODO: unfinished cell - get the rows with the most ... (criterion was never specified)


# OLD

In [None]:
# Legacy chart (kept under the "OLD" heading): mean and total motor-vehicle
# counts per (year, month), one bar-chart tab per metric.
#
# NOTE(review): `df_cars` is not defined anywhere in the visible part of this
# notebook, so this cell only runs if that frame is created elsewhere first.
from bokeh.palettes import Oranges256  # used by the colour mapper below; missing from the visible import cell

df_yearly_1 = df_cars[df_cars.Direction == 'T'].groupby(['Year', 'Month']).agg({'total_count': ['mean', 'sum']}).reset_index()
df_yearly_1.columns = ['Year', 'Month', 'mean', 'sum']

tabs = []
# Two-level categorical x values: (year, month name)
factors = [(str(year), str(calendar.month_name[month]))
           for year, month in zip(df_yearly_1.Year, df_yearly_1.Month)]

graph_specs = [
    # metric column, figure title, y-axis label
    ('mean', 'Average of daily passing motor vehicles by year, month', 'Average of daily counts of motor vehicles'),
    ('sum', 'Total of daily passing motor vehicles by month, year', 'Total count of motor vehicles'),
]

for metric, title, y_label in graph_specs:

    df_v = df_yearly_1[['Year', 'Month', metric]]
    metric_min = min(df_v[metric])
    metric_max = max(df_v[metric])

    # Bokeh figure, with 20% headroom above the tallest bar
    p = figure(x_range=FactorRange(*factors), y_range=(0, metric_max + 0.2*metric_max),
               height=400, width=1000, toolbar_location=None,
               x_axis_label='Year, Month', y_axis_label=y_label)

    p.title.text = title
    p.title.align = "left"
    p.title.text_color = "#d14a0a"
    p.title.text_font_size = "15px"
    p.title.vertical_align = 'top'

    # The values are stored under the key 'means' for both metrics; the bar
    # glyph, colour mapper and tooltip below all refer to that key.
    b_ds = {'x': factors,
            'means': df_v[metric].to_list(),
    }

    # Bars are shaded by value using the reversed Oranges256 palette
    color_mapper = LinearColorMapper(palette=list(reversed(Oranges256)), low=metric_min, high=metric_max)
    bars = p.vbar(x='x', top='means', source=b_ds, width=0.6,
                  muted=False, color={'field': 'means', 'transform': color_mapper}, line_color='white')

    # Hover tool restricted to the bars, showing the value with two decimals
    hover = HoverTool()
    hover.tooltips = [('{}'.format(y_label), '@means{0.00}')]
    hover.renderers = [bars]
    p.add_tools(hover)
    p.y_range.start = 0
    p.x_range.range_padding = 0.05
    p.xaxis.major_label_orientation = math.pi/2
    p.xgrid.grid_line_color = None

    p.xaxis.major_label_text_font_size = "6pt"

    tab = Panel(child=p, title=metric.title())
    tabs.append(tab)

show(Tabs(tabs=tabs))