In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Read in vehicle crash information.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so a column cannot end up with mixed types
# depending on how the file happened to be split.
df = pd.read_csv('Crashes_in_DC.csv', low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Injury / fatality indicator columns for bicyclists and pedestrians
bicyclist_pedestrian_index = ['MAJORINJURIES_BICYCLIST', 'MINORINJURIES_BICYCLIST', 'FATAL_BICYCLIST',
                              'MAJORINJURIES_PEDESTRIAN', 'MINORINJURIES_PEDESTRIAN', 'FATAL_PEDESTRIAN']

# Keep only incidents involving bicyclists or pedestrians, i.e. rows where at
# least one of the columns above equals 1.
# NOTE(review): if these columns are per-incident counts rather than 0/1 flags,
# `== 1` skips incidents with two or more people affected -- confirm against the data.
# .copy() makes the result an independent frame, so the column assignments that
# follow do not write into a slice of the full crash table (SettingWithCopyWarning).
df = df.loc[df[bicyclist_pedestrian_index].eq(1).any(axis=1)].copy()

# Convert dates to datetime format
df['REPORTDATE'] = pd.to_datetime(df['REPORTDATE'])

In [4]:
# Coordinates of the accidents that involved bicyclists and, separately,
# of the accidents that involved pedestrians.

# Row selectors: any major injury, minor injury or fatality flag equal to 1
is_bike_incident = ((df['MAJORINJURIES_BICYCLIST'] == 1)
                    | (df['MINORINJURIES_BICYCLIST'] == 1)
                    | (df['FATAL_BICYCLIST'] == 1))
is_ped_incident = ((df['MAJORINJURIES_PEDESTRIAN'] == 1)
                   | (df['MINORINJURIES_PEDESTRIAN'] == 1)
                   | (df['FATAL_PEDESTRIAN'] == 1))

bike_lat = df.loc[is_bike_incident, 'MPDLATITUDE'].values
bike_long = df.loc[is_bike_incident, 'MPDLONGITUDE'].values

ped_lat = df.loc[is_ped_incident, 'MPDLATITUDE'].values
ped_long = df.loc[is_ped_incident, 'MPDLONGITUDE'].values

In [5]:
# Bikeshare station coordinates, used as a proxy for bicyclist density
df_cb = pd.read_csv('Capital_Bike_Share_Locations.csv')

bikeshare_lat, bikeshare_long = df_cb['LATITUDE'].values, df_cb['LONGITUDE'].values

In [6]:
# Load weather data and keep only the columns used for matching.
# .copy() makes the subset an independent frame so the dtype conversions below
# assign into it directly rather than into a slice of the raw table.
df_wthr = pd.read_csv('NOAA_Weather_Data.csv')
df_wthr = df_wthr[['DATE', 'PRCP', 'SNOW', 'TMAX', 'TMIN']].copy()

# Convert each column to its proper datatype (once)
df_wthr['DATE'] = pd.to_datetime(df_wthr['DATE'])
for col in ['PRCP', 'SNOW', 'TMAX', 'TMIN']:
    df_wthr[col] = pd.to_numeric(df_wthr[col])

In [7]:
# Match incidents to weather conditions
#
# Vectorized lookup: for every incident, find the weather row whose DATE equals
# the calendar day of the incident's REPORTDATE. This replaces a row-by-row
# DataFrame.append loop (append was removed in pandas 2.0 and grows quadratically).

# One weather row per date; if a date is repeated, the first row is used so the
# result always has exactly one weather row per incident.
daily_wthr = df_wthr.drop_duplicates(subset='DATE')

# Reduce each report timestamp to midnight of its own calendar day. Any timezone
# is dropped while keeping the wall-clock day/month/year, so the values are
# comparable with the timezone-naive weather dates.
report_day = df['REPORTDATE']
if report_day.dt.tz is not None:
    report_day = report_day.dt.tz_localize(None)
report_day = report_day.dt.normalize()

# Position of each incident's day in daily_wthr; get_indexer returns -1 when
# there is no weather row for that day.
positions = pd.Index(daily_wthr['DATE']).get_indexer(pd.Index(report_day))

# NOTE(review): incidents without a matching weather date fall back to the FIRST
# weather row (position 0), as the original loop did. That attaches unrelated
# weather to those incidents -- consider leaving them as NaN instead.
positions = np.where(positions < 0, 0, positions)

# Weather rows aligned one-to-one (by position) with the rows of df
df_tmp = daily_wthr.iloc[positions]

# Add weather statistics to incidents. assign() builds a new frame, so this does
# not write into a slice of the original crash table.
df = df.assign(PRCP=df_tmp['PRCP'].to_numpy(),
               SNOW=df_tmp['SNOW'].to_numpy(),
               TMAX=df_tmp['TMAX'].to_numpy(),
               TMIN=df_tmp['TMIN'].to_numpy())

In [8]:
# Bokeh imports for the interactive Google Maps plot
from bokeh.io import output_notebook, output_file, show
from bokeh.models import ColumnDataSource, GMapOptions
from bokeh.plotting import gmap

# Direct Bokeh output to the notebook so that show() renders inline
output_notebook()

In [10]:
# Plot spatial locations of incidents and bikeshare locations
map_options = GMapOptions(lat=38.9, lng=-77, map_type="roadmap", zoom=13)

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# The key is read from the GOOGLE_API_KEY environment variable so that it is
# never stored in (or shared with) the notebook itself.
google_api_key = os.environ.get("GOOGLE_API_KEY")
if not google_api_key:
    raise RuntimeError("Set the GOOGLE_API_KEY environment variable to your Google Maps API key.")

TOOLS = "pan,wheel_zoom,reset,help"
p = gmap(google_api_key, map_options, title="Vehicle Crashes Involving Bicyclists and Pedestrians in Washington DC",
         tools=TOOLS, plot_width=900, plot_height=900)

# One data source per marker layer
source_bike = ColumnDataSource(
    data=dict(lat=bike_lat,
              lon=bike_long)
)

source_ped = ColumnDataSource(
    data=dict(lat=ped_lat,
              lon=ped_long)
)

source_bikeshare = ColumnDataSource(
    data=dict(lat=bikeshare_lat,
              lon=bikeshare_long)
)

# Bicyclist incidents: small blue circles
p.circle(x="lon", y="lat", size=3, fill_color="blue", fill_alpha=0.8, source=source_bike, legend='Bike Incident')

# Pedestrian incidents: small red circles
p.circle(x="lon", y="lat", size=3, fill_color="red", color="red", fill_alpha=0.8, source=source_ped, legend='Pedestrian Incident')

# Bikeshare stations: larger, mostly transparent squares
p.square(x="lon", y="lat", size=10, fill_color="firebrick", color="firebrick", fill_alpha=0.2, source=source_bikeshare, legend='Bikeshare Location')

p.legend.location = "top_left"
# Clicking a legend entry hides / shows the corresponding markers
p.legend.click_policy="hide"

show(p)

# Map is fully interactive. Zoom and pan to get a better idea of where incidents tend to occur. By clicking on the legend
# you can toggle on and off certain markers