# Mapping Traffic Incidents in NYC

We aim to create a folium map of New York City divided into its five boroughs, using the provided geopandas dataframe and a population, area, and density dataframe. We will also add hover tools displaying the borough name and the total number of accidents. Additionally, we will determine the most appropriate metric for visualizing the number of accidents relative to characteristics of each borough, such as population, area, number of accidents and number of *fatal* accidents.


In [52]:
import pandas as pd
import numpy as np 

#map
import folium
import geopandas as gpd
from folium.features import GeoJson, GeoJsonTooltip
from branca.colormap import LinearColormap
import ipywidgets as widgets
from IPython.display import display, clear_output

#set notebook options
pd.options.mode.chained_assignment = None 
pd.set_option('display.max_columns', None)
#plt.rcParams.update({'font.size': 10})
#sns.set_style("white")
plt_color = 'cadetblue'

In [22]:
# NYC traffic incident data
filename='collision_weather.csv'
incidents_df = pd.read_csv(f'./Data/{filename}', index_col=0)
incidents_df.head()

  incidents_df = pd.read_csv(f'./Data/{filename}', index_col=0)


Unnamed: 0,CRASH DATE,CRASH TIME,BOROUGH,ZIP CODE,LATITUDE,LONGITUDE,LOCATION,ON STREET NAME,CROSS STREET NAME,OFF STREET NAME,NUMBER OF PERSONS INJURED,NUMBER OF PERSONS KILLED,NUMBER OF PEDESTRIANS INJURED,NUMBER OF PEDESTRIANS KILLED,NUMBER OF CYCLIST INJURED,NUMBER OF CYCLIST KILLED,NUMBER OF MOTORIST INJURED,NUMBER OF MOTORIST KILLED,CONTRIBUTING FACTOR VEHICLE 1,CONTRIBUTING FACTOR VEHICLE 2,CONTRIBUTING FACTOR VEHICLE 3,CONTRIBUTING FACTOR VEHICLE 4,CONTRIBUTING FACTOR VEHICLE 5,COLLISION_ID,VEHICLE TYPE CODE 1,VEHICLE TYPE CODE 2,VEHICLE TYPE CODE 3,VEHICLE TYPE CODE 4,VEHICLE TYPE CODE 5,HOUR,MINUTE,YEAR,MONTH,DAY,time,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco
0,2021-09-11,9:35,BROOKLYN,11208.0,40.667202,-73.8665,"(40.667202, -73.8665)",,,1211 LORING AVENUE,0.0,0.0,0,0,0,0,0,0,Unspecified,,,,,4456314,Sedan,,,,,9,35,2021,9,11,2021-09-11 09:00:00,15.6,9.9,69.0,0.0,,0.0,0.0,,1020.3,,1.0
1,2021-12-14,8:13,BROOKLYN,11233.0,40.683304,-73.917274,"(40.683304, -73.917274)",SARATOGA AVENUE,DECATUR STREET,,0.0,0.0,0,0,0,0,0,0,,,,,,4486609,,,,,,8,13,2021,12,14,2021-12-14 08:00:00,5.6,-7.3,39.0,0.0,,240.0,11.2,,1026.2,,1.0
2,2021-12-14,8:17,BRONX,10475.0,40.86816,-73.83148,"(40.86816, -73.83148)",,,344 BAYCHESTER AVENUE,2.0,0.0,0,0,0,0,2,0,Unspecified,Unspecified,,,,4486660,Sedan,Sedan,,,,8,17,2021,12,14,2021-12-14 08:00:00,7.0,-8.6,32.0,0.0,,282.0,13.0,,1027.0,,1.0
3,2021-12-14,21:10,BROOKLYN,11207.0,40.67172,-73.8971,"(40.67172, -73.8971)",,,2047 PITKIN AVENUE,0.0,0.0,0,0,0,0,0,0,Driver Inexperience,Unspecified,,,,4487074,Sedan,,,,,21,10,2021,12,14,2021-12-14 21:00:00,11.7,-5.8,29.0,0.0,,10.0,13.0,,1033.8,,2.0
4,2021-12-14,14:58,MANHATTAN,10017.0,40.75144,-73.97397,"(40.75144, -73.97397)",3 AVENUE,EAST 43 STREET,,0.0,0.0,0,0,0,0,0,0,Passing Too Closely,Unspecified,,,,4486519,Sedan,Station Wagon/Sport Utility Vehicle,,,,14,58,2021,12,14,2021-12-14 14:00:00,9.0,-2.0,46.0,0.0,,322.0,7.0,,1033.0,,2.0


First, the geopandas dataframe is merged with the population, area, and density dataframe for some basic characteristics of each borough.

In [58]:
# Geo JSON data
url = "https://raw.githubusercontent.com/python-visualization/folium/main/examples/data"
nybb = f"{url}/nybb.zip"
geo_df = gpd.read_file(nybb)


# Population and density data
# source: https://en.wikipedia.org/wiki/Boroughs_of_New_York_City (2020 population)
data = {
    'BoroName':    ['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island'],
    'population': [1472654, 2736074, 1694263, 2405464, 495747],
    'area': [109.3, 179.7, 58.8, 281.5, 148.9],
    'density': [13482, 15227, 28872, 8542, 3327]
}

borough_info = pd.DataFrame(data)


# Merge the dataframes
borough_df = geo_df.merge(borough_info, on='BoroName')
borough_df.head()

Unnamed: 0,BoroCode,BoroName,Shape_Leng,Shape_Area,geometry,population,area,density
0,5,Staten Island,330454.175933,1623847000.0,"MULTIPOLYGON (((970217.022 145643.332, 970227....",495747,148.9,3327
1,3,Brooklyn,741227.337073,1937810000.0,"MULTIPOLYGON (((1021176.479 151374.797, 102100...",2736074,179.7,15227
2,4,Queens,896875.396449,3045079000.0,"MULTIPOLYGON (((1029606.077 156073.814, 102957...",2405464,281.5,8542
3,1,Manhattan,358400.912836,636430800.0,"MULTIPOLYGON (((981219.056 188655.316, 980940....",1694263,58.8,28872
4,2,Bronx,464475.145651,1186822000.0,"MULTIPOLYGON (((1012821.806 229228.265, 101278...",1472654,109.3,13482


In [59]:
# Ensure borough names match the geo df
incidents_df['BOROUGH'] = incidents_df['BOROUGH'].str.title()  

# create columns that indicate if only material damage, injuries or fatalities occured
incidents_df['MATERIAL DAMAGE'] = np.where((incidents_df['NUMBER OF PERSONS INJURED'] == 0) & (incidents_df['NUMBER OF PERSONS KILLED'] == 0), 1, 0)
incidents_df['INJURIES'] = np.where((incidents_df['NUMBER OF PERSONS INJURED'] > 0) & (incidents_df['NUMBER OF PERSONS KILLED'] == 0), 1, 0)
incidents_df['FATALITIES'] = np.where((incidents_df['NUMBER OF PERSONS INJURED'] >= 0) & (incidents_df['NUMBER OF PERSONS KILLED'] > 0), 1, 0)
incidents_df['SEVERE ACCIDENT'] = np.where((incidents_df['INJURIES'] > 0) | (incidents_df['FATALITIES'] > 0), 1, 0)

# the sum of MATERIAL DAMAGE, INJURIES and FATALITIES should be equal to df.shape = 235763
incidents_df['MATERIAL DAMAGE'].sum() + incidents_df['INJURIES'].sum() + incidents_df['FATALITIES'].sum()

1253287

Determine the most suitable metric for visualizing the number of accidents relative to the borough characteristics. 

In [60]:
# Group by borough and year, and calculate the number of accidents and fatal accidents
borough_year_accidents = incidents_df.groupby(['BOROUGH', 'YEAR']).agg({'SEVERE ACCIDENT': ['count', 'sum']}).reset_index()

# Flatten the MultiIndex column names
borough_year_accidents.columns = ['_'.join(col).rstrip('_') for col in borough_year_accidents.columns.values]
borough_year_accidents = borough_year_accidents.rename(columns={'SEVERE ACCIDENT_count': 'total_accidents',
                                                                'SEVERE ACCIDENT_sum': 'total_severe_accidents'})
                                                                

# Calculate the average number of total accidents and fatal accidents by borough
avg_borough_accidents = round(borough_year_accidents.groupby('BOROUGH').agg({'total_accidents': 'mean', 'total_severe_accidents': 'mean'})).reset_index()
avg_borough_accidents.columns = ['BOROUGH', 'annual_total_accidents', 'annual_severe_accidents']
avg_borough_accidents[avg_borough_accidents.columns[1:]] = avg_borough_accidents[avg_borough_accidents.columns[1:]].astype('int')
# Merge the avg_borough_accidents with the boros GeoDataFrame
borough_df = borough_df.merge(avg_borough_accidents, left_on='BoroName', right_on='BOROUGH').drop(columns='BOROUGH')

borough_year_accidents = borough_year_accidents.groupby('BOROUGH').sum()[['total_accidents',	'total_severe_accidents']].reset_index()
borough_year_accidents['severe_accident_rate'] = borough_year_accidents.total_severe_accidents/ borough_year_accidents.total_accidents
borough_df = borough_df.merge(borough_year_accidents, left_on='BoroName', right_on='BOROUGH')
borough_df

Unnamed: 0,BoroCode,BoroName,Shape_Leng,Shape_Area,geometry,population,area,density,annual_total_accidents,annual_severe_accidents,BOROUGH,total_accidents,total_severe_accidents,severe_accident_rate
0,5,Staten Island,330454.175933,1623847000.0,"MULTIPOLYGON (((970217.022 145643.332, 970227....",495747,148.9,3327,4625,939,Staten Island,50873,10334,0.203133
1,3,Brooklyn,741227.337073,1937810000.0,"MULTIPOLYGON (((1021176.479 151374.797, 102100...",2736074,179.7,15227,36334,8692,Brooklyn,399674,95613,0.239227
2,4,Queens,896875.396449,3045079000.0,"MULTIPOLYGON (((1029606.077 156073.814, 102957...",2405464,281.5,8542,30805,6681,Queens,338858,73496,0.216893
3,1,Manhattan,358400.912836,636430800.0,"MULTIPOLYGON (((981219.056 188655.316, 980940....",1694263,58.8,28872,25317,4294,Manhattan,278490,47239,0.169625
4,2,Bronx,464475.145651,1186822000.0,"MULTIPOLYGON (((1012821.806 229228.265, 101278...",1472654,109.3,13482,16856,3907,Bronx,185418,42982,0.231811


In [61]:
# Metrics for visualizing accidents relative to the borough characteristics
borough_df['accidents_per_capita'] = borough_df['annual_total_accidents'] / borough_df['population']
borough_df['accidents_per_sqkm'] = round(borough_df['annual_total_accidents'] / borough_df['area'])
borough_df.head()

Unnamed: 0,BoroCode,BoroName,Shape_Leng,Shape_Area,geometry,population,area,density,annual_total_accidents,annual_severe_accidents,BOROUGH,total_accidents,total_severe_accidents,severe_accident_rate,accidents_per_capita,accidents_per_sqkm
0,5,Staten Island,330454.175933,1623847000.0,"MULTIPOLYGON (((970217.022 145643.332, 970227....",495747,148.9,3327,4625,939,Staten Island,50873,10334,0.203133,0.009329,31.0
1,3,Brooklyn,741227.337073,1937810000.0,"MULTIPOLYGON (((1021176.479 151374.797, 102100...",2736074,179.7,15227,36334,8692,Brooklyn,399674,95613,0.239227,0.01328,202.0
2,4,Queens,896875.396449,3045079000.0,"MULTIPOLYGON (((1029606.077 156073.814, 102957...",2405464,281.5,8542,30805,6681,Queens,338858,73496,0.216893,0.012806,109.0
3,1,Manhattan,358400.912836,636430800.0,"MULTIPOLYGON (((981219.056 188655.316, 980940....",1694263,58.8,28872,25317,4294,Manhattan,278490,47239,0.169625,0.014943,431.0
4,2,Bronx,464475.145651,1186822000.0,"MULTIPOLYGON (((1012821.806 229228.265, 101278...",1472654,109.3,13482,16856,3907,Bronx,185418,42982,0.231811,0.011446,154.0


A folium map of NYC divided into the five boroughs is generated using the geo data with the added accident metrics. Additionally, hover tools displaying the borough name and the total number of accidents is added to the map for more detailed insights.

Each borough is colored according to the chosen accident metric.

In [64]:

from ipywidgets import interact
from IPython.display import display

def generateBaseMap(center, zoom=11):
    base_map = folium.Map(location=center, control_scale=True, zoom_start=zoom)
    return base_map
  
# Function to generate the map with different accident metrics
def generate_map(metric):
    by_metric = metric

    # Color map based on the selected metric
    colormap = LinearColormap(
        colors=["#f1eef6", "#bdc9e1", "#74a9cf", "#2b8cbe", "#045a8d"],
        index=None,#borough_df[by_metric].quantile([0, 0.25, 0.5, 0.75, 1]),
        vmin=borough_df[by_metric].min(),
        vmax=borough_df[by_metric].max(),
    )

    # Update the tooltip with the selected metric
    tooltip = GeoJsonTooltip(
        fields=["BoroName", "annual_total_accidents", "annual_severe_accidents", by_metric],
        aliases=["Borough", "Total Accidents (annual avg.)", "Severe Accidents (annual avg.)", by_metric.replace('_', ' ').title()],
        localize=True,
        sticky=False,
        labels=True,
        style="""
            background-color: #F0EFEF;
            border: 2px solid black;
            border-radius: 3px;
            box-shadow: 3px;
        """,
        max_width=800,
    )

    def style_function(feature):
      # Update colors on dropdown change
          accidents_by_metric = feature['properties'][metric]
          return {
              "fillColor": colormap(accidents_by_metric),
              "color": "black",
              "weight": 2,
              "fillOpacity": 0.7,
          }
    # Create the folium map with GeoJson and tooltip
    base_map = generateBaseMap([40.662, -73.9229])
    folium.TileLayer('cartodbpositron').add_to(base_map)
    folium.GeoJson(borough_df, style_function=style_function, tooltip=tooltip).add_to(base_map)

    # Add the colormap
    colormap.add_to(base_map)

    # Display the map
    display(base_map)

interact(generate_map, metric=['accidents_per_sqkm', 'accidents_per_capita', 'total_accidents', 'severe_accident_rate'])

# # Dropdown widget
# dropdown = widgets.Dropdown(
#     options=[
#         ("Accidents by fatality", "total_fatal_accidents"),
#         ("Accidents by frequency", "total_accidents"),
#         ("Accidents per capita (annual avg.)", "accidents_per_capita"),
#         ("Accidents per Sq. Km. (annual avg.)", "accidents_per_sqkm"),
#     ],
#     value="total_fatal_accidents",
#     description="Select metric:",
#     disabled=False,
# )

# # Update the map when the dropdown value changes
# def on_change(change):
#     if change["type"] == "change" and change["name"] == "value":
#         clear_output(wait=True)
#         display(dropdown)
#         generate_map(change["new"])

# dropdown.observe(on_change)
# display(dropdown)
# generate_map("total_fatal_accidents")

# base_map.save('nyc_accidents_map.html')



interactive(children=(Dropdown(description='metric', options=('accidents_per_sqkm', 'accidents_per_capita', 't…

<function __main__.generate_map(metric)>

In [67]:

from ipywidgets import interact
from IPython.display import display

def generateBaseMap(center, zoom=10):
    base_map = folium.Map(location=center, control_scale=True, zoom_start=zoom)
    return base_map
  
# Function to generate the map with different accident metrics
def generate_map(metric):
    by_metric = metric

    # Color map based on the selected metric
    colormap = LinearColormap(
        colors=["#f1eef6", "#bdc9e1", "#74a9cf", "#2b8cbe", "#045a8d"],
        index=None, #borough_df[by_metric].quantile([0, 0.25, 0.5, 0.75, 1]),
        vmin=borough_df[by_metric].min(),
        vmax=borough_df[by_metric].max(),
    )

    # Update the tooltip with the selected metric
    tooltip = GeoJsonTooltip(
        fields=["BoroName", "annual_total_accidents", "annual_severe_accidents", by_metric],
        aliases=["Borough", "Total Accidents (annual avg.)", "Severe Accidents (annual avg.)", by_metric.replace('_', ' ').title()],
        localize=True,
        sticky=False,
        labels=True,
        style="""
            background-color: #F0EFEF;
            border: 2px solid black;
            border-radius: 3px;
            box-shadow: 3px;
        """,
        max_width=800,
    )

    def style_function(feature):
      # Update colors on dropdown change
          accidents_by_metric = feature['properties'][metric]
          return {
              "fillColor": colormap(accidents_by_metric),
              "color": "black",
              "weight": 2,
              "fillOpacity": 0.7,
          }
    # Create the folium map with GeoJson and tooltip
    base_map = generateBaseMap([40.662, -73.9229])
    folium.TileLayer('cartodbpositron').add_to(base_map)
    folium.GeoJson(borough_df, style_function=style_function, tooltip=tooltip).add_to(base_map)

    # Add the colormap
    colormap.add_to(base_map)
    #base_map.save('temp_map.html')
    return base_map


interact(generate_map, metric=['accidents_per_sqkm', 'accidents_per_capita', 'total_accidents', 'severe_accident_rate'])

def save_map(metric):
    base_map = generate_map(metric)
    base_map.save(os.path.join(os.path.abspath('../..'), 'docs','assets', f'temp_map_{metric}.html'))
    base_map.save(f'../../docs/assets/temp_map_{metric}.html')


for m in ['accidents_per_sqkm', 'accidents_per_capita', 'total_accidents', 'severe_accident_rate']:
    save_map(m)

# main html to fetch each map with dropdown menu    
html_template = '''
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Interactive Map</title>
<script>
function update_map() {
    var selected_metric = document.getElementById("metric_dropdown").value;
    document.getElementById("map_iframe").src = "/assets/temp_map_" + selected_metric + ".html";
}
</script>
</head>
<body>
    <select id="metric_dropdown" onchange="update_map()">
        <option value="accidents_per_sqkm">Accidents per sq. km</option>
        <option value="accidents_per_capita">Accidents per capita</option>
        <option value="total_accidents">Total accidents</option>
         <option value="severe_accident_rate">Severe accident rate</option>
    </select>
    <iframe id="map_iframe" src="/assets/temp_map_accidents_per_sqkm.html" width="100%" height="600" frameborder="0" style="border:0" allowfullscreen></iframe>
</body>
</html>
'''

with open('../../docs/_includes/nyc_accidents_map.html', 'w') as f:
    f.write(html_template)



interactive(children=(Dropdown(description='metric', options=('accidents_per_sqkm', 'accidents_per_capita', 't…

In [20]:
html_template = '''
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Interactive Map</title>
<script>
function update_map() {
    var selected_metric = document.getElementById("metric_dropdown").value;
    document.getElementById("map_iframe").src = "temp_map_" + selected_metric + ".html";
}
</script>
</head>
<body>
    <select id="metric_dropdown" onchange="update_map()">
        <option value="accidents_per_sqkm">Accidents per sq. km</option>
        <option value="accidents_per_capita">Accidents per capita</option>
        <option value="annual_total_accidents">Annual total accidents</option>
         <option value="annual_fatal_accidents">Annual fatal accidents</option>
    </select>
    <iframe id="map_iframe" src="temp_map_accidents_per_sqkm.html" width="100%" height="600" frameborder="0" style="border:0" allowfullscreen></iframe>
</body>
</html>
'''

with open('..nyc_accidents_map.html', 'w') as f:
    f.write(html_template)

