## Stage 0
Extract collision data from https://data.cityofnewyork.us/Public-Safety/NYPD-Motor-Vehicle-Collisions-Crashes/h9gi-nx95, filtered by borough, by crash date range, and to collisions in which at least one cyclist was injured or killed.

In [59]:
import requests
# Borough names used in the filter; the last entry is the one queried.
boroughs = ['BRONX', 'QUEENS', 'STATEN ISLAND', 'BROOKLYN', 'MANHATTAN']
borough = boroughs[-1]

# SoQL query: collisions in the chosen borough, inside the date window, where
# at least one cyclist was injured or killed.
# NOTE(review): the app token is hardcoded in the notebook; consider loading it
# from an environment variable before sharing or committing this file.
url = 'https://data.cityofnewyork.us/resource/h9gi-nx95.json?' + \
      '$$app_token=HDCRkI7kFAjtZfZUc9ww2WCpM&' + \
      f'$where=borough="{borough}" AND' + \
      '(number_of_cyclist_injured>0 OR number_of_cyclist_killed>0) AND' + \
      '(crash_date between "2019-06-01T12:00:00" and "2019-09-01T12:00:00")'

# A timeout keeps the cell from hanging forever if the service is unreachable.
req = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of trying to parse an error body as data.
req.raise_for_status()
# Decode the body as JSON. eval() must not be used on a network response: it
# executes whatever text the server returns and fails on true/false/null.
cycle_data = req.json()

'https://data.cityofnewyork.us/resource/h9gi-nx95.json?$$app_token=HDCRkI7kFAjtZfZUc9ww2WCpM&$where=borough="MANHATTAN" AND(number_of_cyclist_injured>0 OR number_of_cyclist_killed>0) AND(crash_date between "2019-06-01T12:00:00" and "2019-09-01T12:00:00")'

In [58]:
# Number of collision records held in cycle_data.
len(cycle_data)

335

In [48]:
import datetime as dt

# Merge each record's calendar date and time-of-day fields into a single
# datetime, stored back on the record under 'crash_datetime'.
for record in cycle_data:
    # Only the part of 'crash_date' before the 'T' is used.
    date_part = record['crash_date'].partition('T')[0]
    stamp = 'T'.join((date_part, record['crash_time']))
    record['crash_datetime'] = dt.datetime.strptime(stamp, '%Y-%m-%dT%H:%M')

## Stage 1
Store the data from Stage 0 in a database using a proper data model

In [49]:
import sqlite3

def connect_db(db_path='./data/cyclist_vehicle_collisons.db'):
    """Open a SQLite connection to the collisions database.

    Args:
        db_path: Path of the database file to open. Defaults to the file
            used throughout this notebook.

    Returns:
        A ``sqlite3.Connection`` opened with ``PARSE_DECLTYPES``, or ``None``
        if the connection could not be opened (the error is printed).
    """
    try:
        return sqlite3.connect(db_path, detect_types=sqlite3.PARSE_DECLTYPES)
    except sqlite3.Error as err:
        print(err)
        # Return None explicitly so a failed connect does not surface as an
        # unrelated UnboundLocalError.
        return None

In [50]:
# Schema of the single table used by this notebook: one row per collision,
# keyed by the dataset's collision id.
create_table_sql = '''
CREATE TABLE IF NOT EXISTS collisions(
 collision_id INTEGER PRIMARY KEY,
 crash_datetime DATETIME,
 latitude DOUBLE,
 longitude DOUBLE,
 cyclist_injured INTEGER,
 cyclist_killed INTEGER,
 borough VARCHAR(255)
)
'''

# Open the database and create the table if it is not there yet. The
# connection and cursor stay open for the cell that inserts the rows.
conn = connect_db()
cursor = conn.cursor()
cursor.execute(create_table_sql)
conn.commit()

In [51]:
# Parameterized statement: sqlite3 binds the values itself, so quotes or other
# special characters in the downloaded data cannot break or alter the SQL.
insert_cycle_data_sql = '''
INSERT OR IGNORE INTO collisions(
    collision_id,
    crash_datetime,
    latitude,
    longitude,
    cyclist_injured,
    cyclist_killed,
    borough)
VALUES(?, ?, ?, ?, ?, ?, ?)
'''

rows = []
for c in cycle_data:
    try:
        rows.append((
            c['collision_id'],
            # Store the datetime as its str() text form, the same text the
            # previous string-formatted INSERT wrote.
            str(c['crash_datetime']),
            c['latitude'],
            c['longitude'],
            c['number_of_cyclist_injured'],
            c['number_of_cyclist_killed'],
            c['borough'],
        ))
    except KeyError:
        # don't include data points that don't have locations
        continue

# INSERT OR IGNORE skips rows whose collision_id is already in the table,
# so re-running this cell does not create duplicates.
cursor.executemany(insert_cycle_data_sql, rows)
conn.commit()
conn.close()

## Stage 2
Visualize the data on a map and put it on a website

In [52]:
import pandas as pd

conn = connect_db()
# Bind the borough as a query parameter instead of formatting it into the SQL
# text, so its value can never be interpreted as SQL.
select_data_sql = "SELECT * FROM collisions WHERE borough = ?"
try:
    df = pd.read_sql_query(select_data_sql, conn, params=(borough,))
finally:
    # Close the connection even if the query fails.
    conn.close()

In [53]:
# (rows, columns) of the DataFrame read back from the collisions table.
df.shape

(326, 7)

In [54]:
import folium
from folium.plugins import MarkerCluster
# Create the base map and its marker cluster only if no `m` exists yet in this
# kernel session.
# NOTE(review): because of this guard, re-running the cell adds markers to the
# existing cluster rather than starting from an empty map — confirm that this
# accumulation is intended.
if 'm' not in locals():
    m = folium.Map(location=[40.74527, -73.988573], tiles='Stamen Terrain')
    marker_cluster = MarkerCluster().add_to(m)

# Add one marker per collision row to the cluster layer.
for _, row in df.iterrows():
    marker = folium.Marker(location=[row['latitude'], row['longitude']])
    marker.add_to(marker_cluster)

# Display the map as the cell output.
m