# New York, USA

**Source of original dataset:** https://data.cityofnewyork.us/Public-Safety/Motor-Vehicle-Collisions-Crashes/h9gi-nx95

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Fatality, Injury, PDO

In [None]:
import pandas as pd
# Show every column and never truncate cell text when a frame is displayed.
# Fully qualified option names are used because the bare 'max_columns'
# pattern is ambiguous on pandas >= 1.4 (it also matches
# 'styler.render.max_columns') and raises OptionError.
pd.set_option('display.max_columns', None)
# None means "no width limit"; the former -1 sentinel was deprecated in
# pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj

Set up input files

In [None]:
# Relative directory holding the raw download, and the full path of the
# collisions CSV inside it.
data_dir = '../data/new_york/'
new_york_data = ''.join([data_dir, 'Motor_Vehicle_Collisions_-_Crashes.csv'])

Read original data

In [None]:
# Load the complete collisions CSV into a single DataFrame.
data = pd.read_csv(filepath_or_buffer=new_york_data)

Create Datetime column

In [None]:
# Force the raw crash date to text, store it back, then parse it as
# month/day/year into a new 'Date' column.
crash_date_text = data['CRASH DATE'].astype('str')
data['CRASH DATE'] = crash_date_text
data['Date'] = pd.to_datetime(crash_date_text, format='%m/%d/%Y')

Set up bicycle filter

In [None]:
# Every spelling of a bicycle / e-bike that this notebook treats as a bicycle
# in the VEHICLE TYPE CODE columns. Each is rewritten to the single label
# 'bicycle'.
bicycle_labels = [
    'BICYCLE', 'Bike', 'E-Bik', 'E bik', 'E - B', 'E-BIK',
    'Bicyc', 'BICYC', 'e-bik', 'EBIKE',
]


def _bicycle_rows(column):
    """Return the index labels of rows whose `column` holds a bicycle spelling."""
    # isin() is False for missing values, the same as the == comparisons
    # it replaces.
    return data.index[data[column].isin(bicycle_labels)].tolist()


# Normalise each vehicle-type column in place. index_1 .. index_5 are kept as
# module-level names in case later cells inspect them.
index_1 = _bicycle_rows('VEHICLE TYPE CODE 1')
data.loc[index_1, 'VEHICLE TYPE CODE 1'] = 'bicycle'

index_2 = _bicycle_rows('VEHICLE TYPE CODE 2')
data.loc[index_2, 'VEHICLE TYPE CODE 2'] = 'bicycle'

index_3 = _bicycle_rows('VEHICLE TYPE CODE 3')
data.loc[index_3, 'VEHICLE TYPE CODE 3'] = 'bicycle'

index_4 = _bicycle_rows('VEHICLE TYPE CODE 4')
data.loc[index_4, 'VEHICLE TYPE CODE 4'] = 'bicycle'

index_5 = _bicycle_rows('VEHICLE TYPE CODE 5')
# Fix: the matches for column 5 were previously written into
# 'VEHICLE TYPE CODE 1', overwriting the first vehicle's type and leaving
# column 5 un-normalised.
data.loc[index_5, 'VEHICLE TYPE CODE 5'] = 'bicycle'

Set up Latitude & Longitude columns

In [None]:
# Duplicate the upper-case coordinate columns under capitalised names.
for new_name, raw_name in (('Longitude', 'LONGITUDE'), ('Latitude', 'LATITUDE')):
    data[new_name] = data[raw_name]

Some key statistics

In [None]:
# Report the earliest and latest parsed crash dates in the dataset.
first_crash = data['Date'].min()
last_crash = data['Date'].max()
print('Accidents between ' + str(first_crash) + ' and ' + str(last_crash))

Slice all bicycle accidents

In [None]:
# Keep every crash where at least one of the five vehicle-type columns
# holds the 'bicycle' label.
vehicle_type_columns = [
    'VEHICLE TYPE CODE 1',
    'VEHICLE TYPE CODE 2',
    'VEHICLE TYPE CODE 3',
    'VEHICLE TYPE CODE 4',
    'VEHICLE TYPE CODE 5',
]
involves_bicycle = data[vehicle_type_columns].eq('bicycle').any(axis=1)
data_bicycles = data[involves_bicycle]

In [None]:
# Preview the first five bicycle-crash rows.
data_bicycles.head(n=5)

Save to file

In [None]:
# Report the size of the bicycle subset, then write it (index included) to a
# CSV in the current working directory.
output_csv = 'cycling_safety_new_york.csv'
print(data_bicycles.shape)
data_bicycles.to_csv(path_or_buf=output_csv)
print('Wrote file to: cycling_safety_new_york.csv')