# Detroit, USA

**Source of original dataset:** https://semcog.org/traffic-crash-data

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Fatalities, A-level Injury, B-level Injury, C-level Injury, PDO

In [None]:
import pandas as pd
# Show every column and the full contents of each cell when displaying frames.
# Options are addressed by their full names: the bare 'max_columns' pattern is
# ambiguous in recent pandas (it also matches 'styler.render.max_columns').
# None means "no limit" for both options; the former -1 for max_colwidth has
# been deprecated since pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj

Set up input files

In [None]:
# Folder containing the crash exports, relative to this notebook.
data_dir = "../data/detroit/"

# One export per range of years.
accident_files = [
    "2009-2012_Crash Search Results.csv",
    "2013-2015_Crash Search Results.csv",
    "2016-2018_Crash Search Results.csv",
]

# Prefix every file name with the data folder to get the paths to read.
accidents_data_files = ["".join((data_dir, file_name)) for file_name in accident_files]

Read original data

In [None]:
# Load each crash export into its own DataFrame, in the order the files are
# listed in accidents_data_files.
# The encoding was previously given as "ANSI", a name Python only resolves on
# Windows (as an alias of the system code page); elsewhere it raises
# LookupError. cp1252 is the portable codec name.
# NOTE(review): assumes the exports were written with a Western/US Windows
# code page -- confirm against the source files.
data_aux = [
    pd.read_csv(accidents_data, encoding="cp1252")
    for accidents_data in accidents_data_files
]

In [None]:
# Stack the per-period frames into a single table with a fresh 0..n-1 index.
# pd.concat does this directly; the earlier version transposed every frame and
# materialised each row as a Python dict just to rebuild one DataFrame.
# Columns missing from some frame are kept and filled with NaN, in order of
# first appearance (sort=False).
list_of_dfs = data_aux
data = pd.concat(list_of_dfs, ignore_index=True, sort=False)

Create Datetime column

In [None]:
# Normalise MONTH to a two-digit number and TIME to a 24-hour "HH:MM" string
# so that a timestamp can be assembled from the separate columns.
# Values that do not appear in a table are left untouched.
month_to_number = {
    'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04',
    'May': '05', 'Jun': '06', 'Jul': '07', 'Aug': '08',
    'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12',
}

# Two corrections over the previous hand-written statements:
#   * '3pm' is 15:00 (it was mapped to '1:00');
#   * '12am' is midnight and '12pm' is noon (they were swapped).
# NOTE(review): the 12am/12pm fix assumes the source uses the standard
# 12-hour convention -- confirm against the dataset documentation.
time_to_24h = {
    '12am': '00:00',
    '1am': '01:00', '2am': '02:00', '3am': '03:00', '4am': '04:00',
    '5am': '05:00', '6am': '06:00', '7am': '07:00', '8am': '08:00',
    '9am': '09:00', '10am': '10:00', '11am': '11:00',
    '12pm': '12:00',
    '1pm': '13:00', '2pm': '14:00', '3pm': '15:00', '4pm': '16:00',
    '5pm': '17:00', '6pm': '18:00', '7pm': '19:00', '8pm': '20:00',
    '9pm': '21:00', '10pm': '22:00', '11pm': '23:00',
    # Unknown times fall back to midnight so the row still gets a timestamp;
    # such rows cannot be told apart from real midnight crashes afterwards.
    'Unk': '00:00',
}

for month_label, month_number in month_to_number.items():
    data.loc[data['MONTH'] == month_label, 'MONTH'] = month_number

for time_label, time_24h in time_to_24h.items():
    data.loc[data['TIME'] == time_label, 'TIME'] = time_24h

In [None]:
# Assemble "DD/MM/YYYY HH:MM" text from the separate columns, then parse it.
# DATE is zero-padded to two characters (presumably the day of the month --
# it is placed before MONTH in the string).
data['Date'] = (
    data['DATE'].astype('str').str.zfill(2)
    + '/' + data['MONTH'].astype('str')
    + '/' + data['YEAR'].astype('str')
    + ' ' + data['TIME'].astype('str')
)

# The format is spelled out because pandas otherwise guesses month-first:
# "05/03/2012" would be read as 3 May instead of 5 March, silently swapping
# day and month for every day <= 12. With an explicit format, a value that
# does not match (e.g. an unmapped TIME label) raises instead of being guessed.
data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y %H:%M')

data.head()

Set up bicycle and severity indicator columns

In [None]:
# Missing FACTOR values become the placeholder string '0'.
data['FACTOR'] = data['FACTOR'].fillna('0')

# 0/1 flag: 1 where FACTOR contains the letter 'B', 0 otherwise.
data['bicycle'] = data['FACTOR'].str.contains('B', na=False).astype('int64')

# One 0/1 flag column per SEVERITY label; rows with any other label get 0
# in all five columns.
severity_flags = (
    ('Fatal', 'fatal'),
    ('A-level', 'a_level'),
    ('B-level', 'b_level'),
    ('C-level', 'c_level'),
    ('PDO', 'pdo'),
)
for severity_label, flag_column in severity_flags:
    data[flag_column] = (data['SEVERITY'] == severity_label).astype('int64')

Some key statistics

In [None]:
# Time span covered by the combined data.
earliest, latest = data['Date'].min(), data['Date'].max()
print('Accidents between ', earliest, ' and ', latest, sep='')

# Every row is one accident.
total_accidents = len(data.index)
print("There are a total of ", total_accidents, " accidents.", sep="")

# The flag columns hold 0/1, so their sums count the flagged rows.
fatalities = data["fatal"].sum()
print("There are a total of ", fatalities, " fatalities.", sep="")

pdo = data["pdo"].sum()
print("There are a total of ", pdo, " property damage only accidents.", sep="")

bicycles = data["bicycle"].sum()
print("There are a total of ", bicycles, " bicycles involved in all the accidents.", sep="")

Slice all bicycle accidents

In [None]:
# Keep only the rows whose bicycle flag is set.
data_bicycles = data.loc[data['bicycle'].gt(0)]

In [None]:
# Preview the first rows of the bicycle subset.
data_bicycles.head()

In [None]:
# Display the bicycle subset as the cell output.
data_bicycles

Save to file

In [None]:
# Report the subset's (rows, columns), then write it, index included, to a
# CSV file in the current working directory.
rows_and_columns = data_bicycles.shape
print(rows_and_columns)
data_bicycles.to_csv(path_or_buf='cycling_safety_detroit.csv')
print('Wrote file to: cycling_safety_detroit.csv')