# Colorado, USA

**Source of original dataset:** https://data.drcog.org/data?category[0]=Transportation&page=1&q=crash&sort=title

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** fatal, incapacitation, non_incapacitation, complaint_injury, no_injury 

In [None]:
import pandas as pd
# Display every column and never truncate cell contents when showing frames.
# The fully-qualified 'display.' keys are used because the bare 'max_columns'
# pattern is ambiguous on pandas versions that also define
# 'styler.render.max_columns'; None is the supported "no limit" value for
# 'display.max_colwidth' (the old -1 sentinel is rejected by current pandas).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj
import geopandas as gpd

Setup input files

In [None]:
# Directory that holds the yearly crash shapefiles.
data_dir = "../data/colorado/"

# One shapefile name per year, 2004 through 2018 inclusive.
accident_files = [f"drcog_crash_{year}.shp" for year in range(2004, 2019)]

# Path of every yearly file, built by prefixing the data directory.
accidents_data_files = [data_dir + file_name for file_name in accident_files]

Read original data

In [None]:
# ParserError is part of the public pandas.errors module; importing it from
# pandas.io.parsers fails on pandas versions where that is a package that does
# not re-export it.
from pandas.errors import ParserError

# Read every yearly shapefile with geopandas, keeping the order of
# accidents_data_files (one loaded frame per file).
data_aux = [gpd.read_file(shapefile_path) for shapefile_path in accidents_data_files]

In [None]:
# Flatten the per-year frames into one DataFrame by turning every row into a
# {column: value} dict and rebuilding a single frame from all of those dicts.
list_of_dfs = data_aux

list_of_dicts = []
for yearly_frame in list_of_dfs:
    # Transposing first makes to_dict() key the records by row label.
    list_of_dicts.append(yearly_frame.T.to_dict().values())

all_rows = list(chain.from_iterable(list_of_dicts))
data = pd.DataFrame(all_rows)
print(data.shape)

Create Datetime column

In [None]:
# Constant time-of-day string assigned to every row; it is concatenated with
# 'accident_d' further down to build the 'Date' column.
data['hour'] = '00:00'

In [None]:
# Keep only rows that have an accident date. The explicit copy makes the
# filtered frame independent of the original, so the column assignment below
# does not trigger pandas' SettingWithCopyWarning.
data = data[data['accident_d'].notna()].copy()

# Join the date text with the 'hour' column and parse the result as a datetime.
date_text = data['accident_d'].astype('str') + ' ' + data['hour'].astype('str')
data['Date'] = pd.to_datetime(date_text, format='%Y-%m-%d %H:%M')



Setup outcome and bicycles columns

In [None]:
# Outcome indicator columns, created in this order and initialised to 0.
outcome_columns = (
    'fatal',
    'incapacitation',
    'non_incapacitation',
    'complaint_injury',
    'no_injury',
)
for outcome_column in outcome_columns:
    data[outcome_column] = 0

# Value found in the di1/di2/di3 columns -> outcome column that gets flagged.
# NOTE(review): the no-injury code is written '0' while the other codes are
# zero-padded two-character strings -- confirm against the data dictionary.
outcome_by_code = {
    '0': 'no_injury',
    '01': 'complaint_injury',
    '02': 'non_incapacitation',
    '03': 'incapacitation',
    '04': 'fatal',
}

# An outcome is flagged with 1 when any of the three 'di' columns carries
# the matching code.
for injury_column in ('di1', 'di2', 'di3'):
    for code, outcome_column in outcome_by_code.items():
        data.loc[data[injury_column] == code, outcome_column] = 1

In [None]:
# Indicator column: 1 when any of the checked columns marks the row as
# bicycle-related, otherwise 0.
data['Bicycles'] = 0
bicycle_related_codes = ['13']

# The 'vt' columns are matched against the code(s) listed above.
for vehicle_column in ('vt1', 'vt2', 'vt3'):
    for code in bicycle_related_codes:
        data.loc[data[vehicle_column] == code, 'Bicycles'] = 1

# The remaining columns are matched against the code '15'.
for coded_column in ('act1', 'act2', 'act3', 'mhe', 'acctype'):
    data.loc[data[coded_column] == '15', 'Bicycles'] = 1


data['Bicycles'] = data['Bicycles'].astype('int')

Setup latitude & longitude column

In [None]:
# Duplicate the lower-case coordinate columns under capitalised names.
for target_column, source_column in (('Latitude', 'latitude'),
                                     ('Longitude', 'longitude')):
    data[target_column] = data[source_column]

Some key statistics

In [None]:
# Earliest and latest values of the 'Date' column.
first_date, last_date = data['Date'].min(), data['Date'].max()
print(f"Accidents between {first_date} and {last_date}")

# Row count of the combined frame.
total_accidents = len(data)
print(f"There are a total of {total_accidents} accidents.")

# Each figure below is the sum of the corresponding 0/1 indicator column.
fatalities = data['fatal'].sum()
print(f"There are a total of {fatalities} fatalities.")

incapacitated = data['incapacitation'].sum()
print(f"There are a total of {incapacitated} incapacitated.")

non_incapacitating = data['non_incapacitation'].sum()
print(f"There are a total of {non_incapacitating} non_incapacitating.")

complaint_injuries = data['complaint_injury'].sum()
print(f"There are a total of {complaint_injuries} complaint_injuries.")

non_injuries = data['no_injury'].sum()
print(f"There are a total of {non_injuries} non_injuries.")

# Number of rows whose 'Bicycles' indicator is set.
bicycles = int((data['Bicycles'] > 0).sum())
print(f"There are a total of {bicycles} bicycles involved in all the accidents.")

Slice all bicycle accidents

In [None]:
# Keep only the rows whose 'Bicycles' indicator is set.
has_bicycle = data['Bicycles'] > 0
data_bicycles = data.loc[has_bicycle]

In [None]:
# Preview the first five rows of the bicycle subset.
data_bicycles.head(n=5)

Save to file

In [None]:
# Report the size of the bicycle subset, then write it to CSV in the current
# working directory.
output_csv = 'cycling_safety_colorado.csv'
print(data_bicycles.shape)
data_bicycles.to_csv(output_csv)
print(f'Wrote file to: {output_csv}')