# Los Angeles, USA

**Source of original dataset:** https://data.lacity.org/A-Safe-City/Traffic-Collision-Data-from-2010-to-Present/d5tf-ez2w

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Fatality, Serious Injury, Injury, Complaint of Injury, No Injury

In [None]:
import pandas as pd
# Show every column and never truncate cell contents when a frame is displayed.
# The fully-qualified option name is required because the bare 'max_columns'
# pattern is ambiguous on current pandas, and None (not -1) is the supported
# way to disable the column-width limit.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj

Set up input files

In [None]:
# Directory that holds the raw Los Angeles download (relative to this notebook).
data_dir = "../data/los_angeles/"

# Full path of the collision export: the directory followed by the CSV name.
data_file = "".join([data_dir, "Traffic_Collision_Data_from_2010_to_Present.csv"])

Read original data

In [None]:
# Load the complete collision export into a single DataFrame.
data = pd.read_csv(filepath_or_buffer=data_file)

Convert columns to the right types and split the MO codes into separate columns

In [None]:
# Split the space-separated "MO Codes" string into one column per code.
# How many columns come out depends on the longest code list in the data, so
# the MO_CODE_<n> names are derived from the split result instead of assuming
# exactly ten columns.
mo_code_parts = data['MO Codes'].str.split(' ', expand=True)
mo_code_parts.columns = ['MO_CODE_' + str(position) for position in mo_code_parts.columns]
data = pd.concat([data, mo_code_parts], axis=1)

# NOTE: this replaces missing values with 0 in *every* column of the frame,
# not only in the new MO_CODE_<n> columns (rows with fewer codes are padded).
data.fillna(value=0, inplace=True)

# The codes were split out of a string, so convert each code column to int.
for mo_code_column in mo_code_parts.columns:
    data[mo_code_column] = data[mo_code_column].astype('int')

In [None]:
bicycle_related_codes = ['3008', '3016', '3017', '3018', '3021', '345', '1223', '3062', '3603']

# (new column, regex searched for in "MO Codes"), in the order the columns are
# added to the frame.
# NOTE(review): str.contains does a substring search, so a short code such as
# '345' also matches any longer code that contains it -- confirm this is intended.
mo_code_flags = [
    ('Bicycles', '|'.join(bicycle_related_codes)),
    ('Fatalities', '3027'),
    ('SeriousInjuries', '3024'),
    ('Injuries', '3025'),
    ('ComplainInjuries', '3026'),
    ('NoInjuries', '3028'),
]

# Each flag is 1 when the pattern occurs in the row's "MO Codes" and 0 otherwise.
# na=False makes rows without a usable code string count as "no match" directly,
# instead of patching NaN afterwards with an in-place fillna on a column
# selection (which does not update the frame under pandas copy-on-write).
for flag_column, code_pattern in mo_code_flags:
    data[flag_column] = data['MO Codes'].str.contains(code_pattern, na=False).astype('int')

# Pull the first two decimal numbers out of "Location" as latitude and longitude
# (presumably the text is formatted as "(lat, lon)" -- verify against the data).
# Rows where the pattern does not match get NaN in both columns.
p = r'(?P<lat>-?\d+\.\d+).*?(?P<lon>-?\d+\.\d+)'
data[['Latitude', 'Longitude']] = data['Location'].str.extract(p, expand=True).astype(float)

Create Datetime column

In [None]:
# Parse the raw "Date Occurred" values into datetimes and store them as "Date".
occurred_on = pd.to_datetime(data['Date Occurred'])
data['Date'] = occurred_on

Some key statistics

In [None]:
# Report the covered period from the parsed 'Date' column: min/max on the raw
# 'Date Occurred' values would compare them as unparsed text rather than
# chronologically.
first_accident = data['Date'].min()
last_accident = data['Date'].max()
print('Accidents between '+str(first_accident)+' and '+str(last_accident))

# Every row of the frame is one accident.
total_accidents = data.shape[0]
print("There are a total of "+str(total_accidents)+" accidents.")

# The outcome columns are 0/1 flags, so their sums count the flagged accidents.
fatalities = data["Fatalities"].sum()
print("There are a total of "+str(fatalities)+" fatalities.")

serious_injuries = data["SeriousInjuries"].sum()
print("There are a total of "+str(serious_injuries)+" seriously injured.")

injuries = data["Injuries"].sum()
print("There are a total of "+str(injuries)+" injured.")

complain_injuries = data["ComplainInjuries"].sum()
print("There are a total of "+str(complain_injuries)+" complain injured.")

# Print the no-injury count itself (the serious-injury total was printed here before).
no_injuries = data["NoInjuries"].sum()
print("There are a total of "+str(no_injuries)+" no injured.")

bicycles = data["Bicycles"].sum()
print("There are a total of "+str(bicycles)+" bicycles involved in all the accidents.")

Slice all bicycle accidents

In [None]:
# Keep only the rows whose "Bicycles" flag is set.
involves_bicycle = data['Bicycles'] > 0
data_bicycles = data.loc[involves_bicycle]

In [None]:
# Preview the first five bicycle-related rows.
data_bicycles.head(n=5)

Save to file

In [None]:
# Show the (rows, columns) size of the selection, then write it to a CSV in
# the current working directory and confirm where it went.
output_csv = 'cycling_safety_los_angeles.csv'
print(data_bicycles.shape)
data_bicycles.to_csv(output_csv)
print('Wrote file to: ' + output_csv)