# Richmond, USA

**Source of original dataset:** https://data.richmondgov.com/Community-Safety-and-Well-Being/Bicycle-Motor-Vehicle-Accidents/hcga-86ns/data

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Fatality, Hospitalisation, Medical Treatment, Minor Injury, PDO (property damage only)

In [None]:
import pandas as pd
# Show every column when a DataFrame is displayed. The fully qualified option
# name is used because the short form 'max_columns' is ambiguous in recent
# pandas (it also matches 'styler.render.max_columns') and raises OptionError.
pd.set_option('display.max_columns', None)
# None means "never truncate cell contents". The legacy value -1 was deprecated
# in pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj
import glob

Set up input files

In [None]:
# Relative path of the folder that holds the raw Richmond CSV export.
data_dir = '../data/richmond/'

Read original data

In [None]:
# Load the raw export into a DataFrame; the file is decoded as ISO-8859-1 (Latin-1).
raw_csv_path = f"{data_dir}RVA_Bike_Accidents (1).csv"
data = pd.read_csv(raw_csv_path, encoding="ISO-8859-1")

Create Datetime column

In [None]:
# Keep only rows that have a date. .copy() makes the result an independent
# frame, so the column assignments that follow (here and in later cells) do
# not raise pandas' SettingWithCopyWarning or write to a temporary object.
data = data[data['Date'].notna()].copy()
# Left-pad the time text with zeros to four characters (e.g. '930' -> '0930')
# so it can be parsed with the %H%M format.
# NOTE(review): if 'Time' was read as float (e.g. because some values are
# missing) the text would look like '930.0' - confirm the column dtype.
data['Time'] = data['Time'].astype(str).str.rjust(4, '0')

In [None]:
# Join the date text and the time text with a single space, keep that string
# in 'Date_aux', then overwrite 'Date' with the parsed timestamp.
date_text = data['Date'].astype('str')
time_text = data['Time'].astype('str')
data['Date_aux'] = date_text.str.cat(time_text, sep=' ')
data['Date'] = pd.to_datetime(data['Date_aux'], format='%m/%d/%Y %H%M')

Set up latitude & longitude columns

In [None]:
# 'Location 1' is presumably text of the form "(latitude, longitude)".
# One explicit regex pulls both numbers out. This avoids relying on
# str.replace's version-dependent regex default for the '(' / ')' patterns,
# and rows whose location is missing or malformed become NaN instead of
# breaking the DataFrame construction.
coords = data['Location 1'].str.extract(
    r'\(\s*([-+]?(?:\d+\.?\d*|\.\d+))\s*,\s*([-+]?(?:\d+\.?\d*|\.\d+))\s*\)'
)
data['Latitude'] = coords[0].astype(float)
data['Longitude'] = coords[1].astype(float)

Some key statistics

In [None]:
# Print headline figures for the dataset.
# NOTE(review): the columns read here (REPORTDATE, FATAL_*, MAJORINJURIES_*,
# MINORINJURIES_*, UNKNOWNINJURIES_*, TOTAL_BICYCLES) are not created anywhere
# in this notebook, so they must already exist in the CSV - confirm against the
# Richmond schema; a missing column raises KeyError.
report_dates = data['REPORTDATE']
print(f"Accidents between {report_dates.min()!s} and {report_dates.max()!s}")

total_accidents = len(data.index)
print(f"There are a total of {total_accidents!s} accidents.")

# Every severity is spread over four per-party columns. The passenger column
# has no underscore before PASSENGER.
_PARTY_SUFFIXES = ("_BICYCLIST", "_DRIVER", "_PEDESTRIAN", "PASSENGER")


def _sum_over_parties(prefix):
    """Add the column totals of `prefix` + each party suffix, left to right."""
    column_totals = [data[prefix + suffix].sum() for suffix in _PARTY_SUFFIXES]
    combined = column_totals[0]
    for value in column_totals[1:]:
        combined = combined + value
    return combined


fatalities = _sum_over_parties("FATAL")
print(f"There are a total of {fatalities!s} fatalities.")

major_injuries = _sum_over_parties("MAJORINJURIES")
print(f"There are a total of {major_injuries!s} major injured.")

minor_injuries = _sum_over_parties("MINORINJURIES")
print(f"There are a total of {minor_injuries!s} minor injured.")

unknown_injuries = _sum_over_parties("UNKNOWNINJURIES")
print(f"There are a total of {unknown_injuries!s} unknown injured.")

bicycles = data["TOTAL_BICYCLES"].sum()
print(f"There are a total of {bicycles!s} bicycles involved in all the accidents.")

Slice all bicycle accidents

In [None]:
# No filtering is applied: every row is kept. This binds a second name to the
# same DataFrame object (not a copy), so changes made through either name
# are visible through both.
data_bicycles = data

In [None]:
# Preview the first rows of the frame as the notebook cell output.
data_bicycles.head()

Save to file

In [None]:
# The output filename is defined once so the path that is written and the path
# reported in the log message cannot drift apart.
output_file = 'cycling_safety_richmond.csv'
# Report (rows, columns) of the frame being saved.
print(data_bicycles.shape)
# NOTE: with to_csv's defaults the DataFrame index is written as the first,
# unnamed column of the CSV.
data_bicycles.to_csv(output_file)
print('Wrote file to: ' + output_file)