# Cambridge, UK

**Source of original dataset:** https://data.cambridgeshireinsight.org.uk/dataset/road-traffic-collisions-location

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Severity (Fatal, Serious, Slight)

In [None]:
import pandas as pd
# Display every column and never truncate cell contents when showing frames.
# The fully qualified option name is used because the bare 'max_columns'
# pattern matches more than one option in recent pandas releases and raises
# OptionError.
pd.set_option('display.max_columns', None)
# None means "no width limit"; the old -1 sentinel is deprecated.
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj

Set up input files

In [None]:
# Directory holding the yearly collision exports.
data_dir = "../data/cambridgeshire/"

# One "points" CSV per year, 2012 through 2017 inclusive.
accident_files = ["RTA Collision {} points.csv".format(year)
                  for year in range(2012, 2018)]
# Prefix each file name with the data directory to get the paths to load.
accidents_data_files = ["".join((data_dir, file_name))
                        for file_name in accident_files]

Read original data

In [None]:
# ParserError is exposed from pandas.errors; the old pandas.io.parsers import
# path is not available in newer pandas. The name is kept in scope even though
# this cell does not use it.
from pandas.errors import ParserError

# Load each yearly CSV into its own DataFrame, in the order of
# accidents_data_files.
# "ANSI" is only a codec alias on Windows and raises LookupError elsewhere, so
# the explicit Windows-1252 code page is used instead.
# NOTE(review): assumes the files were exported as Windows-1252 -- confirm.
data_aux = [pd.read_csv(accidents_data, encoding="cp1252")
            for accidents_data in accidents_data_files]

In [None]:
# Stack the per-year frames into one table with a fresh 0..n-1 index.
# pd.concat replaces the earlier transpose -> dict -> DataFrame round trip,
# which rebuilt every row as a Python dict and re-inferred the column dtypes.
# sort=False keeps columns in order of first appearance; a column missing from
# a given year's file is filled with NaN for that year's rows.
list_of_dfs = data_aux[:]
data = pd.concat(list_of_dfs, ignore_index=True, sort=False)

Create Datetime column

In [None]:
# The slicing below treats 'Date' as YYYYMMDD digits: characters 0-3 are the
# year, 4-5 the month and 6-7 the day.
date_digits = data['Date'].astype('str')
data['year'] = date_digits.str.slice(start=0, stop=4).astype('int')
data['month'] = date_digits.str.slice(start=4, stop=6).astype('int')
data['day'] = date_digits.str.slice(start=6, stop=8).astype('int')
# Build the timestamp from the numeric components. Parsing a 'd/m/Y' string
# without dayfirst/format is ambiguous: dates whose day is <= 12 can be read
# month-first, silently swapping day and month.
data['Date'] = pd.to_datetime(data[['year', 'month', 'day']])

Set up the bicycles column

In [None]:
# Two source columns describe cyclist involvement: 'Cycle' holds a 'Y'/'N'
# flag and 'Cycles' holds a numeric value. Both are folded into one 0/1
# 'Bicycles' flag.
cycle_is_yes = data['Cycle'] == 'Y'
cycle_is_no = data['Cycle'] == 'N'
cycles_positive = data['Cycles'] > 0
cycles_absent = (data['Cycles'] <= 0) | data['Cycles'].isnull()

# Later assignments override earlier ones on overlapping rows. Rows matched by
# none of the masks keep a missing 'Bicycles' value.
for row_mask, flag in (
    (cycle_is_yes, 1),
    (cycle_is_no, 0),
    (cycle_is_yes | cycles_positive, 1),
    (cycle_is_no & cycles_absent, 0),
):
    data.loc[row_mask, 'Bicycles'] = flag

Some key statistics

In [None]:
# Date range covered by the combined table.
earliest = data['Date'].min()
latest = data['Date'].max()
print('Accidents between ', earliest, ' and ', latest, sep='')

# Number of rows in the combined table.
total_accidents = data.shape[0]
print("There are a total of ", total_accidents, " accidents.", sep='')

# Sum of the 'Bicycles' flag column (missing values are skipped by sum()).
bicycles = data["Bicycles"].sum()
print("There are a total of ", bicycles,
      " bicycles involved in all the accidents.", sep='')

Slice all bicycle accidents

In [None]:
# Keep only the rows whose 'Bicycles' flag is positive.
involves_bicycle = data['Bicycles'] > 0
data_bicycles = data[involves_bicycle]

In [None]:
# Preview the first five rows of the bicycle subset.
data_bicycles.head(n=5)

Save to file

In [None]:
# Report the (rows, columns) size of the subset, then export it as CSV in the
# current working directory.
output_csv = 'cycling_safety_cambridge.csv'
subset_shape = data_bicycles.shape
print(subset_shape)
data_bicycles.to_csv(output_csv)
print('Wrote file to: ', output_csv, sep='')