# Louisville, USA

**Source of original dataset:** https://data.louisvilleky.gov/dataset/traffic-collisions

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Fatality, Injury, PDO

In [None]:
import pandas as pd
# Show every column and never truncate cell contents when displaying frames.
# The fully-qualified option name is used because a bare 'max_columns' can
# match more than one option in recent pandas, and None (not -1) is the
# supported way to say "no limit" for display.max_colwidth.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj

Set up input files

In [None]:
# Directory holding the raw crash export, and the full path of the CSV inside it.
data_dir = "../data/louisville/"
data_file = f"{data_dir}JEFFERSON COUNTY_CRASH DATA_2010-2017.csv"

Read original data

In [None]:
# Load the complete crash export into one DataFrame.
data = pd.read_csv(filepath_or_buffer=data_file)

Create Datetime column

In [None]:
# Normalise COLLISION TIME to a zero-padded 'HHMM' string and derive the
# 'hour' and 'minute' columns (two-character strings) from it.
#
# Each value is validated as a whole. The earlier approach patched bad values
# with substring replacements ('180' -> '0000', '24' -> '00', ...), which also
# rewrote perfectly valid times: '1800' turned into '00000' and '1245' into
# '1005'. It additionally depended on str.replace treating '\.0' as a regular
# expression, which is no longer the default in pandas 2.
#
# NOTE(review): assumes the raw column stores numeric HHMM values such as 930
# or 1745.0 - confirm against the source CSV.
raw_time = pd.to_numeric(
    data['COLLISION TIME'].astype('str').str.strip(),
    errors='coerce',
)

# Missing, unparseable or out-of-range entries fall back to midnight (0000).
hhmm = raw_time.where(raw_time.between(0, 2459), 0).astype('int64')
hour_part = hhmm // 100
minute_part = hhmm % 100

# An hour of 24 is read as 00, so '2400' becomes '0000' and '2430' becomes '0030'.
hour_part = hour_part.where(hour_part != 24, 0)

# A minute above 59 cannot be a real clock time; reset the whole value.
bad_minute = minute_part > 59
hour_part = hour_part.where(~bad_minute, 0)
minute_part = minute_part.where(~bad_minute, 0)

data['hour'] = hour_part.astype('str').str.zfill(2)
data['minute'] = minute_part.astype('str').str.zfill(2)
data['COLLISION TIME'] = data['hour'] + data['minute']

In [None]:
# Join the collision date with the hour and minute as "<date> <hour>:<minute>",
# then parse the combined text into the 'Date' timestamp column.
date_text = data['COLLISION DATE'].astype('str')
hour_text = data['hour'].astype('str')
minute_text = data['minute'].astype('str')
data['Date'] = pd.to_datetime(date_text + ' ' + hour_text + ':' + minute_text)

Set up latitude & longitude columns

In [None]:
# Copy the GPS decimal columns under the shorter 'Latitude' / 'Longitude' names.
for target_column, source_column in (
    ('Latitude', 'GPS LATITUDE DECIMAL'),
    ('Longitude', 'GPS LONGITUDE DECIMAL'),
):
    data[target_column] = data[source_column]

Select collisions involving bicycles

In [None]:
# Keep only the rows whose DIRECTIONAL ANALYSIS marks a bicycle collision.
bicycle_labels = ['COLLISION WITH BICYCLE', 'COLLISION WITH BICYCLIST']
is_bicycle_collision = data['DIRECTIONAL ANALYSIS'].isin(bicycle_labels)
data_bicycles = data[is_bicycle_collision]

Some key statistics

In [None]:
# Report the covered date range followed by headline counts for the dataset.
earliest_date = data['Date'].min()
latest_date = data['Date'].max()
print(f"Accidents between {earliest_date!s} and {latest_date!s}")

# Number of rows in the full dataset.
total_accidents = len(data)
print(f"There are a total of {total_accidents!s} accidents.")

# Sum of the KILLED column.
fatalities = data["KILLED"].sum()
print(f"There are a total of {fatalities!s} fatalities.")

# Sum of the INJURED column.
injuries = data["INJURED"].sum()
print(f"There are a total of {injuries!s} major injured.")

# Number of rows in the bicycle subset.
bicycles = len(data_bicycles)
print(f"There are a total of {bicycles!s} bicycles involved in all the accidents.")


Bicycle accidents

In [None]:
# Preview the first five rows of the bicycle subset.
data_bicycles.head(n=5)

Save to file

In [None]:
# Print the subset's shape, write it to CSV in the working directory, and confirm.
output_csv = 'cycling_safety_louisville.csv'
print(data_bicycles.shape)
data_bicycles.to_csv(output_csv)
print(f'Wrote file to: {output_csv}')