# Pennsylvania, USA

**Source of original dataset:** https://pennshare.maps.arcgis.com/apps/webappviewer/index.html?id=8fdbf046e36e41649bbfd9d7dd7c7e7e

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Fatality, Suspected Serious Injury, Injury, Possible Injury, Suspected Minor Injury, PDO (property damage only)

In [None]:
import pandas as pd
# Show every column when a DataFrame is displayed. The fully qualified key is
# required: the bare 'max_columns' pattern matches more than one option on
# recent pandas releases and raises OptionError.
pd.set_option('display.max_columns', None)
# Never truncate cell contents. None is the supported "no limit" value; the
# old -1 sentinel was deprecated in pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import glob

Set up input files

In [None]:
# Directory that holds the raw crash CSV files (relative to this notebook).
data_dir = "../data/pennsylvania/"

# Collect every CRASH*.csv file in that directory. glob returns paths in an
# arbitrary, filesystem-dependent order, so sort them to keep the row order of
# the combined dataset (and the index written to the output CSV) reproducible.
files = sorted(glob.glob(data_dir + 'CRASH*.csv'))

Read original data

In [None]:
def _load_crash_csv(path):
    """Print *path* as progress output, then parse it as an ISO-8859-1 CSV."""
    print(path)
    return pd.read_csv(path, encoding="ISO-8859-1")


# One DataFrame per input file, in the same order as `files`.
data_aux = [_load_crash_csv(path) for path in files]

In [None]:
# Stack the per-file frames into a single table.
list_of_dfs = data_aux
if list_of_dfs:
    # pd.concat appends the frames directly instead of converting every row to
    # a Python dict and rebuilding the table, which is far slower and can
    # change column dtypes along the way.
    #   ignore_index=True -> fresh 0..n-1 index, as the rebuilt frame had
    #   sort=False        -> columns stay in order of first appearance
    data = pd.concat(list_of_dfs, ignore_index=True, sort=False)
else:
    # pd.concat raises on an empty list; keep the previous behaviour of
    # producing an empty DataFrame when no input files were found.
    data = pd.DataFrame()

Create Datetime column

In [None]:
# Only the crash year and month are used here, so every record is stamped with
# the first day of its month at midnight.
# Build an ISO "YYYY-MM-01" string (month zero-padded) and parse it with an
# explicit format, so the result does not depend on pandas' format inference.
crash_year = data['CRASH_YEAR'].astype(str)
crash_month = data['CRASH_MONTH'].astype(str).str.zfill(2)
data['Date'] = pd.to_datetime(crash_year + '-' + crash_month + '-01', format='%Y-%m-%d')

Set up Longitude & Latitude columns

In [None]:
# Copy the DEC_LONG / DEC_LAT columns into columns named Longitude / Latitude.
for new_name, source_column in (('Longitude', 'DEC_LONG'), ('Latitude', 'DEC_LAT')):
    data[new_name] = data[source_column]

Some key statistics

In [None]:
# Number of rows in the combined table.
total_accidents = data.shape[0]
print(f"There are a total of {total_accidents} accidents.")

# Column totals over all rows for each outcome count column.
fatalities = data['FATAL_COUNT'].sum()
print(f"There are a total of {fatalities} fatalities.")

serious_injuries = data['MAJ_INJ_COUNT'].sum()
print(f"There are a total of {serious_injuries} suspected seriously injured.")

injuries = data['INJURY_COUNT'].sum()
print(f"There are a total of {injuries} injured.")

min_inj = data['MIN_INJ_COUNT'].sum()
print(f"There are a total of {min_inj} Possible Injury.")

mod = data['MOD_INJ_COUNT'].sum()
print(f"There are a total of {mod} Suspected Minor Injury.")

# Number of rows with BICYCLE_COUNT > 0. This is a count of accidents, not of
# bicycles, so the message is worded accordingly.
bicycles = data[data['BICYCLE_COUNT'] > 0].shape[0]
print(f"There are a total of {bicycles} accidents involving bicycles.")

Slice all bicycle accidents

In [None]:
# Keep only the rows whose BICYCLE_COUNT is greater than zero.
data_bicycles = data.loc[data['BICYCLE_COUNT'].gt(0)]

In [None]:
# Preview the first five rows of the bicycle subset.
data_bicycles.head(n=5)

Save to file

In [None]:
# Report the (rows, columns) shape of the bicycle subset, write it to a CSV in
# the current working directory, and confirm where it went.
output_file = 'cycling_safety_pennsylvania.csv'
print(data_bicycles.shape)
data_bicycles.to_csv(output_file)
print('Wrote file to: ' + output_file)