# Pasadena, USA

**Source of original dataset:** http://data.cityofpasadena.net/datasets/85f49ea583c24056968bee6e28162da4_0/data

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Fatality, Injury, PDO (property damage only)

In [None]:
import pandas as pd
# Show every column when a DataFrame is displayed (None removes the limit).
# The fully qualified key is required: the bare 'max_columns' pattern matches
# more than one option on recent pandas and raises OptionError.
pd.set_option('display.max_columns', None)
# Never truncate long cell values; None replaces the deprecated -1 sentinel.
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj
import geopandas as gpd

Set up input files

In [None]:
# Directory holding the Pasadena dataset, relative to this notebook.
data_dir = "../data/pasadena/"
# File name of the shapefile inside data_dir.
shapefile_name = "46f82d69-b05e-4ca3-bc9d-c17b6586986b2020329-1-jvidtp.ygeq.shp"
# Full path handed to the reader below.
data_file = f"{data_dir}{shapefile_name}"

Read original data

In [None]:
# Load the shapefile at data_file with GeoPandas; the result is bound to `data`
# and is the table every later cell works on.
data = gpd.read_file(data_file)

Create Datetime column

In [None]:
# Keep only the records that have a Time value; it is needed to build the
# combined date-time string in the next cell.
# .copy() makes the filtered frame an independent object, so the column
# assignments that follow do not trigger pandas' SettingWithCopyWarning.
data = data[data['Time'].notna()].copy()

In [None]:
# Build a "<date> <time>" string per record from the Date and Time columns.
date_text = data['Date'].astype(str)
time_text = data['Time'].astype(str)
data['Date_aux'] = date_text + ' ' + time_text

In [None]:
# Parse the combined date-time strings and overwrite Date with the parsed values.
timestamps = pd.to_datetime(data['Date_aux'])
data['Date'] = timestamps

Set up Longitude & Latitude columns

In [None]:
# Extract the point coordinates with GeoSeries' vectorized x/y accessors
# instead of calling a Python lambda once per row.
# NOTE(review): like the original, this assumes every geometry is a Point.
data['Longitude'] = data['geometry'].x
data['Latitude'] = data['geometry'].y

Set up bicycles filter

In [None]:
# A record involves a bicycle when either vehicle type column says "Bicycle".
mask = data['VehType1'].eq("Bicycle") | data['VehType2'].eq("Bicycle")
# Independent copy of the matching rows, renumbered from 0.
bicycles = data[mask].copy().reset_index(drop=True)

In [None]:
# Casualty counts: replace missing values with 0 and store them as integers.
for count_col in ("NoKilled", "NoInjured"):
    data[count_col] = data[count_col].fillna(0).astype('int')

Some key statistics

In [None]:
# Time span covered by the records.
first_date, last_date = data['Date'].min(), data['Date'].max()
print(f"Accidents between {first_date} and {last_date}")

# The row count is reported as the number of accidents.
total_accidents = len(data)
print(f"There are a total of {total_accidents} accidents.")

# Sum of the per-record killed counts.
fatalities = data["NoKilled"].sum()
print(f"There are a total of {fatalities} fatalities.")

# Records whose Injury value is exactly "Severe Injury".
serious_injuries = data.loc[data["Injury"].eq("Severe Injury")]
print(f"There are a total of {len(serious_injuries)} serious injured.")

# Sum of the per-record injured counts.
injuries = data["NoInjured"].sum()
print(f"There are a total of {injuries} injured.")

# Size of the bicycle subset built earlier.
print(f"There are a total of {len(bicycles)} accidents involving bicycles.")

Slice all bicycle accidents

In [None]:
# data_bicycles is another name for the bicycles frame built earlier;
# this is a plain assignment, so no copy is made.
data_bicycles = bicycles

In [None]:
# Preview the first rows of the bicycle subset before saving it.
data_bicycles.head()

Save to file

In [None]:
# Destination of the exported bicycle-accident table (relative to the working directory).
output_path = 'cycling_safety_pasadena.csv'

# Report the (rows, columns) shape, write the CSV, then confirm where it went.
print(data_bicycles.shape)
data_bicycles.to_csv(output_path)
print('Wrote file to: ' + output_path)