# Helsinki, Finland

**Source of original dataset:** https://hri.fi/data/en_GB/dataset/liikenneonnettomuudet-helsingissa

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Fatality, Injury, PDO (property damage only)

In [None]:
import pandas as pd
# Display every column and never truncate cell contents when rendering frames.
# The fully qualified key is used because the bare 'max_columns' pattern
# matches more than one option in recent pandas releases (raising OptionError).
pd.set_option('display.max_columns', None)
# None is the supported way to disable the column-width limit; passing -1 has
# been deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj

Set up input files

In [None]:
# Directory holding the Helsinki input data (relative to this notebook),
# followed by the full path of the accident CSV inside it.
data_dir = "../data/helsinki/"
data_file = "{}{}".format(data_dir, 'liikenneonnettomuudet_Helsingissa.csv')

Read original data

In [None]:
# Load the semicolon-delimited accident table.
# "ANSI" is only registered as a codec name on Windows (an alias of "mbcs"),
# so it raises LookupError on other platforms. "cp1252" names the Western
# European Windows code page explicitly and works everywhere.
# NOTE(review): assumes the file was exported as Windows-1252 — confirm.
data = pd.read_csv(data_file, encoding="cp1252", sep=';')

Create Datetime column

In [None]:
# Build the 'Date' column from the 'VV' column alone: each row is stamped
# with midnight on 1 January of the value found in 'VV'.
year_start_text = data['VV'].astype('str') + '/01/01 00:00'
data['Date'] = pd.to_datetime(year_start_text)

Set up bicycle and outcome columns

In [None]:
# Indicator columns: 1 where the condition holds for the row, 0 otherwise.

# Rows whose LAJI code equals 'PP' are flagged as bicycle accidents.
data['bicycles'] = (data['LAJI'] == 'PP').astype('int64')

# Outcome flags keyed on the VAKAV_A code:
# 3 -> fatalities, 2 -> injury, 1 -> property_damage.
severity_flags = (('fatalities', 3), ('injury', 2), ('property_damage', 1))
for flag_column, severity_code in severity_flags:
    data[flag_column] = (data['VAKAV_A'] == severity_code).astype('int64')

Set up latitude & longitude columns

In [None]:
from pyproj import Transformer

# Convert the coordinate columns from EPSG:3879 to EPSG:4326 and store the
# result in 'LAT' / 'LON'.
#
# A single Transformer is built once and applied to whole columns. This
# replaces the deprecated module-level pyproj.transform(), which was called
# once per row inside iterrows().
# The inputs are passed in the same order as before ('pohj_etrs' first,
# 'ita_etrs' second) and the outputs are read back as (lat, lon), so the
# default (authority-defined) axis order is kept on purpose.
transformer = Transformer.from_crs('epsg:3879', 'epsg:4326')
lat, lon = transformer.transform(data['pohj_etrs'].values,
                                 data['ita_etrs'].values)
data['LAT'] = lat
data['LON'] = lon

In [None]:
# Expose the converted coordinates under the descriptive column names
# 'Latitude' and 'Longitude' (copies of 'LAT' and 'LON').
data['Latitude'], data['Longitude'] = data['LAT'], data['LON']

Some key statistics

In [None]:
# Print headline figures for the dataset. Every count after the row total is
# the sum of one of the per-row indicator columns.
print('Accidents between {} and {}'.format(data['Date'].min(), data['Date'].max()))

# One row per record in the table.
total_accidents = data.shape[0]
print("There are a total of {} accidents.".format(total_accidents))

fatalities = data["fatalities"].sum()
print("There are a total of {} fatalities.".format(fatalities))

injuries = data["injury"].sum()
print("There are a total of {} injured.".format(injuries))

property_damage = data["property_damage"].sum()
# Message typo fixed: "propertiy" -> "property".
print("There are a total of {} property damage accidents.".format(property_damage))

bicycles = data["bicycles"].sum()
print("There are a total of {} bicycles involved in all the accidents.".format(bicycles))

Slice all bicycle accidents

In [None]:
# Keep only the rows whose 'bicycles' indicator is greater than zero.
data_bicycles = data.loc[data['bicycles'].gt(0)]

In [None]:
# Preview the first five rows of the bicycle subset.
data_bicycles.head(n=5)

Save to file

In [None]:
# Report the size of the bicycle subset, write it to a CSV file in the
# current working directory, and confirm where it was written.
output_csv = 'cycling_safety_helsinki.csv'
print(data_bicycles.shape)
data_bicycles.to_csv(output_csv)
print('Wrote file to: ' + output_csv)