# Netherlands

**Source of original dataset:** https://data.amsterdam.nl/datasets/G6xF0loDvppzog/verkeersongevallen-bron-landelijk/

**Location of accidents:** Latitude, Longitude

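**Date of accidents:** Date (year resolution only: it is derived from the `JAAR_VKL` year column, so every accident is dated 1 January of its year)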

**Outcome of accidents:** Fatality, Serious Injury, Injury, PDO

In [None]:
import pandas as pd
# Show every column and never truncate cell text when displaying DataFrames.
# The fully qualified option names are used because the bare 'max_columns'
# pattern can match more than one pandas option, and None (not -1) is the
# supported way to say "no limit" for max_colwidth (pandas >= 1.0).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
from pyproj import Proj, transform

Setup input files

In [None]:
data_dir = "../data/netherlands/"

# Every yearly export contains the same three files; only the period folder
# and, for 2003-2017, an extra '02 TOTNL J-N-J-N' sub-folder differ per year.
_relative_files = {
    'locations': 'Netwerkgegevens/puntlocaties.txt',
    'accidents': 'Ongevallengegevens/ongevallen.txt',
    'parties'  : 'Ongevallengegevens/partijen.txt',
}
_first_year, _last_year = 2003, 2018
# The 2018 export is laid out without the '02 TOTNL J-N-J-N' level.
_years_without_subfolder = {2018}


def _year_folder(year):
    """Return the folder that directly contains the data folders of `year`."""
    folder = data_dir + '01-01-{0}_31-12-{0}/'.format(year)
    if year not in _years_without_subfolder:
        folder += '02 TOTNL J-N-J-N/'
    return folder


# Keys are the year as a string, in chronological order; each value maps a
# file role ('locations', 'accidents', 'parties') to the path of that file.
data_input = {
    str(year): {
        role: _year_folder(year) + relative_path
        for role, relative_path in _relative_files.items()
    }
    for year in range(_first_year, _last_year + 1)
}

In [None]:
def location_match(FK_VELD5):
    """Look up the coordinates stored for one location key.

    Reads the module-level `data_locations` frame, which must already be
    indexed by the location key when this function is called.

    Parameters
    ----------
    FK_VELD5 : label
        Index label of the wanted row in `data_locations`.

    Returns
    -------
    tuple
        Copies of the row's 'X_COORD' and 'Y_COORD' values, in that order.

    Raises
    ------
    KeyError
        If `FK_VELD5` is not present in the index of `data_locations`.
    """
    location_row = data_locations.loc[FK_VELD5]
    x_coord = location_row['X_COORD'].copy()
    y_coord = location_row['Y_COORD'].copy()
    return x_coord, y_coord

In [None]:
# One filtered DataFrame per year is collected here and merged afterwards.
data_aux = []

for year, files in data_input.items():
    print(year)

    # Read the three raw tables of this year
    data_locations = pd.read_csv(files['locations'])
    data_accidents = pd.read_csv(files['accidents'])
    data_parties   = pd.read_csv(files['parties'])

    # Condense parties file: one row per accident (VKL_NUMMER) holding the
    # list of OTE_ID codes of all parties involved; missing codes become 0.
    data_parties['OTE_ID'] = data_parties['OTE_ID'].fillna(0).astype('int')
    parties_only_vehicles = (
        data_parties.groupby('VKL_NUMMER')['OTE_ID'].apply(list).to_frame()
    )

    # Flag each accident by the OTE_ID codes that occur among its parties
    # (code 64 sets 'bicycles', code 71 sets 'pedestrians').
    parties_only_vehicles['bicycles'] = [64 in codes for codes in parties_only_vehicles['OTE_ID']]
    parties_only_vehicles['pedestrians'] = [71 in codes for codes in parties_only_vehicles['OTE_ID']]
    parties_only_vehicles = parties_only_vehicles.reset_index()

    # Merge accidents and involved parties files
    data = pd.merge(data_accidents, parties_only_vehicles, on="VKL_NUMMER", how="left")
    data = data.sort_values(by='VKL_NUMMER')

    # Filter bicycles. The explicit comparison is kept on purpose: accidents
    # without any party row get NaN from the left merge and must be dropped.
    # .copy() makes the result an independent frame so that the column
    # assignments below do not write into a slice of the merged table.
    data = data[data['bicycles'] == True].copy()

    # Add locations to accidents with a vectorised lookup on the location key.
    # Keys without a match in the locations table get NaN coordinates.
    data_locations = data_locations.set_index('FK_VELD5')
    unique_locations = data_locations[~data_locations.index.duplicated(keep='first')]
    data['X_COORD'] = data['FK_VELD5'].map(unique_locations['X_COORD'])
    data['Y_COORD'] = data['FK_VELD5'].map(unique_locations['Y_COORD'])

    data_aux.append(data)

Merge years

In [None]:
list_of_dfs = data_aux
# Stack the per-year frames into one table with a fresh 0..n-1 index.
# pd.concat keeps the column dtypes and takes the union of the columns in
# order of first appearance (NaN where a year lacks a column), without the
# cost of turning every row into a Python dict first.
data = pd.concat(list_of_dfs, ignore_index=True, sort=False)

Transform locations from the projected X/Y coordinates (EPSG:28992) to latitude & longitude (EPSG:4326)
 (using the projection referenced here: https://gis.stackexchange.com/questions/122117/understanding-coordinates-in-netherlands-dataset)

In [None]:
# Source projection of the X_COORD / Y_COORD values: EPSG:28992
# (listed in the EPSG registry as "Amersfoort / RD New").
inProj = Proj('epsg:28992')
# Target projection: EPSG:4326 (WGS 84 geographic coordinates).
outProj = Proj('epsg:4326')

In [None]:
def project_xy(X_COORD, Y_COORD):
    """Convert coordinates from the `inProj` projection to `outProj`.

    Parameters
    ----------
    X_COORD, Y_COORD
        Coordinates expressed in the `inProj` projection; they are handed
        to pyproj's `transform` unchanged.

    Returns
    -------
    The value returned by `transform(inProj, outProj, X_COORD, Y_COORD)`.
    """
    # NOTE(review): recent pyproj releases deprecate the module-level
    # `transform` in favour of `Transformer` - confirm against the
    # installed version.
    projected = transform(inProj, outProj, X_COORD, Y_COORD)
    return projected

In [None]:
# Project all coordinates in a single call instead of once per row: the
# underlying pyproj transform accepts whole arrays, which avoids one Python
# call (and one projection set-up) per accident. The first returned array is
# stored as 'Latitude' and the second as 'Longitude', as before.
data['Latitude'], data['Longitude'] = project_xy(
    data['X_COORD'].astype(float).values,
    data['Y_COORD'].astype(float).values,
)

Setup outcomes

In [None]:
# An accident is flagged as fatal (1) when any of the three AP*_CODE
# columns holds the code 'DOD'; otherwise the flag is 0.
fatal_mask = (
    data['AP3_CODE'].eq('DOD')
    | data['AP4_CODE'].eq('DOD')
    | data['AP5_CODE'].eq('DOD')
)
data['Fatalities'] = fatal_mask.astype('int64')

# An accident is flagged as an injury accident (1) when AP3_CODE is 'LET',
# AP4_CODE is 'LLI' or 'LZW', or AP5_CODE is 'LEH', 'LOV' or 'LZH'.
injury_mask = (
    data['AP3_CODE'].eq('LET')
    | data['AP4_CODE'].isin(['LLI', 'LZW'])
    | data['AP5_CODE'].isin(['LEH', 'LOV', 'LZH'])
)
data['Injuries'] = injury_mask.astype('int64')


Setup Datetime

In [None]:
# Only the accident year (JAAR_VKL) is used; parsing it with the '%Y' format
# yields a timestamp on 1 January of that year.
accident_year = data['JAAR_VKL'].astype(int)
data['Date'] = pd.to_datetime(accident_year, format='%Y')

Some key statistics

In [None]:
# Number of rows in the merged table (one row per accident record).
total_accidents = len(data)
print("There are a total of ", total_accidents, " bicycles accidents.", sep="")

# Sum of the 0/1 'Fatalities' flag, i.e. the number of flagged accidents.
fatalities = data["Fatalities"].sum()
print("There are a total of ", fatalities, " fatalities.", sep="")

# Sum of the 0/1 'Injuries' flag, i.e. the number of flagged accidents.
injuries = data["Injuries"].sum()
print("There are a total of ", injuries, " seriously injured.", sep="")

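Select all bicycle accidents (the per-year loading loop already kept only accidents involving a bicycle, so no further filtering is needed here)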

In [None]:
# No extra filtering happens here: `data` already holds only the rows whose
# 'bicycles' flag was True when each year was loaded. This binds a second
# name to the same DataFrame object (it is not a copy).
data_bicycles = data

In [None]:
# Preview the first rows of the final table before it is written to disk.
data_bicycles.head()

Save to file

In [None]:
# Report the (rows, columns) size of the table that is about to be written.
output_shape = data_bicycles.shape
print(output_shape)

# Write the table as CSV into the current working directory; with the
# default to_csv settings the DataFrame index is written as the first column.
data_bicycles.to_csv('cycling_safety_netherlands.csv')
print('Wrote file to: cycling_safety_netherlands.csv')