# Germany

**Source of original dataset:** https://unfallatlas.statistikportal.de/

**Location of accidents:** Latitude, Longitude

**Date of accidents:** Date

**Outcome of accidents:** Fatality, Serious Injury, Minor Injury, PDO (property damage only)

In [None]:
import pandas as pd
# Show every column and never truncate cell contents when displaying frames.
# The option keys are fully qualified because a bare 'max_columns' pattern is
# ambiguous on newer pandas (it also matches 'styler.render.max_columns'),
# and None is the supported "no limit" value for max_colwidth (-1 was deprecated).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from itertools import chain
import matplotlib.pyplot as plt
import pyproj
import geopandas as gpd

Set up input files

In [None]:
# Directory that holds the yearly shapefile folders.
data_dir = "../data/germany/"

# One shapefile per year (2016-2019), given relative to data_dir.
accident_files = [
    "Shapefile_2016/Unfaelle_2016_LinRef.shp",
    "Shapefile_2017/Unfallorte2017_LinRef.shp",
    "Shapefile_2018/Unfallorte2018_LinRef.shp",
    "Shapefile_2019/Unfallorte2019_LinRef.shp",
]

# Prefix each relative name with the data directory to get the paths to read.
accidents_data_files = [f"{data_dir}{relative_path}" for relative_path in accident_files]

Read original data

In [None]:
# ParserError is imported from its public location, pandas.errors; newer
# pandas releases no longer re-export it from pandas.io.parsers.
from pandas.errors import ParserError

# Read each yearly shapefile with geopandas, keeping the frames in the same
# order as accidents_data_files.
data_aux = [gpd.read_file(shapefile_path) for shapefile_path in accidents_data_files]

In [None]:
list_of_dfs = data_aux

# Turn every yearly frame into row dicts and stack them into one plain
# DataFrame. to_dict("records") yields one dict per row directly, which avoids
# transposing each frame (one object column per accident row) and does not
# lose rows when index labels repeat.
# NOTE(review): pd.concat would be the usual tool, but it keeps the geopandas
# types; whether all yearly files share one CRS is not visible here — confirm
# before switching.
row_records = chain.from_iterable(
    yearly_df.to_dict("records") for yearly_df in list_of_dfs
)
data = pd.DataFrame(list(row_records))

Create Datetime column (built from the year, month and hour columns; the day is fixed to the 1st of the month)

In [None]:
# Build a "<year>-<month>-01 <hour>:00" text stamp from the year, month and
# hour columns; the day of month is always written as 01.
year_text = data['UJAHR'].astype('str')
month_text = data['UMONAT'].astype('str')
hour_text = data['USTUNDE'].astype('str')
data['Date'] = year_text + '-' + month_text + '-01 ' + hour_text + ':00'

# Parse the text stamps into datetime values.
data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d %H:%M')

In [None]:
# One 0/1 indicator column per UKATEGORIE code: '1' -> fatal,
# '2' -> serious_injury, '3' -> minor_injury. Rows with any other value get 0
# in all three columns.
# NOTE(review): the comparison is against string codes — confirm UKATEGORIE is
# read as text for every year.
severity_codes = {'fatal': '1', 'serious_injury': '2', 'minor_injury': '3'}

for flag_column, category_code in severity_codes.items():
    data[flag_column] = (data['UKATEGORIE'] == category_code).astype('int64')

Set up the bicycles column

In [None]:
# Integer 0/1 flag: 1 where IstRad equals the string '1', otherwise 0.
data['Bicycles'] = (data['IstRad'] == '1').astype('int')

Set up latitude & longitude columns

In [None]:
# Copy the Y/X WGS84 coordinate columns into Latitude/Longitude columns.
coordinate_columns = (('Latitude', 'YGCSWGS84'), ('Longitude', 'XGCSWGS84'))

for target_column, source_column in coordinate_columns:
    data[target_column] = data[source_column]

Some key statistics

In [None]:
# Print headline figures for the combined dataset. Each total below is the sum
# of a 0/1 flag column, i.e. the number of rows carrying that flag.
print(f"Accidents between {data['Date'].min()} and {data['Date'].max()}")

total_accidents = len(data)
print(f"There are a total of {total_accidents} accidents.")

fatalities = data['fatal'].sum()
print(f"There are a total of {fatalities} fatalities.")

serious_injuries = data['serious_injury'].sum()
print(f"There are a total of {serious_injuries} serious injury.")

minor_injuries = data['minor_injury'].sum()
print(f"There are a total of {minor_injuries} minor injury.")

bicycles = data['Bicycles'].sum()
print(f"There are a total of {bicycles} bicycles involved in all the accidents.")

Slice all bicycle accidents

In [None]:
# Keep only the rows whose Bicycles flag is positive.
data_bicycles = data.loc[data['Bicycles'].gt(0)]

In [None]:
# Preview the first five bicycle-accident rows.
data_bicycles.head(n=5)

Save to file

In [None]:
# Report the subset's shape, then write it as CSV into the working directory.
output_csv = 'cycling_safety_germany.csv'

print(data_bicycles.shape)
data_bicycles.to_csv(output_csv)
print(f'Wrote file to: {output_csv}')