# LOR Mapping for Bike Accidents

In [1]:
!pip install -q geopandas

In [2]:
#Imports

import pandas as pd
import numpy as np
import requests

#mapping
import folium
import geopandas
from shapely.geometry import Point, Polygon

from math import isnan


In [3]:
data2020 = pd.read_csv('../raw_data/accident/AfSBBB_BE_LOR_Strasse_Strassenverkehrsunfaelle_2020_Datensatz.csv', sep=';')
data2019 = pd.read_csv('../raw_data/accident/AfSBBB_BE_LOR_Strasse_Strassenverkehrsunfaelle_2019_Datensatz.csv', sep=';')
data2018 = pd.read_csv('../raw_data/accident/AfSBBB_BE_LOR_Strasse_Strassenverkehrsunfaelle_2018_Datensatz.csv', sep=';')


In [4]:
data2020.head()

Unnamed: 0,OBJECTID,LAND,BEZ,LOR,LOR_ab_2021,UJAHR,UMONAT,USTUNDE,UWOCHENTAG,UKATEGORIE,...,IstPKW,IstFuss,IstKrad,IstGkfz,IstSonstige,USTRZUSTAND,LINREFX,LINREFY,XGCSWGS84,YGCSWGS84
0,3187,11,8,8010510.0,8100521.0,2020,1,11,4,2,...,1,0,0,1,0,1,8025995332,5821795373,1345500598,524623009
1,3198,11,4,4041137.0,4400727.0,2020,1,2,3,2,...,0,0,0,1,0,1,7912642064,5824629955,1329111618,5249387309
2,3215,11,4,4041239.0,4400830.0,2020,1,7,1,3,...,0,0,0,0,1,1,7922948083,5823598115,1330534822,5248407191
3,3224,11,2,2040502.0,2400521.0,2020,1,22,6,2,...,1,0,0,0,0,0,8010242746,5827862923,1343737099,52517556
4,3241,11,4,4041137.0,4400727.0,2020,1,17,1,3,...,0,0,1,0,0,1,7918892861,5824003006,1329974796,5248791866


In [5]:
data2019.nunique()

OBJECTID       13390
LAND               1
BEZ               12
LOR              442
STRASSE          441
UJAHR              1
UMONAT            12
USTUNDE           24
UWOCHENTAG         7
UKATEGORIE         3
UART              10
UTYP1              7
ULICHTVERH         3
IstRad             2
IstPKW             2
IstFuss            2
IstKrad            2
IstGkfz            2
IstSonstige        3
USTRZUSTAND        4
LINREFX        13256
LINREFY        13248
XGCSWGS84      10492
YGCSWGS84       8813
dtype: int64

In [6]:
#import of preprocessed csv

df = pd.read_csv('../raw_data/df_0103.csv')

In [7]:
#get rid of unwanted and strange index column
df = df.drop(columns='Unnamed: 0')

In [8]:
df.head()

Unnamed: 0,district,LOR,street,LOR_2021,year,month,hour,weekday,category,accident_kind,...,car,pedestrian,motorcycle,goods_vehicle,others,road_condition,LINREFX,LINREFY,XGCSWGS84,YGCSWGS84
0,3,3020209.0,Niederschönhausen,3200206.0,2018,1,15,4,3,6,...,0,1,0,0,0,1,798261.3849,5835047.26,13.403228,52.583472
1,3,3071639.0,Bötzowstraße,3701660.0,2018,1,11,5,3,5,...,1,0,0,0,0,1,800551.721,5829808.492,13.432186,52.535255
2,2,2050602.0,Samariterviertel,2500729.0,2018,1,8,2,3,5,...,1,0,0,0,0,0,803320.7292,5827627.446,13.470897,52.514173
3,1,1011204.0,Leipziger Straße ...,1100207.0,2018,1,19,4,3,5,...,1,0,0,0,0,1,798174.6913,5826939.978,13.394673,52.510848
4,9,9020601.0,Schnellerstraße ...,9200613.0,2018,1,18,4,2,5,...,1,0,0,0,0,1,806109.6297,5821644.404,13.506372,52.458993


In [9]:
#convert LOR to LOR_2021 by creating a dictionary out of existing rows that contain both values

#function to create the dictionary
lor_dict = {}
def create_lor_dict(df):
    for idx, accident in df.iterrows():
            lor_dict.update({df['LOR'][idx]: df['LOR_2021'][idx]})
    return lor_dict



In [10]:
#calling function on our df to create the dict
create_lor_dict(df[df['year'] != 2019]);

In [11]:
# functional removal of nan keys from dict
clean_dict = filter(lambda k: not isnan(k), lor_dict)

In [12]:
clean_dict = {k: lor_dict[k] for k in lor_dict if not isnan(k)}

In [13]:
len(clean_dict.keys())

442

In [14]:
len(lor_dict)

444

In [15]:
#using map to create missing LOR_2021 values with the dict
df['LOR_2021'] = df['LOR_2021'].fillna(df['LOR'].map(clean_dict))

In [16]:
df['LOR_2021'].isnull().sum()

3

In [17]:
df = df[df['LOR_2021'].notna()]

In [18]:
df['LOR_2021'].isnull().sum()

0

In [21]:
from pathlib import Path

df.to_csv(Path('../raw_data/df_LOR_2021.csv'), index=False)