In [1]:
import os

import numpy as np
import pandas as pd
import requests
from pyproj import Proj

In [2]:
# Load the raw accident-location file; its fields are separated by semicolons.
df = pd.read_csv("../data/raw/Unfallorte2018_LinRef.txt", sep=";")

In [3]:
# ULAND == 11 is the code for Berlin; keep only the Berlin rows.
dfb = df.loc[df["ULAND"] == 11]

In [4]:
# Restrict to accidents that involved at least one bicycle (IstRad == 1).
dfb = dfb.loc[dfb["IstRad"] == 1]

In [5]:
# dfb now contains all accidents involving bicycles in Berlin.
dfb.shape

(5192, 22)

In [6]:
# Preview the first row of the filtered frame.
dfb.head(1)

Unnamed: 0,OBJECTID,ULAND,UREGBEZ,UKREIS,UGEMEINDE,UJAHR,UMONAT,USTUNDE,UWOCHENTAG,UKATEGORIE,...,ULICHTVERH,IstRad,IstPKW,IstFuss,IstKrad,IstGkfz,IstSonstige,STRZUSTAND,LINREFX,LINREFY
112746,112747,11,0,3,3,2018,1,15,4,3,...,1,1,0,1,0,0,0,1,798261384899999950000,5835047259899999900000


In [7]:
# Replace the decimal comma with a dot and store the result in new columns.
# .assign() returns a new frame, so no column is written onto a frame that
# was produced by boolean filtering (which triggers pandas'
# SettingWithCopyWarning). regex=False makes the comma a literal match.
# The values are still strings afterwards; only the separator changes.
dfb = dfb.assign(
    UTMx=dfb['LINREFX'].str.replace(',', '.', regex=False),
    UTMy=dfb['LINREFY'].str.replace(',', '.', regex=False),
)

In [8]:
# The comma-formatted source columns are no longer needed.
dfb = dfb.drop(columns=["LINREFX", "LINREFY"])

In [9]:
# Preview the first row to check the new UTMx / UTMy columns.
dfb.head(1)

Unnamed: 0,OBJECTID,ULAND,UREGBEZ,UKREIS,UGEMEINDE,UJAHR,UMONAT,USTUNDE,UWOCHENTAG,UKATEGORIE,...,ULICHTVERH,IstRad,IstPKW,IstFuss,IstKrad,IstGkfz,IstSonstige,STRZUSTAND,UTMx,UTMy
112746,112747,11,0,3,3,2018,1,15,4,3,...,1,1,0,1,0,0,0,1,798261.3848999998,5835047.2599


In [10]:
# Convert the UTM coordinates (zone 32, WGS84 ellipsoid, northern hemisphere)
# to longitude / latitude. Calling the projection with inverse=True maps
# projected x/y back to geographic coordinates, returned as (lon, lat).
myProj = Proj(proj="utm", zone=32, ellps="WGS84", south=False)
lon_deg, lat_deg = myProj(dfb['UTMx'].values, dfb['UTMy'].values, inverse=True)
dfb['lon'] = lon_deg
dfb['lat'] = lat_deg

In [11]:
# Preview the first row to check the new lon / lat columns.
dfb.head(1)

Unnamed: 0,OBJECTID,ULAND,UREGBEZ,UKREIS,UGEMEINDE,UJAHR,UMONAT,USTUNDE,UWOCHENTAG,UKATEGORIE,...,IstPKW,IstFuss,IstKrad,IstGkfz,IstSonstige,STRZUSTAND,UTMx,UTMy,lon,lat
112746,112747,11,0,3,3,2018,1,15,4,3,...,0,1,0,0,0,1,798261.3848999998,5835047.2599,13.403228,52.583472


In [12]:
# Inspect the dtype of every column.
dfb.dtypes

OBJECTID         int64
ULAND            int64
UREGBEZ          int64
UKREIS           int64
UGEMEINDE        int64
UJAHR            int64
UMONAT           int64
USTUNDE          int64
UWOCHENTAG       int64
UKATEGORIE       int64
UART             int64
UTYP1            int64
ULICHTVERH       int64
IstRad           int64
IstPKW           int64
IstFuss          int64
IstKrad          int64
IstGkfz          int64
IstSonstige      int64
STRZUSTAND       int64
UTMx            object
UTMy            object
lon            float64
lat            float64
dtype: object

In [13]:
# lon / lat replace the UTM columns, so drop the latter.
dfb = dfb.drop(columns=["UTMx", "UTMy"])

In [14]:
# Preview the first row after dropping the UTM columns.
dfb.head(1)

Unnamed: 0,OBJECTID,ULAND,UREGBEZ,UKREIS,UGEMEINDE,UJAHR,UMONAT,USTUNDE,UWOCHENTAG,UKATEGORIE,...,ULICHTVERH,IstRad,IstPKW,IstFuss,IstKrad,IstGkfz,IstSonstige,STRZUSTAND,lon,lat
112746,112747,11,0,3,3,2018,1,15,4,3,...,1,1,0,1,0,0,0,1,13.403228,52.583472


In [15]:
# Now we can look up the addresses where the accidents took place,
# using the HERE reverse-geocoding API.
#
# SECURITY: an API key should not be committed with the notebook. Set the
# HERE_API_KEY environment variable to supply your own key; the literal below
# is kept only as a fallback so existing runs behave as before.
apikey = os.environ.get("HERE_API_KEY", "552jChM8TMOdQm7skDaTwtca_Rp_dneoEHhFCMuCUec")
# Build the URL from `apikey` so the key is defined in exactly one place.
base_url = ("https://reverse.geocoder.ls.hereapi.com/6.2/reversegeocode.json"
            + f"?apiKey={apikey}"
            + "&mode=retrieveAddresses")
# Sample query point, written as "latitude,longitude".
coordinates = f"&prox={52.486900},{13.357186}"

In [16]:
# Send the sample reverse-geocoding request and display the decoded JSON.
requests.get(base_url + coordinates).json()

{'Response': {'MetaInfo': {'Timestamp': '2020-04-13T13:56:14.328+0000',
   'NextPageInformation': '2'},
  'View': [{'_type': 'SearchResultsViewType',
    'ViewId': 0,
    'Result': [{'Relevance': 1.0,
      'Distance': 6.4,
      'MatchLevel': 'street',
      'MatchQuality': {'Country': 1.0,
       'State': 1.0,
       'County': 1.0,
       'City': 1.0,
       'District': 1.0,
       'Street': [1.0],
       'PostalCode': 1.0},
      'Location': {'LocationId': 'NT_3bofxkhVVhy-.-eVkcfOAB_l_64987760_R',
       'LocationType': 'point',
       'DisplayPosition': {'Latitude': 52.48685, 'Longitude': 13.35714},
       'MapView': {'TopLeft': {'Latitude': 52.48685, 'Longitude': 13.35692},
        'BottomRight': {'Latitude': 52.48659, 'Longitude': 13.35714}},
       'Address': {'Label': 'Hauptstraße, 10827 Berlin, Deutschland',
        'Country': 'DEU',
        'State': 'Berlin',
        'County': 'Berlin',
        'City': 'Berlin',
        'District': 'Schöneberg',
        'Street': 'Hauptstraße

In [17]:
# Send the sample request again and keep the decoded JSON for inspection.
address = requests.get(base_url + coordinates).json()

In [18]:
# Check which Python type the decoded JSON response has.
type(address)

dict

In [19]:
def get_adress_from_hereapi(lon, lat, timeout=10):
    """Reverse-geocode one coordinate pair with the HERE API.

    The request URL is the module-level ``base_url`` followed by
    ``&prox={lon},{lat}``; the two arguments are inserted in the order given.

    NOTE: the sample request in this notebook writes ``prox`` as
    "latitude,longitude", and the existing caller passes ``(lat, lon)``
    positionally. The first parameter therefore carries the latitude in
    practice, despite its name. The parameter names are kept unchanged so
    existing calls keep working.

    Parameters
    ----------
    lon : number
        First value written into ``prox``.
    lat : number
        Second value written into ``prox``.
    timeout : number, optional
        Seconds to wait for the HTTP response before giving up (default 10).

    Returns
    -------
    dict
        Keys 'City', 'District', 'Street', 'HouseNumber' and 'PostalCode',
        taken from the address of the first result. A field missing from the
        response is returned as "". If the response contains no result at
        all, every field is "".

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.RequestException
        On connection problems or when ``timeout`` is exceeded.
    """
    fields = ("City", "District", "Street", "HouseNumber", "PostalCode")

    response = requests.get(base_url + f"&prox={lon},{lat}", timeout=timeout)
    # Fail loudly on e.g. an invalid key instead of returning blank addresses.
    response.raise_for_status()
    payload = response.json()

    # Walk down to the first result's address, tolerating missing levels.
    views = payload.get("Response", {}).get("View") or []
    results = (views[0].get("Result") or []) if views else []
    address = results[0].get("Location", {}).get("Address", {}) if results else {}

    return {field: address.get(field, "") for field in fields}

In [20]:
# Take an independent copy, then replace the index left over from filtering
# with a fresh 0..n-1 range (the old index is discarded).
dfb = dfb.copy()
dfb = dfb.reset_index(drop=True)

In [21]:
# Look up an address for every accident (one HTTP request per row).
# The columns are read by name: positional access such as x[0] on a
# label-indexed row Series is deprecated in pandas.
# The values are passed latitude first, matching the original call.
dfb["address"] = [
    get_adress_from_hereapi(row_lat, row_lon)
    for row_lat, row_lon in zip(dfb["lat"], dfb["lon"])
]

In [22]:
# Display the new column; each entry is the dict returned by get_adress_from_hereapi.
dfb["address"]

0       {'City': 'Berlin', 'District': 'Niederschönhau...
1       {'City': 'Berlin', 'District': 'Prenzlauer Ber...
2       {'City': 'Berlin', 'District': 'Friedrichshain...
3       {'City': 'Berlin', 'District': 'Mitte', 'Stree...
4       {'City': 'Berlin', 'District': 'Niederschönewe...
                              ...                        
5187    {'City': 'Berlin', 'District': 'Neukölln', 'St...
5188    {'City': 'Berlin', 'District': 'Friedrichsfeld...
5189    {'City': 'Berlin', 'District': 'Charlottenburg...
5190    {'City': 'Berlin', 'District': 'Weißensee', 'S...
5191    {'City': 'Berlin', 'District': 'Fennpfuhl', 'S...
Name: address, Length: 5192, dtype: object

In [23]:
# Preview the address dicts expanded into one column per key.
pd.DataFrame(dfb["address"].tolist())

Unnamed: 0,City,District,Street,HouseNumber,PostalCode
0,Berlin,Niederschönhausen,Dietzgenstraße,36A,13156
1,Berlin,Prenzlauer Berg,Greifswalder Straße,45,10405
2,Berlin,Friedrichshain,Frankfurter Allee,87,10247
3,Berlin,Mitte,Leipziger Straße,,10117
4,Berlin,Niederschöneweide,Schnellerstraße,,12439
...,...,...,...,...,...
5187,Berlin,Neukölln,,,12053
5188,Berlin,Friedrichsfelde,Rummelsburger Straße,,10315
5189,Berlin,Charlottenburg,,,10587
5190,Berlin,Weißensee,Roelckestraße,107,13088


In [24]:
# Expand the address dicts into their own columns and append them to the
# accident data side by side (the original "address" column is kept as well).
address_frame = pd.DataFrame(dfb["address"].tolist())
dfbb = pd.concat([dfb, address_frame], axis=1)

In [25]:
# Write the combined frame to CSV. No index= argument is given, so the
# DataFrame index is written as the first column (pandas default).
dfbb.to_csv("../data/output/bike_after_step_01.csv")