In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd



In [2]:
# District (Kreis) boundary polygons, loaded as a GeoDataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this file exists at exactly this location.
kreis_path = "/Users/maxdonheiser/Documents/DE_Geodaten/kreisgrenzen.geojson"
de = gpd.read_file(kreis_path)

In [3]:
fpath = "data/scraped/230103_datenstand.json"

# load data
# orient="index" turns the top-level JSON keys into the row index; that index
# is kept as the `shelter_id` column before the index itself is discarded.
raw = pd.read_json(fpath, orient="index")
raw["shelter_id"] = raw.index
raw = raw.reset_index(drop=True)

# clean up geometry columns
# Wherever a `geography` entry is present it overrides `geometry`.
has_geography = raw["geography"].notnull()
raw.loc[has_geography, "geometry"] = raw.loc[has_geography, "geography"]

# get all timeseries data
# Each shelter's `data` entry is expanded into a frame and tagged with the
# shelter's id, name and coordinates. The frames are collected in a list and
# concatenated once: calling pd.concat inside the loop re-copies the growing
# result on every iteration.
shelter_cols = ["shelter_name", "shelter_id", "latitude", "longitude"]
frames = []

for _, row in raw.iterrows():
    coordinates = row["geometry"]["coordinates"]  # indexed as [longitude, latitude]

    temp = pd.DataFrame(row["data"])
    temp["shelter_id"] = row["shelter_id"]
    temp["shelter_name"] = row["title"]
    temp["latitude"] = coordinates[1]
    temp["longitude"] = coordinates[0]
    frames.append(temp[shelter_cols])

# Only the per-shelter attributes are needed from here on: one row per shelter.
df = pd.concat(frames).drop_duplicates()

df = df.loc[df["shelter_name"].notnull()].reset_index(drop=True)

df["points"] = gpd.points_from_xy(df.longitude, df.latitude, crs="EPSG:4326")

# Placeholders for the district attributes filled in by the geocoding step.
# They are created with object dtype because they will hold strings; a float
# NaN column would have to be upcast on the first string assignment.
for col in ("gen", "bez", "nuts"):
    df[col] = pd.Series(np.nan, index=df.index, dtype=object)

In [4]:
# geocode kreis
# For every shelter, find the district polygon in `de` that contains the
# shelter's point and copy that district's BEZ / GEN / NUTS values onto the row.

# The target columns receive strings, so make sure they are object dtype first;
# writing a string into a float (all-NaN) column relies on implicit upcasting.
for col in ("bez", "gen", "nuts"):
    df[col] = df[col].astype(object)

unmatched = []

for j, point, shelter in zip(df.index, df["points"], df["shelter_name"]):

    # One vectorised containment test against all districts replaces a nested
    # row-by-row loop over `de`.
    hits = de.loc[de.contains(point), ["BEZ", "GEN", "NUTS"]]

    if hits.empty:
        unmatched.append(shelter)
        continue

    # If more than one polygon contains the point, the last one in `de` wins.
    kreis = hits.iloc[-1]

    df.loc[j, "bez"] = kreis["BEZ"]
    df.loc[j, "gen"] = kreis["GEN"]
    df.loc[j, "nuts"] = kreis["NUTS"]
    print(f"Geocoded {shelter}")

# Shelters outside every district polygon keep NaN in bez / gen / nuts.
if unmatched:
    print(f"No district found for {len(unmatched)} shelter(s): {unmatched}")
            

Geocoded Frauen- und Kinderschutzhaus Aachen
Geocoded Frauenhaus Stormarn
Geocoded Frauenhaus Alsdorf
Geocoded Frauenschutzwohnung Altenburg
Geocoded Frauenhaus Ansbach
Geocoded Frauenschutz Weimarer Land
Geocoded Frauenhaus Arnsberg
Geocoded Frauen- und Kinderschutzhaus Bay. Untermain
Geocoded Frauen-und Kinderschutzhaus Aschersleben
Geocoded Frauen- und Kinderschutzwohnung Vogtlandkreis
Geocoded Frauenhaus Augsburg
Geocoded Frauenhaus Bad Hersfeld
Geocoded AWO Frauenhaus &quotLotte Lemke&quot
Geocoded Frauenhaus Bad Kreuznach
Geocoded Frauenschutzwohnung
Geocoded Haus für Frauen in Not
Geocoded Autonomes Frauenhaus
Geocoded Frauen- und Kinderschutzhaus Ammerland Wesermarsch
Geocoded Frauen- und Kinderschutzhaus Baden-Baden u. Landkreis Rastatt
Geocoded Frauen- und Kinderschutzhaus Ballenstedt
Geocoded Frauenhaus
Geocoded Frauenhaus im Landkreis Kassel
Geocoded Frauenschutzhaus Bautzen
Geocoded Frauenhaus
Geocoded Frauenhaus Bergstraße
Geocoded Beratungsstelle für Betroffene von häusl

In [5]:
# get bundesländer
# Federal-state boundaries file, reduced to a two-column lookup table:
# the state's NUTS code (as `nuts2`) and its name (as `bundesland`).
# NOTE(review): absolute, machine-specific path.
states_path = "/Users/maxdonheiser/Documents/DE_Geodaten/bundesländergrenzen.geojson"
state_columns = {"NUTS": "nuts2", "GEN": "bundesland"}
states = gpd.read_file(states_path)[["NUTS", "GEN"]].rename(columns=state_columns)

In [6]:
# and merge
# The first three characters of the district NUTS code are the key used to
# look up the federal state in `states`.
# The merge uses pandas' default inner join, so shelters whose `nuts2` has no
# counterpart in `states` (including shelters without a NUTS code) are dropped.
df = df.assign(nuts2=df["nuts"].str.slice(0, 3)).merge(states, on="nuts2")

In [7]:
# Preview the first five geocoded shelters as a sanity check.
df.head(n=5)

Unnamed: 0,shelter_name,shelter_id,latitude,longitude,points,gen,bez,nuts,nuts2,bundesland
0,Frauen- und Kinderschutzhaus Aachen,2270,50.788047,6.097588,POINT (6.09759 50.78805),Städteregion Aachen,Kreis,DEA2D,DEA,Nordrhein-Westfalen
1,Frauenhaus Alsdorf,2244,50.871672,6.208659,POINT (6.20866 50.87167),Städteregion Aachen,Kreis,DEA2D,DEA,Nordrhein-Westfalen
2,Frauenhaus Arnsberg,2182,51.407733,8.051076,POINT (8.05108 51.40773),Hochsauerlandkreis,Kreis,DEA57,DEA,Nordrhein-Westfalen
3,Frauenhaus Bergisch Gladbach,2067,50.992224,7.15343,POINT (7.15343 50.99222),Rheinisch-Bergischer Kreis,Kreis,DEA2B,DEA,Nordrhein-Westfalen
4,AWO Frauenhaus Bielefeld,2118,52.035302,8.526658,POINT (8.52666 52.03530),Bielefeld,Kreisfreie Stadt,DEA41,DEA,Nordrhein-Westfalen


In [8]:
# The point geometries are not needed in the exported table.
df = df.drop(columns="points")

In [9]:
# Write the geocoded shelter table to disk without the row index.
out_path = "data/helpers/shelters_geocoded.csv"
df.to_csv(out_path, index=False)