# Scrape accessibility and shelter type

In [1]:
import requests
from bs4 import BeautifulSoup
import ast
import json
import pandas as pd
import numpy as np

In [2]:
# Download the shelter search page. The same document provides both the
# embedded `addressesJson` object (parsed here) and the HTML result list
# (read from `soup` further down).
r = requests.get('https://www.frauenhaus-suche.de/', timeout=30)
r.raise_for_status()  # stop here rather than parse an HTTP error page
soup = BeautifulSoup(r.content, 'lxml')

# The second <script> inside the plugin container carries `addressesJson = {...}`.
# NOTE(review): index [1] depends on the current page layout; confirm if the site changes.
script_tags = soup.find('div', class_='tx-ks-zif-frauenhaus').find_all('script')
# Only layout whitespace is stripped. Semicolons are left alone so that string
# values containing ';' are not altered.
script = script_tags[1].string.replace('\n', '').replace('\t', '').split('addressesJson =  ')[1].lstrip()
# raw_decode parses the single JSON value at the start of the text and ignores
# whatever follows it (the statement-terminating ';'). Using a JSON parser also
# handles true/false/null and JSON escapes, which ast.literal_eval does not.
d = json.JSONDecoder().raw_decode(script)[0]['features']

In [3]:
# Per-shelter records keyed by uid; filled by the cells below.
data = dict()

In [4]:
# Index the scraped features by uid, keeping the facility-type code and the
# title from each feature's `properties`.
for feature in d:
    props = feature['properties']
    data[props['uid']] = {
        'einrichtungsart': props['einrichtungsart'],
        'title': props['title'],
    }

In [5]:
# Spot-check a single record; raises KeyError if uid '2321' is not among the scraped entries.
data['2321']

{'einrichtungsart': '101', 'title': 'AWO-Frauenhaus-Bottrop'}

In [6]:
# One <div class="searchResultItem"> per entry in the HTML result list.
items = soup.find_all('div', attrs={'class': 'searchResultItem'})

In [7]:
# Attach the icons of every search-result entry to the matching record in
# `data`. Each icon becomes a True flag named after its image file.
for item in items:
    # the uid is the part of the element id after the first underscore
    uid = item['id'].split('_')[1]
    features = item.find('div', class_='features')
    # an entry without a features container contributes no flags
    images = features.find_all('img') if features is not None else []
    # file name without directory and extension (a src ending in
    # "ZIF_Ohr.png" yields "ZIF_Ohr"), independent of the extension length
    attributes = {img['src'].split("/")[-1].rsplit('.', 1)[0]: True for img in images}
    if uid in data:
        data[uid].update(attributes)
    else:
        # present in the HTML list but absent from the records built earlier:
        # report the uid and skip it, without masking unrelated errors
        print(uid)

2499
2064
2467


In [8]:
# One row per shelter. Flags a shelter does not have come out of from_dict as
# NaN and are replaced with False; the uid index is then copied into a
# `shelter_id` column and the index is reset to a plain range.
df = (
    pd.DataFrame.from_dict(data, orient='index')
    .replace(np.nan, False)
    .assign(shelter_id=lambda frame: frame.index)
    .reset_index(drop=True)
)

In [9]:
# Icon file name -> column label used in the exported table.
# NOTE: this rebinds `d`, which earlier held the list of scraped features.
d = dict(
    ZIF_Rollstuhl='Gehbehinderung',
    ZIF_Ohr='Hörbehinderung/Taubheit',
    ZIF_Auge_02='Sehbehinderung/Blindheit',
    ZIF_Sucht='Suchtmittelabhängigkeit',
)

In [10]:
# Column selection and order: identifying fields first, then the flag columns
# named by the keys of `d`.
keep = ['shelter_id', 'title', 'einrichtungsart', *d]
df = df.loc[:, keep]

In [11]:
# Swap the icon-derived column names for the labels defined in `d`.
df = df.rename(d, axis='columns')

In [12]:
# Translate facility-type codes into readable labels. Values without a label
# are passed through unchanged: an unexpected code stays visible instead of
# becoming NaN, and re-running this cell no longer wipes the column.
einrichtungsart_labels = {
    '101': 'Frauenhaus',
    '102': 'Schutzwohnung'
}
df['einrichtungsart'] = df['einrichtungsart'].map(
    lambda code: einrichtungsart_labels.get(code, code)
)

In [13]:
# Write the table to CSV without the positional index.
df.to_csv(path_or_buf="./data/helpers/shelters_metadata.csv", index=False)