In [1]:
import re

import pandas as pd

In [2]:
# Matches cells that start with five consecutive digits; get_values uses it to
# detect the row that opens a new region section. Written as a raw string so
# that `\d` is not an invalid string escape sequence.
AGS_RE = re.compile(r'^(\d{5}).*', re.M)

# Shared accumulator: every loader cell below appends
# (ags, type, key, value) tuples to this list.
data = []

# Source workbook, relative to the notebook's working directory.
PATH = 'raw/Pflegestatistik_2013_Sachsen-Anhalt.xls'

In [3]:
def _parse_value(raw):
    """Convert one raw 'value' cell into a number, or None for a lone '.'."""
    if not isinstance(raw, str):
        # Non-string cells (numbers, NaN, None) are passed through unchanged.
        return raw
    text = raw.strip()
    if text == '.':
        # A lone '.' placeholder carries no usable number.
        return None
    if text == '-':
        # A lone '-' placeholder is treated as zero. Only the bare placeholder
        # is rewritten, so the sign of a value such as '-5' is preserved.
        return 0.0
    return float(text)


def get_values(df, mapping, prefix='personal', indicator='key', skip_on=None):
    """Yield ``(ags, prefix, mapped_key, value)`` tuples from a sheet frame.

    Rows are walked top to bottom. A row whose ``indicator`` cell starts with
    five digits (see ``AGS_RE``) opens a new section; its cell text, with
    spaces removed, becomes the ``ags`` attached to all following rows. Rows
    seen before the first such row are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'key'``, ``'value'`` and ``indicator``.
    mapping : dict
        Maps a substring to look for in ``row['key']`` to the output key name.
        Every mapping entry whose substring occurs in the key yields a tuple.
    prefix : str
        Passed through unchanged as the second tuple element.
    indicator : str
        Name of the column that holds the five-digit section marker.
    skip_on : collection or None
        If ``row['key']`` is in this collection, the current section is
        closed: that row and all following rows are skipped until the next
        section marker.

    Yields
    ------
    tuple
        ``(ags, prefix, mapped_key, value)``. ``value`` is a float for string
        cells, ``None`` for a lone ``'.'``, ``0.0`` for a lone ``'-'``, and the
        untouched cell for non-string cells.

    Raises
    ------
    ValueError
        If a string value is neither a placeholder nor parseable by ``float``.
    """
    ags = None
    for _, row in df.iterrows():
        marker = str(row[indicator])
        if AGS_RE.match(marker) is not None:
            ags = marker.replace(' ', '')
        if ags is None:
            continue

        key = row['key']
        if pd.isnull(key):
            continue
        if skip_on is not None and key in skip_on:
            ags = None
            continue
        if not isinstance(key, str):
            # Numeric key cells (e.g. a section marker stored as a number)
            # cannot contain a mapping substring; skip instead of raising
            # TypeError on the `in` test below.
            continue

        for needle, name in mapping.items():
            if needle in key:
                yield ags, prefix, name, _parse_value(row['value'])


In [4]:
# Sheet 'Vergütung'. The first 14 rows of the sheet are skipped
# (presumably the title/header area -- confirm against the workbook).
# `sheet_name` replaces the removed `sheetname` keyword of read_excel.
costs_raw = pd.read_excel(PATH, sheet_name='Vergütung', skiprows=14).rename(columns={
    'Unnamed: 0': 'key',
    # The trailing spaces are part of the column header as read by pandas.
    'Insgesamt                               ': 'value',
})

# Substring of the row label -> key name written to the output.
cost_mapping = {
    'Pflegeklasse 1': 'costs_nursing_class_1',
    'Pflegeklasse 2': 'costs_nursing_class_2',
    'Pflegeklasse 3': 'costs_nursing_class_3',
    'Verpflegung': 'food',
}

# Once the 'Vergütung für Kurzzeitpflege  ' row is reached, get_values ignores
# the remaining rows of that region until the next region marker.
# list(...) materialises all rows first, so a parse error cannot leave `data`
# half-extended.
# NOTE: this appends to the shared `data` list; re-running this cell without
# re-running the cell that resets `data` duplicates the rows.
data.extend(list(get_values(
    costs_raw,
    cost_mapping,
    prefix='costs',
    skip_on=['Vergütung für Kurzzeitpflege  '],
)))
costs_raw.head()

Unnamed: 0,key,value,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6
0,Vergütung für vollstationäre,,,,,,
1,Dauerpflege,,,,,,
2,Pflegesatz,,,,,,
3,Pflegeklasse 1,38.97,,,,,
4,Pflegeklasse 2,51.24,,,,,


In [5]:
# Sheet 'verfügbare Plätze Betten'. The first 13 rows of the sheet are skipped
# (presumably the title/header area -- confirm against the workbook).
# `sheet_name` replaces the removed `sheetname` keyword of read_excel.
beds_raw = pd.read_excel(PATH, sheet_name='verfügbare Plätze Betten', skiprows=13).rename(columns={
    'Unnamed: 0': 'key',
    # The trailing spaces are part of the column header as read by pandas.
    'Insgesamt                               ': 'value',
})

# Substring of the row label -> key name written to the output.
bed_mapping = {
    '1-Bett-Zimmern': '1-bed',
    '2-Bett-Zimmern': '2-bed',
    '3-Bett-Zimmern': '3-bed',
    '4 und mehr-Bett-Zimmern': '4-bed',
}

# list(...) materialises all rows first, so a parse error cannot leave `data`
# half-extended.
# NOTE: this appends to the shared `data` list; re-running this cell without
# re-running the cell that resets `data` duplicates the rows.
data.extend(list(get_values(beds_raw, bed_mapping, prefix='beds')))
beds_raw.head()

Unnamed: 0,key,value
0,Anzahl ...,
1,Verfügbare Plätze insgesamt,31120.0
2,,
3,vollstationäre Pflege,
4,zusammen,29935.0


In [6]:
# Sheet 'Personal'. The first 10 rows of the sheet are skipped
# (presumably the title/header area -- confirm against the workbook).
# `sheet_name` replaces the removed `sheetname` keyword of read_excel.
personnel_raw = pd.read_excel(PATH, sheet_name='Personal', skiprows=10).rename(columns={
    'Unnamed: 0': 'key',
    'Insgesamt': 'value',
})

# Substring of the row label -> key name written to the output.
personal_mapping = {
    'Vollzeitbeschäftigt': 'fully_employed',
    'Teilzeitbeschäftigt': 'part-time',
    'Pflege und Betreuung': 'nursing_and_care',
    'soziale Betreuung': 'social_care',
    'zusätzliche Betreuung (§ 87b SGB XI)': 'other_care',
    'Personal insgesamt': 'total_personnel',
}

# list(...) materialises all rows first, so a parse error cannot leave `data`
# half-extended.
# NOTE: this appends to the shared `data` list; re-running this cell without
# re-running the cell that resets `data` duplicates the rows.
data.extend(list(get_values(personnel_raw, personal_mapping, prefix='personal')))
personnel_raw.head()

Unnamed: 0,key,value
0,Personal insgesamt,20071.0
1,,
2,darunter,
3,"Auszubildende/-r, (Um-)Schüler/-in",1079.0
4,,


In [7]:
# Sheet 'Pflegebedürftige'. The first 10 rows of the sheet are skipped
# (presumably the title/header area -- confirm against the workbook).
# `sheet_name` replaces the removed `sheetname` keyword of read_excel.
# NOTE(review): values are taken from the column pandas labels 'Unnamed: 2',
# not from the 'Insgesamt' column -- confirm this is the intended column.
recipients_raw = pd.read_excel(PATH, sheet_name='Pflegebedürftige', skiprows=10).rename(columns={
    'Unnamed: 0': 'key',
    'Unnamed: 2': 'value',
})

# Substring of the row label -> key name written to the output.
# The trailing space in the 'Pflegestufe ...' entries is significant for the
# substring test: without it 'Pflegestufe I' would also match the II and III
# rows.
recipient_mapping = {
    'Pflegestufe I ': 'recipients_nursing_class_1',
    'Pflegestufe II ': 'recipients_nursing_class_2',
    'Pflegestufe III ': 'recipients_nursing_class_3',
    'Pflegestufe zugeordnet': 'recipients_nursing_class_unknown',
}

# list(...) materialises all rows first, so a parse error cannot leave `data`
# half-extended.
# NOTE: this appends to the shared `data` list; re-running this cell without
# re-running the cell that resets `data` duplicates the rows.
data.extend(list(get_values(recipients_raw, recipient_mapping, prefix='recipients')))
recipients_raw.head()

Unnamed: 0,key,Insgesamt,value
0,Insgesamt,29790.0,28283.0
1,Pflegestufe I,10308.0,9532.0
2,Pflegestufe II,13590.0,13012.0
3,Pflegestufe III,5540.0,5432.0
4,Bisher noch keiner,,


In [8]:
# Assemble every collected (ags, type, key, value) tuple into one table.
# Naming the columns in the constructor (instead of renaming the positional
# labels 0..3 afterwards) keeps the schema intact even when `data` is empty.
df = pd.DataFrame(data, columns=['ags', 'type', 'key', 'value'])

# Every row gets the same federal-state label.
df['state'] = 'Sachsen-Anhalt'

# Write the result next to the notebook, without the row index.
df.to_csv('sachsen-anhalt.csv', index=False)
df.head()

Unnamed: 0,ags,type,key,value,state
0,15001,costs,costs_nursing_class_1,40.63,Sachsen-Anhalt
1,15001,costs,costs_nursing_class_2,52.39,Sachsen-Anhalt
2,15001,costs,costs_nursing_class_3,63.32,Sachsen-Anhalt
3,15001,costs,food,16.25,Sachsen-Anhalt
4,15002,costs,costs_nursing_class_1,41.05,Sachsen-Anhalt
