In [1]:
import re

import pandas as pd

In [2]:
# Matches a label that starts with a 3- to 5-digit code and captures the digits.
# Raw string: '\d' inside a normal string literal is an invalid escape sequence
# that newer Python versions warn about.
AGS_RE = re.compile(r'^(\d{3,5}).*', re.M)
# Accumulates the (ags, type, key, value) tuples produced by get_values().
data = []
# Source workbook; every sheet is read from this file.
PATH = 'raw/Pflegestatistik_2013_Rheinland-Pfalz.xls'

In [3]:
def _parse_value(raw):
    """Convert one cell of the ``value`` column to a number.

    Non-string cells are returned unchanged. For strings, surrounding
    whitespace is ignored; a lone ``'.'`` becomes ``None``, a lone ``'-'``
    becomes ``0.0`` and everything else is passed to ``float()``.

    Raises:
        ValueError: if the text is neither a placeholder nor a valid float.
    """
    if not isinstance(raw, str):
        return raw
    text = raw.strip()
    if text == '.':
        return None
    if text == '-':
        # Only a bare dash is a placeholder; blindly replacing every '-'
        # would also destroy the sign of a negative number ('-5' -> '05').
        return 0.0
    return float(text)


def get_values(df, mapping, prefix='personal', indicator='key', skip_on=None):
    """Yield ``(ags, prefix, mapped_key, value)`` tuples from a sheet frame.

    The rows are scanned top to bottom. A row whose ``indicator`` cell starts
    with a 3- to 5-digit code (see ``AGS_RE``) sets the current AGS; the rows
    that follow are attributed to that AGS until another code row appears.

    Args:
        df: DataFrame with a ``'key'`` column (row label), a ``'value'``
            column and the ``indicator`` column.
        mapping: dict of ``substring -> output key``. A row is emitted for
            every substring that occurs in the row's ``'key'`` label.
        prefix: constant placed in the second position of each tuple.
        indicator: name of the column that is searched for the AGS code.
        skip_on: optional container of ``'key'`` labels. Hitting one of them
            drops the current AGS, so following rows are ignored until the
            next code row.

    Yields:
        ``(ags, prefix, mapped_key, value)``; at most one tuple per
        ``(ags, mapped_key)`` pair, the first occurrence wins. ``value`` is
        whatever ``_parse_value`` returns for the row's ``'value'`` cell.

    Raises:
        ValueError: if a string value is not a placeholder or a valid float.
    """
    ags = None
    emitted = set()
    for _, row in df.iterrows():
        # str() lets non-text cells take part in the match; 'nan' never matches.
        found = AGS_RE.match(str(row[indicator]))
        if found is not None:
            ags = found.group(1).strip()
            if len(ags) < 5 and not ags.startswith('07'):
                # special case Landau where it's missing
                ags = '07%s' % ags
        # Codes outside the '07' prefix or shorter than 5 characters are dropped.
        if ags is not None and (not ags.startswith('07') or len(ags) < 5):
            ags = None
        if ags is None:
            continue

        label = row['key']
        if pd.isnull(label):
            continue
        if skip_on is not None and label in skip_on:
            ags = None
            continue
        if not isinstance(label, str):
            # A non-text label cannot contain any mapping substring; skip it
            # instead of failing on the ``in`` test below.
            continue

        for needle, out_key in mapping.items():
            if needle not in label:
                continue
            val = _parse_value(row['value'])
            cache_key = '%s-%s' % (ags, out_key)
            if cache_key not in emitted:
                yield ags, prefix, out_key, val
                emitted.add(cache_key)

In [4]:
# Sheet tab_s22: rooms by number of beds.
# `sheet_name` is the supported keyword; the old `sheetname` spelling was
# deprecated in pandas 0.21 and is rejected by current releases.
df = pd.read_excel(PATH, sheet_name='tab_s22_2013_RP', skiprows=13)

# get_values() reads the row label from 'key' and the figure from 'value'.
df = df.rename(columns={
    'Unnamed: 0': 'key',
    'Insgesamt                               ': 'value',
})

# Substring of a row label -> output key.
bed_mapping = {
    '1-Bett-Zimmern': '1-bed',
    '2-Bett-Zimmern': '2-bed',
    '3-Bett-Zimmern': '3-bed',
    '4 und mehr-Bett-Zimmern': '4-bed',
}

data.extend(list(get_values(df, bed_mapping, prefix='beds')))
df.head()

Unnamed: 0,key,value,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,Anzahl ...,,,,,,,,,,
1,07 Rheinland-Pfalz,,,,,,,,,,
2,Verfügbare Plätze insgesamt ...,43275.0,,,,,,,,,
3,vollstationäre Pflege,,,,,,,,,,
4,zusammen ....................,41103.0,,,,,,,,,


In [5]:
# Sheet tab_s32: personnel figures.
# `sheet_name` is the supported keyword; the old `sheetname` spelling was
# deprecated in pandas 0.21 and is rejected by current releases.
df = pd.read_excel(PATH, sheet_name='tab_s32_2013_RP', skiprows=10)

# get_values() reads the row label from 'key' and the figure from 'value'.
df = df.rename(columns={
    'Unnamed: 0': 'key',
    'Insgesamt                               ': 'value',
})

# Substring of a row label -> output key. The leading/trailing blanks are part
# of the match and must stay exactly as they are.
# 'Teilzeitbeschäftigt' itself is not mapped (its row shows no value in the
# preview below); the three 'part-time-N' entries are collected instead and
# summed into a single 'part-time' figure when the CSV is assembled.
personal_mapping = {
    'Vollzeitbeschäftigt': 'fully_employed',
    '   - über 50 % ': 'part-time-1',
    '      beschäftigt ': 'part-time-2',
    '   - geringfügig beschäftigt ': 'part-time-3',
    'Pflege und Betreuung': 'nursing_and_care',
    'soziale Betreuung': 'social_care',
    'zusätzliche Betreuung (§ 87b SGB XI)': 'other_care',
    'Personal insgesamt': 'total_personnel',
    'Auszubildende': 'trainee',
    'staatlich anerkannte': 'state_certified',
    'Gesundheits- und Krankenpfleger': 'nurse',
    'Gesundheits- und Kinderkrankenpfleger': 'nurse_children',
}

personal_rows = list(get_values(df, personal_mapping, prefix='personal'))
data.extend(personal_rows)
df.head()

Unnamed: 0,key,Unnamed: 1,value,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7
0,07 Rheinland-Pfalz,,,,,,,
1,Personal insgesamt ..............................,,31509.0,,,,,
2,Beschäftigungsverhältnis ...,,,,,,,
3,Vollzeitbeschäftigt ...........................,,9215.0,,,,,
4,Teilzeitbeschäftigt ...,,,,,,,


In [6]:
# Sheet tab_s42: care recipients by nursing level ("Pflegestufe").
# `sheet_name` is the supported keyword; the old `sheetname` spelling was
# deprecated in pandas 0.21 and is rejected by current releases.
df = pd.read_excel(PATH, sheet_name='tab_s42_2013_RP', skiprows=11)

# get_values() reads the row label from 'key' and the figure from 'value'.
df = df.rename(columns={
    'Unnamed: 0': 'key',
    'Insgesamt                               ': 'value',
})

# Substring of a row label -> output key. get_values() matches by substring,
# so the trailing blank is what keeps 'Pflegestufe I ' from also matching the
# 'Pflegestufe II' and 'Pflegestufe III' labels.
recipient_mapping = {
    'Pflegestufe I ': 'recipients_nursing_class_1',
    'Pflegestufe II ': 'recipients_nursing_class_2',
    'Pflegestufe III ': 'recipients_nursing_class_3',
    'Pflegestufe zugeordnet': 'recipients_nursing_class_unknown',
}

data.extend(list(get_values(df, recipient_mapping, prefix='recipients')))
df.head()

Unnamed: 0,key,value,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7
0,07 Rheinland-Pfalz,35923.0,34089.0,,,,,
1,Pflegestufe I,14596.0,13719.0,,,,,
2,Pflegestufe II,14372.0,13609.0,,,,,
3,Pflegestufe III,5725.0,5531.0,,,,,
4,Bisher noch keiner,,,,,,,


In [7]:
# Sheet tab_s61: remuneration per nursing class plus the food rate.
# `sheet_name` is the supported keyword; the old `sheetname` spelling was
# deprecated in pandas 0.21 and is rejected by current releases.
df = pd.read_excel(PATH, sheet_name='tab_s61_2013_RP', skiprows=14)

# get_values() reads the row label from 'key' and the figure from 'value'.
df = df.rename(columns={
    'Unnamed: 0': 'key',
    'Insgesamt                               ': 'value',
})

# Substring of a row label -> output key.
cost_mapping = {
    'Pflegeklasse 1': 'costs_nursing_class_1',
    'Pflegeklasse 2': 'costs_nursing_class_2',
    'Pflegeklasse 3': 'costs_nursing_class_3',
    'Verpflegung': 'food',
}

# When get_values() reaches the short-term-care heading it drops the current
# AGS, so the rows after that heading are ignored until the next AGS row.
# The label must match exactly, including its two trailing blanks.
data.extend(list(get_values(
    df,
    cost_mapping,
    prefix='costs',
    skip_on=['Vergütung für Kurzzeitpflege  '],
)))
df.head()

Unnamed: 0,key,value,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,07 Rheinland-Pfalz,,,,,,,,,,
1,Vergütung für vollstationäre,,,,,,,,,,
2,Dauerpflege,,,,,,,,,,
3,Pflegesatz,,,,,,,,,,
4,Pflegeklasse 1 .............,46.28,,,,,,,,,


In [8]:
# Turn the collected tuples into one long table with named columns.
df = pd.DataFrame(data).rename(columns={0: 'ags', 1: 'type', 2: 'key', 3: 'value'})
df['state'] = 'Rheinland-Pfalz'


def _part_time_total(group):
    """Sum the values of every row in ``group`` whose key contains 'part-time'."""
    return group[group['key'].str.contains('part-time')]['value'].sum()


# One combined 'part-time' row per AGS, built from the 'part-time-N' rows.
part_time = (
    df.groupby('ags')
    .apply(_part_time_total)
    .reset_index()
    .rename(columns={0: 'value'})
)
part_time['state'] = 'Rheinland-Pfalz'
part_time['type'] = 'personal'
part_time['key'] = 'part-time'

# Replace the individual part-time rows with the combined ones.
is_part_time_row = df['key'].str.contains('part-time')
df = pd.concat([df[~is_part_time_row], part_time])

df.to_csv('csvs/rheinland-pfalz.csv', index=False)
df.head()

Unnamed: 0,ags,key,state,type,value
0,7111,1-bed,Rheinland-Pfalz,beds,1136.0
1,7111,2-bed,Rheinland-Pfalz,beds,464.0
2,7111,3-bed,Rheinland-Pfalz,beds,0.0
3,7111,4-bed,Rheinland-Pfalz,beds,0.0
4,7131,1-bed,Rheinland-Pfalz,beds,822.0


In [9]:
# Spot check: every 'personal' row recorded for AGS 07111.
df.loc[df['type'].eq('personal') & df['ags'].eq('07111')]

Unnamed: 0,ags,key,state,type,value
144,7111,total_personnel,Rheinland-Pfalz,personal,1305.0
145,7111,fully_employed,Rheinland-Pfalz,personal,412.0
149,7111,trainee,Rheinland-Pfalz,personal,124.0
150,7111,nursing_and_care,Rheinland-Pfalz,personal,785.0
151,7111,social_care,Rheinland-Pfalz,personal,65.0
152,7111,other_care,Rheinland-Pfalz,personal,53.0
153,7111,state_certified,Rheinland-Pfalz,personal,261.0
154,7111,nurse,Rheinland-Pfalz,personal,97.0
155,7111,nurse_children,Rheinland-Pfalz,personal,3.0
0,7111,part-time,Rheinland-Pfalz,personal,744.0
