In [1]:
import pandas as pd
import numpy as np

In [2]:
def read_nursinghomes(filename):
    """Load the nursing-home CSV and normalise its code columns.

    The three date columns are parsed as dates. The 'ags' and 'PLZ'
    columns are turned into zero-padded five-character strings; missing
    entries become None.

    Parameters
    ----------
    filename : str or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
    """
    date_columns = [
        'start_date',
        'start_date_contract',
        'letzte Aktualisierung durch Pflegeeinrichtung',
    ]

    def _five_digit_code(value):
        # Numeric parsing drops leading zeros, so pad them back in.
        if pd.isnull(value):
            return None
        return str(int(value)).zfill(5)

    frame = pd.read_csv(filename, parse_dates=date_columns, encoding='utf-8')
    for column in ('ags', 'PLZ'):
        frame[column] = frame[column].apply(_five_digit_code)
    return frame

In [3]:
# Load the nursing-home table that every later cell aggregates; the bare
# last expression previews the first rows.
df = read_nursinghomes('data/nursinghomes.csv')
df.head()

Unnamed: 0,Ausstattung/Angebote: Bibliothek,Ausstattung/Angebote: Eigene Möbel,Ausstattung/Angebote: Fahrdienst,Ausstattung/Angebote: Fernseher,Ausstattung/Angebote: Garten,Ausstattung/Angebote: Hallenbad,Ausstattung/Angebote: Haustiere,Ausstattung/Angebote: Internet,Ausstattung/Angebote: Kegelbahn,Ausstattung/Angebote: Probewohnen,...,slug,nachtpflege,kurzzeitpflege,tagespflege,vollstationaer,red_flag_food,red_flag_decubitus,red_flag_medicine,red_flag_incontinence,red_flag_pain
0,0,0,0,0,0,0,0,0,0,0,...,01067-altenpflegeheim-st-michael,False,False,False,True,True,True,True,False,True
1,1,1,1,0,1,0,1,0,0,0,...,01067-pflegeheim-an-der-yenidze,False,False,False,True,False,False,True,False,False
2,1,1,0,1,1,0,0,1,0,1,...,01067-pflegewohnzentrum-haus-friedrichstadt,False,False,False,True,False,False,False,False,False
3,0,0,0,0,0,0,0,0,0,0,...,01067-seniorenzentrum-kathrin-lingk,False,False,False,True,True,True,False,True,False
4,1,1,0,1,0,0,1,1,0,0,...,01067-wohnen-pflege-fur-senioren-im-haus-am-sc...,False,False,False,True,False,True,True,True,True


In [4]:
# Two workbooks: `writer` collects the sheets grouped by 'landkreis',
# `writer_bund` the sheets grouped by 'bundesland'. Later cells add sheets;
# the last cell writes both files.
writer = pd.ExcelWriter('landkreise.xlsx', engine='xlsxwriter')
writer_bund = pd.ExcelWriter('bundeslaender.xlsx', engine='xlsxwriter')

## Red Flags

A "red flag" means the nursing home had at least one discrepancy in one of the questions belonging to a category of the transparency report. For example, if 1 of 9 residents with decubitus was not cared for properly according to one of the questions 1, 2, 5 or 6, the home gets a red flag for the decubitus category.

The categories and questions are defined below.

In [5]:
# Red-flag column name -> numbers of the transparency-report questions
# that make up the category.
RED_FLAGS = dict(
    red_flag_decubitus=[1, 2, 5, 6],
    red_flag_food=[7, 8, 9, 11],
    red_flag_pain=[13, 14, 15],
    red_flag_incontinence=[16, 17],
    red_flag_medicine=[23, 24, 25, 26],
)
def percent(group):
    """Return the sum of `group` as a percentage of its length.

    For a boolean flag column this is the share of True entries.
    """
    flagged = group.sum()
    total = len(group)
    return flagged / total * 100

def get_county_flag_report(df, flag, grouper='landkreis'):
    """Summarise one red-flag column per group.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the `grouper` column and the `flag` column.
    flag : str
        Name of the red-flag column to aggregate.
    grouper : str, default 'landkreis'
        Column to group by.

    Returns
    -------
    pandas.DataFrame
        Indexed by group and sorted by 'percent' descending, with columns
        'len' (rows in the group), 'sum' (sum of the flag column),
        'percent' (result of ``percent`` for the group) and 'rank'.
    """
    # 'sum' is passed by name: handing the builtin `sum` to agg() emits a
    # FutureWarning on pandas >= 2.1 and is slated to stop mapping to the
    # NaN-skipping groupby sum. The resulting column is still called 'sum'.
    df_r = (
        df.groupby(grouper)[flag]
        .agg([len, 'sum', percent])
        .sort_values('percent', ascending=False)
    )
    # rank() is ascending, so rank 1 is the LOWEST percentage even though the
    # rows are sorted highest-first; ties share the smallest rank.
    df_r['rank'] = df_r['percent'].rank(method='min')
    return df_r


In [6]:
# Red-flag column name -> German label used in sheet names and column headers.
RED_FLAG_LABELS = dict(
    red_flag_decubitus='Dekubitus',
    red_flag_food='Nahrungsversorgung',
    red_flag_pain='Schmerzen',
    red_flag_incontinence='Inkontinenz',
    red_flag_medicine='Medizinische Versorgung',
)

def generate_for_flag(df, key, label, grouper='landkreis'):
    """Build the per-group report for one red flag with German column headers.

    Parameters
    ----------
    df : pandas.DataFrame
        Nursing-home table.
    key : str
        Red-flag column to report on.
    label : str
        Human-readable category name inserted into the 'sum' column header.
    grouper : str, default 'landkreis'
        Column to group by.

    Returns
    -------
    pandas.DataFrame
        The result of ``get_county_flag_report`` with renamed columns.
    """
    german_headers = {
        'len': 'Anzahl Pflegeheime',
        'sum': 'Anzahl Pflegeheime mit %s Red Flag' % label,
        'percent': 'Prozent',
        'rank': 'Rang',
    }
    report = get_county_flag_report(df, key, grouper=grouper)
    return report.rename(columns=german_headers)

# One sheet per red-flag category: first all county sheets, then all state sheets.
for target, level in ((writer, 'landkreis'), (writer_bund, 'bundesland')):
    for key, label in RED_FLAG_LABELS.items():
        df_r = generate_for_flag(df, key, label, grouper=level)
        df_r.to_excel(target, sheet_name=label)



red_flags = list(RED_FLAGS.keys())


def red_flags_total(x):
    """Count the rows of `x` that have at least one red flag.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of one group; must contain every column listed in `red_flags`.

    Returns
    -------
    pandas.Series
        'len' (number of rows), 'sum' (rows with any red flag set) and
        'percent' ('sum' as a percentage of 'len').
    """
    # `axis` must be passed by keyword: DataFrame.any() no longer accepts it
    # positionally on pandas >= 2.0.
    flagged = x[red_flags].any(axis=1).sum()
    total = len(x)
    return pd.Series({
        'len': total,
        'sum': flagged,
        'percent': flagged / total * 100,
    })
def generate_total_flag(df, grouper='landkreis'):
    """Report, per group, how many nursing homes have at least one red flag.

    Parameters
    ----------
    df : pandas.DataFrame
        Nursing-home table.
    grouper : str, default 'landkreis'
        Column to group by.

    Returns
    -------
    pandas.DataFrame
        One row per group, sorted by percentage descending, with German
        column headers and a 'Rang' column (ascending rank of the percentage,
        ties sharing the smallest rank).
    """
    per_group = df.groupby(grouper).apply(red_flags_total)
    ordered = per_group.sort_values(by='percent', ascending=False)
    ordered['rank'] = ordered['percent'].rank(method='min')
    german_headers = {
        'len': 'Anzahl Pflegeheime',
        'sum': 'Anzahl Pflegeheime mit mind. einer Red Flag',
        'percent': 'Prozent',
        'rank': 'Rang',
    }
    return ordered.rename(columns=german_headers)

# Combined red-flag sheet, once per aggregation level.
for target, level in ((writer, 'landkreis'), (writer_bund, 'bundesland')):
    df_r = generate_total_flag(df, grouper=level)
    df_r.to_excel(target, sheet_name='Red Flags Zusammen')

Compute the mean at county and state level for each of the following:

- Care grade (medizinischer Teil der Pflegenote)
- Price for full inpatient care, care levels 1–3 (Preis für Pflegestufe 1, 2 und 3 vollstationär)
- Age of the nursing home
- Size of the nursing home

In [7]:
def _days_to_years(days):
    """Convert a number of days to years, counting 365 days per year."""
    return days / 365


# Column to average -> sheet label, or (sheet label, transform) when the
# group means are converted before export.
VALUES = {'grade_care': 'Note Pflege+med. Versorgung'}
for _level in (1, 2, 3):
    VALUES['Vollstationär Allgemein Pflegestufe %d Gesamtpreis' % _level] = (
        'Vollstat %d Gesamtpreis' % _level
    )
VALUES['days_in_operation'] = ('Alter in Jahren', _days_to_years)
VALUES['belegt_vollstationär'] = 'Belegte Betten vollstationär'

def get_value_df(df, key, label, func=None, grouper='landkreis'):
    """Average one column per group and rank the groups by that average.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the `grouper` and `key` columns.
    key : str
        Column to average.
    label : str
        Text inserted into the German header of the mean column.
    func : callable, optional
        Applied to the column of group means before ranking.
    grouper : str, default 'landkreis'
        Column to group by.

    Returns
    -------
    pandas.DataFrame
        Indexed by group in ascending order of the mean, with the columns
        'Durchschnittliche <label>' and 'Rang' (ascending rank, ties sharing
        the smallest rank).
    """
    group_means = df.groupby(grouper)[key].mean()
    table = group_means.sort_values().to_frame()
    if func is not None:
        table[key] = func(table[key])
    table['rank'] = table[key].rank(method='min')
    headers = {
        key: 'Durchschnittliche %s' % label,
        'rank': 'Rang',
    }
    return table.rename(columns=headers)

# One sheet of group means per VALUES entry, for counties and then for states.
for w, grouper in ((writer, 'landkreis'), (writer_bund, 'bundesland')):
    for k, spec in VALUES.items():
        # A tuple entry carries (label, transform); a plain string is only the label.
        v, f = spec if isinstance(spec, tuple) else (spec, None)
        df_r = get_value_df(df, k, v, func=f, grouper=grouper)
        df_r.to_excel(w, sheet_name=v)


Generate absolute and percentage numbers for the provider type (public, private, charitable, unknown).

In [8]:
def generate_traeger(df, grouper='landkreis'):
    """Tabulate provider types per group as absolute counts and percentages.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the `grouper` column and a 'traeger_art' column.
    grouper : str, default 'landkreis'
        Column to group by.

    Returns
    -------
    pandas.DataFrame
        One row per group. Columns are a two-level index of
        (traeger_art, 'Type'), where 'Type' is 'Absolute' (number of rows)
        or 'Percentage' (share of the group's rows, 0-100). Combinations that
        do not occur are 0.
    """
    counts = df.groupby(grouper)['traeger_art'].value_counts().unstack().fillna(0)
    shares = counts.div(counts.sum(axis=1), axis=0) * 100

    def _tagged(frame, kind):
        # Flip so provider types become rows, then add `kind` as a second
        # index level so both tables can be stacked and told apart.
        flipped = frame.T
        flipped['Type'] = kind
        return flipped.set_index('Type', append=True)

    combined = pd.concat([_tagged(counts, 'Absolute'), _tagged(shares, 'Percentage')])
    # Sorting puts each provider type's Absolute/Percentage pair side by side
    # before flipping back to one row per group.
    return combined.sort_index().T

# Provider-type sheet, once per aggregation level.
for target, level in ((writer, 'landkreis'), (writer_bund, 'bundesland')):
    df_r = generate_traeger(df, grouper=level)
    df_r.to_excel(target, sheet_name='Trägerart')


In [9]:
# close() writes each workbook to disk and releases the file handle.
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0.
writer.close()
writer_bund.close()