In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from scipy.interpolate import splev, splrep
%matplotlib inline
# Age-bracket labels, youngest bracket first, written as '<low>-<high>'.
_age_bracket_bounds = [(0, 21), (22, 30), (31, 64), (65, 74),
                       (75, 84), (85, 94), (95, 120)]
age_cols = ['{}-{}'.format(low, high) for low, high in _age_bracket_bounds]
# Population table; the code below reads its `Age`, `Year` and `MESA_pop` columns.
pop_df = pd.read_csv('../raw_data/demography_by_age.csv')

# Nursing-home compendium. The first CSV column becomes the row index; the
# code below looks rows up by season and reads the age-bracket columns plus
# `perc_beds_occupied`.
compiled_nursing_home_wi_data = '../raw_data/nursing_home_compendium.csv'
df = pd.read_csv(compiled_nursing_home_wi_data, index_col=0)
# Single-year ages covered by the interpolation, and the seasons it is run for.
ages = range(22, 110)
seasons = range(2011, 2015)

# One row per season, one column per single-year age from 0 up to the last
# interpolated age. Ages below `ages.start` are never written by the
# interpolation loop, so they keep this initial value.
#
# The fill value is a float (0.0) on purpose: the loop stores fractions and
# NaN in this frame, and writing those into integer columns relies on silent
# upcasting that pandas has deprecated.
interpolated_nursing_home_dist = pd.DataFrame(
    0.0,
    index=seasons,
    columns=list(range(0, ages.start)) + list(ages),
)
# Scale that turns the normalised age distribution into head counts; it is
# multiplied by the season's `perc_beds_occupied` / 100 below.
N_BEDS = 538  # presumably the total number of nursing-home beds — TODO confirm

# (column of `df`, number of single-year ages the bracket's value is spread over).
# The widths add up to len(ages); note that '95-120' is spread over 15 ages only.
AGE_BRACKETS = [('22-30', 9), ('31-64', 34), ('65-74', 10),
                ('75-84', 10), ('85-94', 10), ('95-120', 15)]

for season in seasons:
    # Piecewise-constant profile: each bracket's value split evenly over its ages.
    covs = []
    for bracket_col, width in AGE_BRACKETS:
        covs.extend([df.loc[season, bracket_col] / width] * width)

    # Smooth the step profile with a spline that uses fixed interior knots.
    # The fit is done on the square-root scale and squared afterwards, so the
    # smoothed values cannot go negative. Then renormalise to sum to 1.
    spl = splrep(ages, np.sqrt(covs), t=[51, 70, 80, 90, 100])
    covs = splev(ages, spl)**2
    covs = covs / sum(covs)

    # Values that do not change with age are looked up once per season.
    perc_beds_occupied = df.loc[season, 'perc_beds_occupied']
    season_pop = pop_df[pop_df.Year == season]

    for age, cov in zip(ages, covs):
        num_in_nursing_homes = round(cov * N_BEDS * perc_beds_occupied / 100)
        age_pop = season_pop.loc[season_pop.Age == age, 'MESA_pop']

        # NaN marks "no usable population figure" (no row, or a population of 0).
        frac_in_nursing_homes = np.nan
        if len(age_pop) > 0:
            # .item() extracts the scalar explicitly (int() on a Series is
            # deprecated) and raises if more than one row matches.
            total_pop = int(age_pop.item())
            if total_pop > 0:
                # A fraction cannot exceed 1, so cap it.
                frac_in_nursing_homes = min(num_in_nursing_homes / total_pop, 1)
        interpolated_nursing_home_dist.loc[season, age] = frac_in_nursing_homes



# Seasons written to the output file (2009.5 is an extra non-integer label)
# and the ages written for each of them.
all_seasons = list(range(2005, 2019)) + [2009.5]
output_ages = range(0, 101)
index = pd.MultiIndex.from_product([output_ages, all_seasons], names=['Age', 'Season'])

# Per-age mean over the interpolated seasons (NaN entries are skipped by default).
mean_nursing_home = interpolated_nursing_home_dist.mean()

# Seasons that use their own interpolated value; every other season falls
# back to the per-age mean.
# NOTE(review): `seasons` also interpolates 2014, yet 2014 is not in this set
# and therefore always receives the mean — confirm this is intended.
direct_seasons = set(range(2011, 2014))

# NOTE: despite the column name, the stored value is 1 - (nursing-home
# fraction). The name is kept because it is the header of the output file.
# The order (age outer, season inner) matches MultiIndex.from_product above.
outside_fraction = [
    1 - (interpolated_nursing_home_dist.loc[season, age]
         if season in direct_seasons
         else mean_nursing_home.loc[age])
    for age in output_ages
    for season in all_seasons
]

# Build the numeric column in one go as float64 instead of filling an
# object-dtype frame one cell at a time.
final_df = pd.DataFrame({'Nursing_home_fraction': outside_fraction},
                        index=index, dtype=float)

final_df.to_csv('../data/nursing_home_flat.csv')