In [1]:
import pandas as pd
from pathlib import Path
import glob
import numpy as np
from datetime import date
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
# Capture the current timestamp and the current calendar date when this cell runs.
now = datetime.now()
today = date.today()

In [26]:
# Background on two of the score columns loaded below:
#   * risk-adjusted QM score: can be thought of as an estimate of what the nursing
#     facility's QM rate would be if the facility had residents with average risk.
#   * expected QM score: resident-level expected score for a QM, i.e. an estimate
#     of the risk that a resident will trigger the QM.

# Read the 2020 claims-based quality-measure file, lower-case every column
# header, then give the provider key and the measure/score columns short names.
qm_df = (
    pd.read_csv('NH_QualityMsr_Claims_2020.csv', sep=',', encoding='cp1252')
    .rename(columns=str.lower)
    .rename(
        columns={
            'federal provider number': 'provnum',
            'measure description': 'measure',
            'resident type': 'resident_type',
            'observed score': 'score',
            'adjusted score': 'adj_score',
            'expected score': 'exp_score',
        }
    )
)

# Spot-check: display every quality-measure row for one facility, matched by name.
qm_df.loc[qm_df['provider name'].eq('PINES POST ACUTE AND MEMORY CARE')]

Unnamed: 0,provnum,provider name,provider address,provider city,provider state,provider zip code,measure code,measure,resident_type,adj_score,score,exp_score,footnote for score,used in quality measure five star rating,measure period,location,processing date
54308,525497,PINES POST ACUTE AND MEMORY CARE,1625 E MAIN ST,CLINTONVILLE,WI,54929,521,Percentage of short-stay residents who were re...,Short Stay,,,,9.0,Y,20200101-20201231,"1625 E MAIN ST, CLINTONVILLE, WI, 54929",2021-08-01
54309,525497,PINES POST ACUTE AND MEMORY CARE,1625 E MAIN ST,CLINTONVILLE,WI,54929,522,Percentage of short-stay residents who had an ...,Short Stay,,,,9.0,Y,20200101-20201231,"1625 E MAIN ST, CLINTONVILLE, WI, 54929",2021-08-01
54310,525497,PINES POST ACUTE AND MEMORY CARE,1625 E MAIN ST,CLINTONVILLE,WI,54929,551,Number of hospitalizations per 1000 long-stay ...,Long Stay,,,,9.0,Y,20200101-20201231,"1625 E MAIN ST, CLINTONVILLE, WI, 54929",2021-08-01
54311,525497,PINES POST ACUTE AND MEMORY CARE,1625 E MAIN ST,CLINTONVILLE,WI,54929,552,Number of outpatient emergency department visi...,Long Stay,,,,9.0,Y,20200101-20201231,"1625 E MAIN ST, CLINTONVILLE, WI, 54929",2021-08-01


In [7]:
# Load the monthly occupancy table, then keep only the rows whose year is 2020.
# NOTE(review): the saved output of this cell contains a warning fragment; if it
# is a pandas DtypeWarning (mixed-type columns), consider passing an explicit
# dtype= for the affected columns -- TODO confirm which columns are affected.
occ_df = pd.read_csv('all/occupancy_by_month_2021_08_30--15-08-44.csv')
occ_df_2020 = occ_df.loc[occ_df['year'].eq(2020)]

# Preview the first rows of the 2020 subset.
occ_df_2020.head()

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,provnum,year,month,provname,city,state,county_name,county_fips,cy_qtr,workdate,...,hrs_medaide_ctr,day,day_name,is_weekend,bedcert,region,market,cluster,occupancy,ensign
9,15009,2020,4,"BURNS NURSING HOME, INC.",RUSSELLVILLE,AL,Franklin,59.0,2020Q2,2020-04-01,...,0.0,1,Wednesday,False,57,,,,0.852047,False
10,15009,2020,5,"BURNS NURSING HOME, INC.",RUSSELLVILLE,AL,Franklin,59.0,2020Q2,2020-05-01,...,0.0,1,Friday,False,57,,,,0.724392,False
11,15009,2020,6,"BURNS NURSING HOME, INC.",RUSSELLVILLE,AL,Franklin,59.0,2020Q2,2020-06-01,...,0.0,1,Monday,False,57,,,,0.788889,False
12,15009,2020,7,"BURNS NURSING HOME, INC.",RUSSELLVILLE,AL,Franklin,59.0,2020Q3,2020-07-01,...,0.0,1,Wednesday,False,57,,,,0.834748,False
13,15009,2020,8,"BURNS NURSING HOME, INC.",RUSSELLVILLE,AL,Franklin,59.0,2020Q3,2020-08-01,...,0.0,1,Saturday,True,57,,,,0.943407,False


In [19]:
# Columns to keep from each table; both lists include 'provnum'.
qm_feature_cols = [
    'provnum', 'measure', 'resident_type', 'score', 'adj_score', 'exp_score',
]
occ_feature_cols = [
    'provnum', 'provname', 'state', 'county_name', 'city', 'year', 'month',
    'occupancy', 'bedcert', 'market', 'ensign',
]

# Narrow both frames to the selected columns. The names are rebound, so any
# column outside these lists is no longer available on qm_df / occ_df_2020.
qm_df = qm_df.loc[:, qm_feature_cols]
occ_df_2020 = occ_df_2020.loc[:, occ_feature_cols]

In [28]:
# List the distinct quality-measure descriptions present in the file.
qm_df.loc[:, 'measure'].unique()

array(['Percentage of short-stay residents who were rehospitalized after a nursing home admission',
       'Percentage of short-stay residents who had an outpatient emergency department visit',
       'Number of hospitalizations per 1000 long-stay resident days',
       'Number of outpatient emergency department visits per 1000 long-stay resident days'],
      dtype=object)

In [21]:
# Combine the 2020 occupancy rows with the quality-measure rows on 'provnum'.
#   * how='outer' keeps providers that appear in only one of the two tables;
#     their columns from the other table are filled with NaN.
#   * validate='many_to_many' allows repeated keys on both sides and performs no
#     uniqueness check, so each occupancy row of a provider is paired with each
#     quality-measure row of that same provider.
merge_df = occ_df_2020.merge(
    qm_df,
    on='provnum',
    how='outer',
    validate='many_to_many',
)

In [24]:
# List the distinct state codes in the merged table (NaN included, if present).
merge_df.loc[:, 'state'].unique()

array(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
       'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
       'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
       'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'PR', 'RI', 'SC', 'SD', 'TN',
       'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', nan], dtype=object)

In [23]:
# Persist the merged table as CSV without writing the row index.
merge_df.to_csv(Path('all/merge_qm_occupancy_2020.csv'), index=False)

In [27]:
# Spot-check: display the 2020 occupancy rows for one facility, matched by name.
occ_df_2020.loc[occ_df_2020['provname'].eq('PINES POST ACUTE AND MEMORY CARE')]

Unnamed: 0,provnum,provname,state,county_name,city,year,month,occupancy,bedcert,market,ensign
325697,525497,PINES POST ACUTE AND MEMORY CARE,WI,Waupaca,CLINTONVILLE,2020,4,0.881333,50,Wisconsin Market,True
325698,525497,PINES POST ACUTE AND MEMORY CARE,WI,Waupaca,CLINTONVILLE,2020,5,0.853548,50,Wisconsin Market,True
325699,525497,PINES POST ACUTE AND MEMORY CARE,WI,Waupaca,CLINTONVILLE,2020,6,0.906667,50,Wisconsin Market,True
325700,525497,PINES POST ACUTE AND MEMORY CARE,WI,Waupaca,CLINTONVILLE,2020,7,0.970323,50,Wisconsin Market,True
325701,525497,PINES POST ACUTE AND MEMORY CARE,WI,Waupaca,CLINTONVILLE,2020,8,0.959355,50,Wisconsin Market,True
325702,525497,PINES POST ACUTE AND MEMORY CARE,WI,Waupaca,CLINTONVILLE,2020,9,0.902667,50,Wisconsin Market,True
325703,525497,PINES POST ACUTE AND MEMORY CARE,WI,Waupaca,CLINTONVILLE,2020,10,0.941935,50,Wisconsin Market,True
325704,525497,PINES POST ACUTE AND MEMORY CARE,WI,Waupaca,CLINTONVILLE,2020,11,0.865333,50,Wisconsin Market,True
325705,525497,PINES POST ACUTE AND MEMORY CARE,WI,Waupaca,CLINTONVILLE,2020,12,0.86,50,Wisconsin Market,True
