In [None]:
import pandas as pd
import numpy as np
# Raise the display limit to 500 columns so wide frames (e.g. ranked_d with its
# rank_at_* columns) are shown without truncation.
pd.set_option('display.max_columns', 500)

In [None]:
# Wider client list, currently disabled:
# CLIENT_LIST = ['avante', 'hsm', 'marquis', 'midwest', 'mmh','trio', 'uch', 'vintage']

# Clients included in this run.
CLIENT_LIST = ['avante', 'marquis', 'trio']

# Date window used to filter the input tables (both bounds are compared inclusively).
START_DATE = '2021-01-01'
END_DATE = '2021-08-31'

# Spacing between the percent cut-offs that get evaluated.
STEP = 1

# Percent cut-offs: STEP, 2*STEP, ... up to and including 100.
percent_range = list(range(STEP, 101, STEP))

In [None]:
# Load the ranking table and keep only the configured clients and the
# configured census-date window (both bounds inclusive).
ranked_d = pd.read_csv('s3://saiva-playground/gill/ranked_d.csv')
ranked_d['censusdate'] = pd.to_datetime(ranked_d['censusdate'])
in_scope = (
    ranked_d['client'].isin(CLIENT_LIST)
    & (ranked_d['censusdate'] >= START_DATE)
    & (ranked_d['censusdate'] <= END_DATE)
)
# .copy() detaches the filtered rows from the raw frame, so adding columns
# below does not raise SettingWithCopyWarning.
ranked_d = ranked_d[in_scope].copy()

# rank_at_<pct> is pct percent of the `ranked_d` column, rounded to a whole number.
# NOTE(review): `ranked_d` presumably holds the number of ranked residents per
# client/facility/census date — confirm against the producer of the CSV.
# All columns are built first and attached with a single concat; inserting them
# one by one fragments the frame and emits a PerformanceWarning.
rank_at_cols = pd.DataFrame(
    {f'rank_at_{pct}': (ranked_d['ranked_d'] * pct / 100.0).round() for pct in percent_range},
    index=ranked_d.index,
)
ranked_d = pd.concat(
    # Drop any pre-existing rank_at_* columns first so they are replaced, not duplicated.
    [ranked_d.drop(columns=rank_at_cols.columns, errors='ignore'), rank_at_cols],
    axis=1,
)

ranked_d.head()

In [None]:
# Load the transfer table and parse both date columns.
base_df = pd.read_csv('s3://saiva-playground/gill/RTH_data.csv')
for date_col in ('dateoftransfer', 'censusdate'):
    base_df[date_col] = pd.to_datetime(base_df[date_col])

# Keep the configured clients whose transfer date falls inside the window
# (both bounds inclusive).
client_selected = base_df['client'].isin(CLIENT_LIST)
transfer_in_window = base_df['dateoftransfer'].between(START_DATE, END_DATE)
base_df = base_df[client_selected & transfer_in_window]

base_df.head()

In [None]:
# Rows without a rank_cutoff are split off and counted per facility; base_df
# itself keeps only the rows that do have a rank_cutoff.
has_no_rank = base_df['rank_cutoff'].isna()

unranked_base_df = (
    base_df[has_no_rank]
    .groupby(['client', 'facilityname', 'facilityid'])['resident_transferred']
    .count()
    .reset_index()
)
base_df = base_df[~has_no_rank]

unranked_base_df

## Calculating precision

###### <center>Calculating precision numerator</center>

In [None]:
# Attach the rank_at_<pct> thresholds of the matching client/facility/census
# date to every ranked transfer row. Rows that found no ranking data (NaN
# `ranked_d` after the left join) are discarded; .copy() makes the result an
# independent frame that can be modified safely.
precision_numerator = base_df.merge(ranked_d, how='left', on=['censusdate', 'client', 'facilityid'])
precision_numerator = precision_numerator[precision_numerator['ranked_d'].notna()].copy()


def precision_numerator_lambda(row):
    """Replace every rank_at_<pct> value with a 0/1 flag.

    The flag is 1 when the rank_at_<pct> value is >= `rank_cutoff` and 0
    otherwise (a comparison involving NaN yields 0).

    Args:
        row: either a single row (Series) or a whole DataFrame that holds
            `rank_cutoff` and one `rank_at_<pct>` entry per value of
            `percent_range`. It is modified in place.

    Returns:
        The same object that was passed in.
    """
    for pct in percent_range:
        col = f'rank_at_{pct}'
        row[col] = np.where(row[col] >= row['rank_cutoff'], 1, 0)
    return row


# Applied to the whole frame at once: each comparison is a vectorised column
# operation instead of one Python call per row, and the flags come out as
# plain integers.
precision_numerator = precision_numerator_lambda(precision_numerator)

# Sum the flags per facility and prefix the summed columns with 'num_'.
groupby_sum_cols = [col for col in ranked_d.columns if 'rank_at' in col]
grouped_precision_numerator = (
    precision_numerator.groupby(['client', 'facilityid'])[groupby_sum_cols]
    .sum()
    .reset_index()
    .rename(columns=lambda col: 'num_' + col if 'rank_at_' in col else col)
)

# Long format: one row per (client, facility, percent cut-off).
precision_id_cols = ['client', 'facilityid']
final_precision_numerator = pd.melt(
    grouped_precision_numerator,
    id_vars=precision_id_cols,
    value_vars=[col for col in grouped_precision_numerator.columns if col not in precision_id_cols],
    var_name='rank',
    value_name='num_precision',
)
final_precision_numerator = final_precision_numerator.sort_values(['client', 'facilityid'])
# Keep only the percent value (as a string) in `rank`.
final_precision_numerator['rank'] = final_precision_numerator['rank'].str.replace(
    'num_rank_at_', '', regex=False
)

final_precision_numerator.head()

##### <center>Calculating clientwise precision numerator</center>

In [None]:
# Sum the 0/1 flags per client and prefix the summed columns with 'num_'.
clientwise_grouped_precision_numerator = (
    precision_numerator.groupby(['client'])[groupby_sum_cols].sum().reset_index()
)
clientwise_num_names = {
    col: 'num_' + col
    for col in clientwise_grouped_precision_numerator.columns
    if 'rank_at_' in col
}
clientwise_grouped_precision_numerator = clientwise_grouped_precision_numerator.rename(
    columns=clientwise_num_names
)

# Long format: one row per (client, percent cut-off).
final_clientwise_precision_numerator = pd.melt(
    clientwise_grouped_precision_numerator,
    id_vars=['client'],
    value_vars=list(clientwise_grouped_precision_numerator.columns[1:]),
    var_name='rank',
    value_name='num_precision',
)
final_clientwise_precision_numerator = final_clientwise_precision_numerator.sort_values(['client'])

# Keep only the percent value (as a string) in `rank`.
clientwise_num_rank = final_clientwise_precision_numerator['rank']
final_clientwise_precision_numerator['rank'] = clientwise_num_rank.str.replace('num_rank_at_','')

final_clientwise_precision_numerator.head()

###### <center>Calculating precision denominator</center>

In [None]:
# Sum the rank_at_<pct> values of ranked_d per facility and prefix the summed
# columns with 'denom_'.
groupby_sum_cols = [col for col in ranked_d.columns if 'rank_at' in col]
precision_denominator = (
    ranked_d.groupby(['client', 'facilityid'])[groupby_sum_cols].sum().reset_index()
)
denom_names = {
    col: 'denom_' + col for col in precision_denominator.columns if 'rank_at_' in col
}
precision_denominator = precision_denominator.rename(columns=denom_names)

# Long format: one row per (client, facility, percent cut-off).
final_precision_denominator = pd.melt(
    precision_denominator,
    id_vars=['client', 'facilityid'],
    value_vars=list(precision_denominator.columns[2:]),
    var_name='rank',
    value_name='denom_precision',
)
final_precision_denominator = final_precision_denominator.sort_values(['client','facilityid'])

# Keep only the percent value (as a string) in `rank`.
denom_rank = final_precision_denominator['rank']
final_precision_denominator['rank'] = denom_rank.str.replace('denom_rank_at_','')

final_precision_denominator.head()

###### <center>Calculating clientwise precision denominator</center>

In [None]:
# Sum the rank_at_<pct> values of ranked_d per client and prefix the summed
# columns with 'denom_'.
groupby_sum_cols = [col for col in ranked_d.columns if 'rank_at' in col]
clientwise_precision_denominator = (
    ranked_d.groupby(['client'])[groupby_sum_cols].sum().reset_index()
)
clientwise_denom_names = {
    col: 'denom_' + col
    for col in clientwise_precision_denominator.columns
    if 'rank_at_' in col
}
clientwise_precision_denominator = clientwise_precision_denominator.rename(
    columns=clientwise_denom_names
)

# Long format: one row per (client, percent cut-off).
final_clientwise_precision_denominator = pd.melt(
    clientwise_precision_denominator,
    id_vars=['client'],
    value_vars=list(clientwise_precision_denominator.columns[1:]),
    var_name='rank',
    value_name='denom_precision',
)
final_clientwise_precision_denominator = final_clientwise_precision_denominator.sort_values(['client'])

# Keep only the percent value (as a string) in `rank`.
clientwise_denom_rank = final_clientwise_precision_denominator['rank']
final_clientwise_precision_denominator['rank'] = clientwise_denom_rank.str.replace('denom_rank_at_','')

final_clientwise_precision_denominator.head()

### <center>Merging precision numerator and denominator</center>

In [None]:
# Put the facility-level numerator and denominator side by side; only
# (client, facility, cut-off) combinations present in both are kept.
precision_df = pd.merge(
    final_precision_numerator,
    final_precision_denominator,
    how='inner',
    on=['client', 'facilityid', 'rank'],
)
precision_df.head()

In [None]:
# Put the client-level numerator and denominator side by side; only
# (client, cut-off) combinations present in both are kept.
clientwise_precision_df = pd.merge(
    final_clientwise_precision_numerator,
    final_clientwise_precision_denominator,
    how='inner',
    on=['client', 'rank'],
)
clientwise_precision_df.head()

## Calculating recall

##### <center>Calculating recall denominator</center>

In [None]:
# Smallest rank_cutoff per resident transfer, stored as `min_rank`.
recall_key_cols = [
    'client', 'facilityid', 'facilityname',
    'masterpatientid', 'dateoftransfer', 'resident_transferred',
]
base_recall_df = (
    base_df.groupby(recall_key_cols)['rank_cutoff']
    .min()
    .rename('min_rank')
    .reset_index()
)


In [None]:
# For every (date, facility) with at least one transfer, expand the transfers
# onto the full 1..max rank grid of that facility: ranks with a transfer keep
# their row(s), every other rank gets a padded row with resident_transferred = 0.
combined_list = []
facilitynames = base_recall_df.facilityname.unique().tolist()

# A facility's rows and its 1..max rank grid do not depend on the date, so
# they are built once here instead of once per (date, facility) pair.
facility_lookup = {}
for facilityname in facilitynames:
    facility_rows = base_recall_df[base_recall_df['facilityname'] == facilityname]
    max_rank = facility_rows['min_rank'].max()
    n_ranks = 0 if pd.isna(max_rank) else int(max_rank)
    facility_lookup[facilityname] = (facility_rows, pd.DataFrame({'rank': range(1, n_ranks + 1)}))

key_cols = ['client', 'facilityname', 'facilityid', 'dateoftransfer']

for date in pd.date_range(start=START_DATE, end=END_DATE):
    for facilityname in facilitynames:
        facility_rows, one_to_max = facility_lookup[facilityname]
        specific_date_facility_df = facility_rows[facility_rows['dateoftransfer'] == date]
        if specific_date_facility_df.empty:
            continue

        # Right join: one row per rank of the grid, in rank order.
        specific_date_facility_df = specific_date_facility_df.merge(
            one_to_max, left_on='min_rank', right_on='rank', how='right'
        )
        # Padded rows carry NaN keys. ffill alone cannot fill the rows that come
        # before the first matched rank, and rows with NaN keys are silently
        # dropped by later groupbys, so a bfill is applied as well.
        specific_date_facility_df[key_cols] = specific_date_facility_df[key_cols].ffill().bfill()
        # Plain assignment instead of a chained inplace fillna, which does not
        # write back to the frame under pandas copy-on-write.
        specific_date_facility_df['resident_transferred'] = (
            specific_date_facility_df['resident_transferred'].fillna(0)
        )
        specific_date_facility_df['min_rank'] = specific_date_facility_df['rank']
        specific_date_facility_df = specific_date_facility_df.drop('rank', axis=1)
        specific_date_facility_df = specific_date_facility_df.sort_values(
            by=['dateoftransfer', 'client', 'facilityname', 'facilityid', 'min_rank']
        )
        combined_list.append(specific_date_facility_df)

if not combined_list:
    # pd.concat would raise a less helpful ValueError on an empty list.
    raise ValueError('No ranked transfers found between START_DATE and END_DATE for the selected clients')

combined_base_recall_df = pd.concat(combined_list)
combined_base_recall_df = combined_base_recall_df.sort_values(
    by=['client', 'dateoftransfer', 'facilityname', 'facilityid', 'min_rank']
)
combined_base_recall_df = combined_base_recall_df.reset_index(drop=True)
combined_base_recall_df.head()

In [None]:
def recall_at_k(group):
    """Add running and total transfer counts to one group of rows.

    Args:
        group: DataFrame with a `resident_transferred` column.

    Returns:
        A copy of `group` with a fresh 0..n-1 index and two extra columns:
        `hospitalized_cumsum` (running sum of `resident_transferred` in row
        order) and `total_relevant` (sum of `resident_transferred` over the
        whole group, repeated on every row).
    """
    return group.assign(
        hospitalized_cumsum=group.resident_transferred.cumsum(),
        total_relevant=group.resident_transferred.sum(),
    ).reset_index(drop=True)


performance_base = (
    combined_base_recall_df.groupby(["client", "facilityname", "facilityid", "dateoftransfer"])
    .apply(recall_at_k)
    .reset_index(drop=True)
)

# Recall denominator per facility: all ranked transfers in the window. Summing
# `resident_transferred` directly counts every transfer exactly once (padded
# rank rows contribute 0), rather than reading the total off whichever rank row
# happens to come first for the facility.
facility_keys = ['client', 'facilityname', 'facilityid']
transferred_df = (
    performance_base.groupby(facility_keys)['resident_transferred']
    .sum()
    .reset_index(name='total_residents_transferred')
)

if len(unranked_base_df):
    # Add the transfers that had no rank. A left join keeps facilities that have
    # no unranked transfers (an inner join would drop them from the recall table).
    unranked_counts = unranked_base_df.rename(columns={'resident_transferred': 'unranked_transferred'})
    transferred_df = transferred_df.merge(unranked_counts, how='left', on=facility_keys)
    transferred_df['total_residents_transferred'] += transferred_df['unranked_transferred'].fillna(0)
    transferred_df = transferred_df.drop(columns='unranked_transferred')

transferred_df.head()

##### <center>Calculating clientwise recall denominator</center>

In [None]:
# Recall denominator per client: all ranked transfers in the window.
# `resident_transferred` is summed rather than `total_relevant`, because
# `total_relevant` repeats the per-day total on every rank row and summing it
# over all rows counts each transfer many times.
clientwise_transferred_df = (
    performance_base.groupby(['client'])['resident_transferred']
    .sum()
    .reset_index(name='total_residents_transferred')
)

if len(unranked_base_df):
    # Include transfers without a rank, as the facility-level denominator does.
    unranked_per_client = (
        unranked_base_df.groupby('client')['resident_transferred']
        .sum()
        .rename('unranked_transferred')
        .reset_index()
    )
    clientwise_transferred_df = clientwise_transferred_df.merge(unranked_per_client, how='left', on='client')
    clientwise_transferred_df['total_residents_transferred'] += (
        clientwise_transferred_df['unranked_transferred'].fillna(0)
    )
    clientwise_transferred_df = clientwise_transferred_df.drop(columns='unranked_transferred')

clientwise_transferred_df.head()

###### <center>Calculating recall numerator</center>

In [None]:
# Attach the rank_at_<pct> thresholds of the matching client/facility/census
# date to every ranked transfer row; rows without ranking data are dropped by
# the inner join.
recall_numerator_df = base_df.merge(ranked_d, how='inner', on=['censusdate', 'client', 'facilityid'])


def recall_numerator_lambda(row):
    """Replace every rank_at_<pct> value with a 0/1 flag.

    The flag is 1 when the rank_at_<pct> value is >= `rank_cutoff` and 0
    otherwise (a comparison involving NaN yields 0).

    Args:
        row: either a single row (Series) or a whole DataFrame that holds
            `rank_cutoff` and one `rank_at_<pct>` entry per value of
            `percent_range`. It is modified in place.

    Returns:
        The same object that was passed in.
    """
    for pct in percent_range:
        col = f'rank_at_{pct}'
        row[col] = np.where(row[col] >= row['rank_cutoff'], 1, 0)
    return row


# Applied to the whole frame at once: each comparison is a vectorised column
# operation instead of one Python call per row, and the flags come out as
# plain integers.
recall_numerator_df = recall_numerator_lambda(recall_numerator_df)
recall_numerator_df.head()

In [None]:
groupby_sum_cols = [col for col in recall_numerator_df.columns if 'rank_at_' in col]

# One row per resident transfer: the flag is 1 if any of the transfer's rows
# was flagged at that cut-off.
recall_numerator_df = (
    recall_numerator_df
    .groupby(['client', 'facilityname', 'facilityid', 'dateoftransfer', 'masterpatientid'])[groupby_sum_cols]
    .max()
    .reset_index()
)
# Flagged transfers per facility and cut-off.
grouped_recall_numerator_df = (
    recall_numerator_df.groupby(['client', 'facilityname', 'facilityid'])[groupby_sum_cols]
    .sum()
    .reset_index()
)

# Long format: one row per (client, facility, percent cut-off).
final_recall_numerator_df = pd.melt(
    grouped_recall_numerator_df,
    id_vars=['client', 'facilityname', 'facilityid'],
    value_vars=groupby_sum_cols,
    var_name='rank',
    value_name='rth_predicted',
)
final_recall_numerator_df = final_recall_numerator_df.sort_values(['client', 'facilityid', 'facilityname'])
# Keep only the percent value (as a string) in `rank`. The value is derived from
# this frame's own `rank` column; taking it from final_precision_numerator
# would only be right if both frames happened to share the same index layout.
final_recall_numerator_df['rank'] = final_recall_numerator_df['rank'].str.replace(
    'rank_at_', '', regex=False
)

final_recall_numerator_df.head()

###### <center>Calculating clientwise recall numerator</center>

In [None]:
# Flagged transfers per client and cut-off.
clientwise_recall_numerator_df = (
    recall_numerator_df.groupby(['client'])[groupby_sum_cols].sum().reset_index()
)

# Long format: one row per (client, percent cut-off).
final_clientwise_recall_numerator_df = pd.melt(
    clientwise_recall_numerator_df,
    id_vars=['client'],
    value_vars=list(clientwise_recall_numerator_df.columns[1:]),
    var_name='rank',
    value_name='rth_predicted',
)
final_clientwise_recall_numerator_df = final_clientwise_recall_numerator_df.sort_values(['client'])

# Keep only the percent value (as a string) in `rank`.
clientwise_recall_rank = final_clientwise_recall_numerator_df['rank']
final_clientwise_recall_numerator_df['rank'] = clientwise_recall_rank.str.replace('rank_at_','')

final_clientwise_recall_numerator_df.head()

### <center>Merging recall numerator and denominator</center>

In [None]:
# Attach each facility's total number of transfers to its per-cut-off counts.
recall_df = pd.merge(
    final_recall_numerator_df,
    transferred_df,
    how='inner',
    on=['client', 'facilityname', 'facilityid'],
)
recall_df.head()

In [None]:
# Attach each client's total number of transfers to its per-cut-off counts.
clientwise_recall_df = pd.merge(
    final_clientwise_recall_numerator_df,
    clientwise_transferred_df,
    how='inner',
    on=['client'],
)
clientwise_recall_df.head()

### <center>Calculating Precision-Recall</center>

In [None]:
# Combine the facility-level precision and recall counts, derive both ratios
# and write the table to a CSV named after the date window.
precision_recall_df = pd.merge(
    precision_df,
    recall_df,
    how='inner',
    on=['client', 'facilityid', 'rank'],
)

facility_precision = precision_recall_df['num_precision'].div(precision_recall_df['denom_precision'])
precision_recall_df['precision_at_k%'] = facility_precision

facility_recall = precision_recall_df['rth_predicted'].div(precision_recall_df['total_residents_transferred'])
precision_recall_df['recall_at_k%'] = facility_recall

precision_recall_df.to_csv(
    f'percentwise_precision_recall_at_k_{START_DATE}_{END_DATE}.csv',
    index=False,
)
precision_recall_df.head()

In [None]:
# Combine the client-level precision and recall counts per percent cut-off.
clientwise_precision_recall_df = pd.merge(
    clientwise_precision_df,
    clientwise_recall_df,
    how='inner',
    on=['client', 'rank'],
)
clientwise_precision_recall_df.head()

In [None]:
# Client-level precision and recall per percent cut-off. The ratios are computed
# from this frame's own client-level counts; using the facility-level
# precision_recall_df here would assign index-aligned facility values.
clientwise_precision_recall_df['precision_at_k%'] = (
    clientwise_precision_recall_df['num_precision']
    / clientwise_precision_recall_df['denom_precision']
)
clientwise_precision_recall_df['recall_at_k%'] = (
    clientwise_precision_recall_df['rth_predicted']
    / clientwise_precision_recall_df['total_residents_transferred']
)
clientwise_precision_recall_df.to_csv(f'percentwise_clientwise_precision_recall_at_k_{START_DATE}_{END_DATE}.csv',index=False)

clientwise_precision_recall_df.head()