In [None]:
"""
For facilities with more than 100 patients we want to send the reports
unit-wise / floor-wise.
This notebook provides an analysis of:
- patient count per census date
- unit-wise patient count per census date
- floor-wise patient count per census date
- recall at different cutoff ranks per month

"""

In [None]:
import sys
import pandas as pd
from pathlib import Path

from saiva.model.shared.database import DbEngine
import multiprocessing as mp
import numpy as np
import os
import re
import string
import pandas as pd
import datetime

##### Select the client, facilityid, start_date, end_date and cutoff_rank for the facility being analysed.

In [None]:
# ---- Run configuration: edit these for the facility being analysed ----
client='infinity-benchmark'
facilityid=37
# Inclusive analysis window, ISO 'YYYY-MM-DD' strings (interpolated into SQL below).
start_date = '2020-04-01'
end_date = '2020-09-30'
cutoff_rank = 60

# Month number -> lower-case month name, used to label the monthly aggregates.
# All twelve months are listed so that changing start_date/end_date never
# produces unlabeled (NaN) months.
month_mapper = {
    1:'january',
    2:'february',
    3:'march',
    4:'april',
    5:'may',
    6:'june',
    7:'july',
    8:'august',
    9:'september',
    10:'october',
    11:'november',
    12:'december'
}

In [None]:
# DbEngine is the project's connection factory (saiva.model.shared.database).
engine = DbEngine()
# Client database engine: used below for the census, bed and hospital-transfer views.
client_engine =  engine.get_sqldb_engine(clientdb_name=client)
# Postgres engine: used below to read the model's daily_predictions table.
postgres_engine =  engine.get_postgresdb_engine()
# Smoke-test the client connection by running a trivial query; a connection
# problem surfaces here as an exception. Only the client engine is probed --
# the Postgres engine is not exercised until the first predictions query.
# NOTE(review): the `is not None` check presumably always holds once the query succeeds.
assert client_engine.execute('select 1').fetchall() is not None # verify connectivity
print('===== Connection with db established =====')

## patient count per censusdate

In [None]:
# Facility-wide head-count: one row per census date inside the configured
# start_date..end_date window (both bounds inclusive).
census_query = f"""
select cast(censusdate AS DATE) as censusdate, count(*) as facility_patient_count
from view_ods_daily_census_v2 a
where a.facilityid ={facilityid}
and censusdate>='{start_date}'
and censusdate<='{end_date}'
group by censusdate
order by censusdate 
"""

# Executed against the client database; preview the first ten days.
census_df = pd.read_sql(sql=census_query, con=client_engine)
census_df.head(n=10)

## patient count per unitid per censusdate

In [None]:
# Head-count per (census date, unit). The bed view supplies the unit of each
# occupied bed; the left join keeps census rows whose bed has no match.
unit_census_query = f"""
select cast(censusdate AS DATE) as censusdate, unitid, count(*) as patient_count
from view_ods_daily_census_v2 a
left join view_ods_bed b
on a.facilityid = b.facilityid
and a.bedid = b.bedid
where a.facilityid ={facilityid}
and censusdate>='{start_date}'
and censusdate<='{end_date}'
group by censusdate, unitid
order by censusdate 
"""

# Long -> wide: label every unit as 'unit_<id>_patient_count' and spread the
# units across columns so that each census date becomes a single row.
unit_census_df = (
    pd.read_sql(unit_census_query, con=client_engine)
    .assign(unitid=lambda counts: 'unit_' + counts['unitid'].astype(str) + '_patient_count')
    .pivot(index='censusdate', columns='unitid', values='patient_count')
    .reset_index()
)
unit_census_df.head(10)

In [None]:
# Facility-level counts joined with the unit-level counts, one row per census
# date; the inner join keeps only dates present in both frames.

total_census_df = census_df.merge(unit_census_df, how='inner', on='censusdate')
total_census_df.head()

## patient count per floorid per censusdate

In [None]:
# Head-count per (census date, floor). The bed view supplies the floor of each
# occupied bed; the left join keeps census rows whose bed has no match.
floor_census_query = f"""
select cast(censusdate AS DATE) as censusdate, floorid, count(*) as patient_count
from view_ods_daily_census_v2 a
left join view_ods_bed b
on a.facilityid = b.facilityid
and a.bedid = b.bedid
where a.facilityid ={facilityid}
and censusdate>='{start_date}'
and censusdate<='{end_date}'
group by censusdate, floorid
order by censusdate 
"""

# Long -> wide: label every floor as 'floor_<id>_patient_count' and spread the
# floors across columns so that each census date becomes a single row.
floor_census_df = (
    pd.read_sql(floor_census_query, con=client_engine)
    .assign(floorid=lambda counts: 'floor_' + counts['floorid'].astype(str) + '_patient_count')
    .pivot(index='censusdate', columns='floorid', values='patient_count')
    .reset_index()
)

floor_census_df.head(10)

In [None]:
# Combine the facility-wise, unit-wise and floor-wise results, one row per census date.
#
# total_census_df is merged into itself, so re-running this cell would
# otherwise append a second copy of every floor column (suffixed _x/_y).
# Dropping any floor columns left over from a previous run keeps the cell
# idempotent; on the first run nothing is dropped and the result is unchanged.
floor_count_columns = [col for col in floor_census_df.columns if col != 'censusdate']

total_census_df = (
    total_census_df
    .drop(columns=floor_count_columns, errors='ignore')
    .merge(floor_census_df, on='censusdate', how='inner')
)
total_census_df.head()

## recall at different cutoff ranks per month

In [None]:
# Rank cutoffs (top-N daily predictions) at which recall is reported.
cutoff_rank_list = [15,30,45,60]

# Calendar-month windows covering start_date..end_date, as (first_day, last_day)
# ISO-string tuples, plus a lookup from each window to its lower-case month name.
# They are derived from the configured window so the recall analysis cannot
# drift out of sync with the census analysis; for 2020-04-01..2020-09-30 this
# yields the six windows april..september. The first/last window is clipped
# when start_date/end_date fall inside a month.
date_ranges = []
date_range_mapper = {}
_window_start, _window_end = pd.Timestamp(start_date), pd.Timestamp(end_date)
for _month in pd.period_range(start=_window_start, end=_window_end, freq='M'):
    _first_day = max(_month.start_time.normalize(), _window_start)
    _last_day = min(_month.end_time.normalize(), _window_end)
    _window = (_first_day.strftime('%Y-%m-%d'), _last_day.strftime('%Y-%m-%d'))
    date_ranges.append(_window)
    date_range_mapper[_window] = _month.start_time.month_name().lower()

In [None]:
# Monthly recall of the model's top-N daily predictions.
#
# For every rank cutoff and every month window:
#   * load the unplanned hospital transfers of the month (client database),
#   * load the predictions ranked <= cutoff for the month (Postgres),
#   * for each census day, look at the transfers on that day and the following
#     three days; a transferred patient counts as "caught" if the model
#     reported them on that census day,
#   * recall = unique caught patients / unique transferred patients.
# The result is pivoted to one row per month and one column per cutoff.
TRANSFER_LOOKAHEAD_DAYS = 3

recall_records = []  # collected as dicts and turned into a DataFrame once

for rank_cutoff in cutoff_rank_list:
    for month_start, month_end in date_ranges:
        month_window = (month_start, month_end)

        # DateOfTransfer is normalised below, i.e. it can carry a time of day.
        # `<= 'YYYY-MM-DD'` would then drop every transfer after midnight on
        # the last day, so use a half-open range ending at the next midnight.
        transfer_end_exclusive = (pd.Timestamp(month_end) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')

        # patients who were actually transferred (unplanned) in the window
        transfersqlquery = f"""
        select distinct DateOfTransfer, a.PatientID,b.MasterpatientID
        from view_ods_hospital_transfers_transfer_log_v2 a
        left join view_ods_facility_patient b
        on a.patientid = b.patientid
        and a.facilityid = b.facilityid
        where a.facilityid={facilityid}
        and DateOfTransfer>='{month_start}'
        and DateOfTransfer<'{transfer_end_exclusive}'
        and planned='No'
        order by DateOfTransfer asc
        """

        transfer_df = pd.read_sql(transfersqlquery, con=client_engine)
        # pd.to_datetime keeps the .dt accessor usable when the query returns
        # no rows (an empty result can come back with object dtype).
        transfer_df['DateOfTransfer'] = pd.to_datetime(transfer_df['DateOfTransfer']).dt.normalize()

        # patients reported by the model within the rank cutoff
        query = f"""
        select *
        from daily_predictions dp 
        where
        facilityid ={facilityid}
        and predictionrank <={rank_cutoff}
        and censusdate between '{month_start}' and '{month_end}'
        order by censusdate, predictionrank 
        """
        predicted_df = pd.read_sql(query, postgres_engine)

        transferred_patients = set()  # unique patients who went to the hospital
        caught_patients = set()       # ...of which the model had reported them

        for census_day in pd.date_range(month_start, month_end):
            # patients reported by the model on this census day
            reported_today = set(predicted_df.loc[predicted_df['censusdate'] == census_day, 'masterpatientid'])

            # transfers on the census day and the following three days
            # NOTE(review): transfer_df only holds this month's transfers, so the
            # look-ahead is cut off at the month end -- confirm this is intended.
            lookahead_days = pd.date_range(census_day, census_day + datetime.timedelta(days=TRANSFER_LOOKAHEAD_DAYS))
            in_lookahead = transfer_df['DateOfTransfer'].isin(lookahead_days)
            # Transfers without a master patient id can never match a prediction,
            # and NaN ids do not de-duplicate in a set, so they are left out.
            transferred_in_window = set(transfer_df.loc[in_lookahead, 'MasterpatientID'].dropna())

            transferred_patients |= transferred_in_window
            caught_patients |= transferred_in_window & reported_today

        # A month without unplanned transfers has no defined recall; report 0,
        # the convention the unit-wise recall cell of this notebook uses.
        recall = len(caught_patients) / len(transferred_patients) if transferred_patients else 0
        recall_records.append({
            'date_range': month_window,
            'rank_cutoff': 'recall_at_rank_'+str(rank_cutoff),
            'recall': recall,
        })

output = pd.DataFrame(recall_records, columns=['date_range','rank_cutoff','recall'])

# one row per month, one column per rank cutoff
output_pivoted = output.pivot(index='date_range', columns='rank_cutoff', values='recall').reset_index()
output_pivoted.columns.name = None
output_pivoted['date_range'] = output_pivoted['date_range'].map(date_range_mapper)
output_pivoted

## unitwise recall at different rank cutoffs per month, and unitwise reported-patient counts per censusdate

In [None]:
# Unit-wise view of the model's top-N daily predictions.
#
# Produces two results:
#   * unitwise_output            - per (month, unit, rank cutoff): how many of the
#                                  month's reported patients appear on the unit's
#                                  census, and the unit's recall for the month.
#   * unitwise_distribution_list - one small frame per census day and rank cutoff
#                                  with the number of reported patients per unit
#                                  (concatenated in the next cell).
# Recall uses the same definition as the monthly cell: a patient transferred on
# a census day or within the following three days is "caught" if the model
# reported them on that census day.
TRANSFER_LOOKAHEAD_DAYS = 3

unitwise_records = []  # collected as dicts and turned into a DataFrame once
unitwise_distribution_list = []

for rank_cutoff in cutoff_rank_list:
    for month_start, month_end in date_ranges:
        month_window = (month_start, month_end)

        # DateOfTransfer is normalised below, i.e. it can carry a time of day.
        # `<= 'YYYY-MM-DD'` would then drop every transfer after midnight on
        # the last day, so use a half-open range ending at the next midnight.
        transfer_end_exclusive = (pd.Timestamp(month_end) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')

        # unplanned transfers in the window; the lower bound is inclusive (>=),
        # matching the monthly recall cell
        transfersqlquery = f"""
        select distinct DateOfTransfer, a.PatientID,b.MasterpatientID
        from view_ods_hospital_transfers_transfer_log_v2 a
        left join view_ods_facility_patient b
        on a.patientid = b.patientid
        and a.facilityid = b.facilityid
        where a.facilityid={facilityid}
        and DateOfTransfer>='{month_start}'
        and DateOfTransfer<'{transfer_end_exclusive}'
        and planned='No'
        order by DateOfTransfer asc
        """
        transfer_df = pd.read_sql(transfersqlquery, con=client_engine)
        # pd.to_datetime keeps the .dt accessor usable when the query returns
        # no rows (an empty result can come back with object dtype).
        transfer_df['DateOfTransfer'] = pd.to_datetime(transfer_df['DateOfTransfer']).dt.normalize()

        # patients reported by the model within the rank cutoff
        daily_predictions_query = f"""
        select *
        from daily_predictions dp 
        where
        facilityid ={facilityid}
        and predictionrank <={rank_cutoff}
        and censusdate between '{month_start}' and '{month_end}'
        order by censusdate, predictionrank 
        """
        predicted_df = pd.read_sql(daily_predictions_query, postgres_engine)

        # Patient-level census with the unit/floor of each bed. Kept under its own
        # names so the facility-level census_query/census_df built earlier in the
        # notebook are not overwritten (re-running the merge cells stays valid).
        roster_query = f"""
        select distinct clientid, c.masterpatientid, censusdate, a.facilityid, a.bedid, b.unitid, b.floorid
        from view_ods_daily_census_v2 a
        left join view_ods_bed b 
        on a.facilityid = b.facilityid
        and a.bedid = b.bedid 
        left join view_ods_facility_patient c
        on c.patientid = a.clientid
        and c.facilityid = a.facilityid
        where a.facilityid={facilityid}
        and censusdate>='{month_start}'
        and censusdate<='{month_end}'
        order by censusdate,clientid
        """
        roster_df = pd.read_sql(roster_query, client_engine)
        unique_unitids = roster_df['unitid'].unique().tolist()
        # every (day, patient) report of the month, duplicates included
        total_patients_reported_by_model = predicted_df['masterpatientid'].tolist()

        transferred_patients = set()  # unique patients who went to the hospital
        caught_patients = set()       # ...of which the model had reported them

        for census_day in pd.date_range(month_start, month_end):
            # patients reported by the model on this census day
            reported_today = set(predicted_df.loc[predicted_df['censusdate'] == census_day, 'masterpatientid'])

            # Reported patients per unit on this day. rename_axis/reset_index(name=...)
            # gives the columns ['unitid', 'count'] on every pandas version (the
            # default names produced by value_counts().reset_index() changed in 2.0).
            # value_counts() skips missing unit ids, so none need filling.
            reported_on_census = roster_df['masterpatientid'].isin(reported_today) & (roster_df['censusdate'] == census_day)
            daily_unit_counts = (
                roster_df.loc[reported_on_census, 'unitid']
                .value_counts()
                .rename_axis('unitid')
                .reset_index(name='count')
            )
            # Days without any reported patient on the census contribute nothing
            # (empty frames are filtered out after the loop anyway).
            if not daily_unit_counts.empty:
                daily_unit_counts['censusdate'] = census_day
                daily_unit_counts['rank'] = rank_cutoff
                daily_unit_counts = pd.pivot_table(daily_unit_counts, values = 'count', index=['censusdate','rank'], columns = ['unitid']).reset_index()
                unitwise_distribution_list.append(daily_unit_counts)

            # transfers on the census day and the following three days
            # NOTE(review): transfer_df only holds this month's transfers, so the
            # look-ahead is cut off at the month end -- confirm this is intended.
            lookahead_days = pd.date_range(census_day, census_day + datetime.timedelta(days=TRANSFER_LOOKAHEAD_DAYS))
            in_lookahead = transfer_df['DateOfTransfer'].isin(lookahead_days)
            # Transfers without a master patient id cannot be matched to a unit
            # or a prediction, so they are left out.
            transferred_in_window = set(transfer_df.loc[in_lookahead, 'MasterpatientID'].dropna())

            transferred_patients |= transferred_in_window
            caught_patients |= transferred_in_window & reported_today

        for unit in unique_unitids:
            # patients seen on this unit at any point in the month (built once per unit)
            unit_patients = set(roster_df.loc[roster_df['unitid']==unit,'masterpatientid'])
            # number of model reports (duplicates included) for patients of this unit
            patient_distribution = sum(1 for patient in total_patients_reported_by_model if patient in unit_patients)
            numerator = caught_patients & unit_patients
            denominator = transferred_patients & unit_patients
            # units without any transferred patient get recall 0
            recall = len(numerator) / len(denominator) if denominator else 0
            unitwise_records.append({
                'date_range': month_window,
                'unitid': unit,
                'rank_cutoff': 'recall_at_rank_cutoff_'+str(rank_cutoff),
                'patient_distribution': patient_distribution,
                'recall': recall,
            })

unitwise_output = pd.DataFrame(unitwise_records, columns=['date_range', 'unitid', 'rank_cutoff', 'patient_distribution','recall'])
unitwise_distribution_list = [frame for frame in unitwise_distribution_list if frame.shape[0]!=0]

In [None]:
# Stack the per-day unit counts into one table: one row per (censusdate, rank),
# one column per unit, 0 where a unit had no reported patient that day.
if not unitwise_distribution_list:
    raise ValueError(
        'No unit-wise report counts were collected; check facilityid, the date '
        'ranges and that the predictions overlap the census.'
    )

unitwise_distribution = pd.concat(unitwise_distribution_list).fillna(0)

# Unit columns are the ones labelled by a unit id. Depending on how the ids were
# read they can be Python ints, numpy integers or integral floats (ids turn
# into floats when some beds have no unit), so all of those are recognised
# rather than only `type(col) == int`.
unit_id_labels = [
    col for col in unitwise_distribution.columns
    if isinstance(col, (int, float, np.integer, np.floating))
    and not isinstance(col, (bool, np.bool_))
    and float(col).is_integer()
]

unitwise_distribution = (
    unitwise_distribution
    .astype({col: int for col in unit_id_labels})  # counts are whole numbers
    .sort_values(by=['censusdate','rank'])
    .rename(columns={col: 'unit_'+str(int(col))+'_patient_count' for col in unit_id_labels})
)
unitwise_distribution

## unitwise reported-patient count: max, min, mean, median and std. dev. at different rank cutoffs, grouped on a monthly basis

### <font color='red'> Note: Please manually fill in the unit ids of the facility in the groupby aggregation code in the cell below.</font>

In [None]:
# Monthly summary of the daily per-unit report counts, per rank cutoff.
# The unit ids are facility specific -- edit this list to match the facility.
unit_ids = [2107, 2117, 2127, 2128, 2137, 2147]
summary_stats = ['max','min','mean','median','std']
unit_aggregations = {f'unit_{unit_id}_patient_count': list(summary_stats) for unit_id in unit_ids}

# Work on a copy so the daily table itself is left untouched.
daily_counts_with_month = unitwise_distribution.copy()
census_month_numbers = pd.DatetimeIndex(daily_counts_with_month['censusdate']).month
daily_counts_with_month['month'] = census_month_numbers
daily_counts_with_month['month'] = daily_counts_with_month['month'].map(month_mapper)

# sort=False keeps the groups in order of first appearance.
monthwise_unitwise_distribution = (
    daily_counts_with_month
    .groupby(['month','rank'], sort=False)
    .agg(unit_aggregations)
    .reset_index()
    .round(2)
)
monthwise_unitwise_distribution

### writing results in excelsheet

In [None]:
# Write every result table to its own sheet of one workbook.
# The context manager saves and closes the file even if a sheet fails to write
# (ExcelWriter.save() no longer exists in pandas 2.x).
# NOTE(review): the file name spells 'faciity' and two sheet names exceed
# Excel's 31-character limit; both are kept as-is so existing consumers of the
# report are unaffected -- confirm whether they should be changed.
with pd.ExcelWriter(f'{client}_faciity{facilityid}_analysis.xlsx') as writer:
    total_census_df.to_excel(writer, sheet_name='daily_census_count', index=False)
    output_pivoted.to_excel(writer, sheet_name='monthwise_recall_at_different_ranks', index=False)
    unitwise_distribution.to_excel(writer, sheet_name='daily_unitwise_reportcount', index=False)
    # This frame has two-level column headers (unit, statistic); the index is
    # kept here, unlike the other sheets.
    monthwise_unitwise_distribution.to_excel(writer, sheet_name='monthwise_unitwise_reportcount_aggregation_analysis')