### Notebook to compare patient dataset densities (patient-specific data / patient count) on a monthly basis between different clients

Pre-requisites:
* Run the notebook from your local machine, as it runs on production data.
    

In [None]:
import json
import boto3
import numpy as np
from eliot import log_message
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
import pandas as pd
!pip install matplotlib
from functools import reduce
import matplotlib.pyplot as plt

In [None]:
# database class.

class DbEngine(object):
    """
    Build SQLAlchemy engines for the client SQL Server databases.

    Connection details are read from AWS Secrets Manager through a boto3
    client that is created once in ``__init__``.
    """

    def __init__(self, region_name='us-east-1'):
        """
        :param region_name: AWS region used for the Secrets Manager client
        """
        self.session = boto3.session.Session()
        self.secrets_client = self.session.client(
            service_name='secretsmanager',
            region_name=region_name
        )

    def get_secrets(self, secret_name):
        """
        Fetch one secret and decode its ``SecretString`` payload from JSON.

        :param secret_name: id of the secret in AWS Secrets Manager
        :return: the decoded secret; ``get_sqldb_engine`` reads the keys
                 username, password, host, port and dbname from it
        """
        log_message(message_type='info', action_type='get_secrets', secret_name=secret_name)
        secret = self.secrets_client.get_secret_value(SecretId=secret_name)
        return json.loads(secret['SecretString'])

    def get_sqldb_engine(self, clientdb_name):
        """
        Create an engine for the given client database.
        Avante db is in client VPN hence we use different credentials.

        :param clientdb_name: client database name; 'avante' uses the
               'avantedb' secret, every other name uses 'prod-sqlserver'
               with the secret's dbname overridden by ``clientdb_name``
        :return: Client SQL engine
        """
        log_message(message_type='info', action_type='connect_to_sqldb', client=clientdb_name)
        # Fetch credentials from AWS Secrets Manager
        if clientdb_name == 'avante':
            sqldb_info = self.get_secrets(secret_name='avantedb')
        else:
            sqldb_info = self.get_secrets(secret_name='prod-sqlserver')
            sqldb_info['dbname'] = clientdb_name

        # SQLAlchemy 1.4 deprecated calling URL(...) directly in favour of
        # URL.create(...); older releases only offer the constructor.
        build_url = getattr(URL, 'create', URL)
        client_sqldb_url = build_url(
            drivername='mssql+pyodbc',
            username=sqldb_info['username'],
            password=sqldb_info['password'],
            host=sqldb_info['host'],
            port=sqldb_info['port'],
            database=sqldb_info['dbname'],
            query={'driver': 'ODBC Driver 17 for SQL Server'},
        )
        # Return Sql Engine
        return create_engine(client_sqldb_url, echo=False)

    def verify_connectivity(self, engine):
        """
        Run ``select 1`` against the engine.

        Any connection or execution error propagates to the caller; nothing
        is returned on success.

        :param engine: SQLAlchemy engine to check
        """
        # Imported locally so the check does not depend on the notebook's import cell.
        from sqlalchemy import text

        # Engine.execute() no longer exists in SQLAlchemy 2.0; an explicit
        # connection works on old and new releases and is always released.
        with engine.connect() as connection:
            connection.execute(text('select 1')).fetchall()


In [None]:
# Clients whose output is compared. All available client names:
#   'trio', 'dycora', 'northshore', 'gulfshore', 'infinity-infinity',
#   'infinity-benchmark', 'palmgarden', 'avante'
clients = [
    'avante',
]

# Inclusive month range used by the queries and the plots (1 = January, 12 = December).
start_month, end_month = 1, 8

# Census action codes that are left out (e.g. deceased, in hospital).
not_included_censusactioncode = [
    'L',
    'DH',
    'DD',
    'DE',
    'TO',
    'DRNA',
    'DRA',
    'PBH',
    'RDD',
    'RDE',
]


In [None]:
# Queries
#
# Every query returns one row per (Month, Year, FacilityID) with a row count
# taken from a single view, limited to months start_month..end_month
# (inclusive) of `analysis_year`. The shared Month / Year / FacilityID aliases
# are the keys the result frames are merged on later in the notebook.

# Calendar year analysed by all queries in this cell; change it here instead
# of editing each query.
analysis_year = 2020

# Census rows per facility and month (the denominator for the densities).
patient_census_query = f"""
    select MONTH (censusdate ) as Month,YEAR (censusdate) as Year ,facilityid as FacilityID, count(*) as Patients_total
    from view_ods_daily_census_v2
    where
    MONTH (censusdate) between {start_month} and {end_month} and
    YEAR (censusdate )='{analysis_year}'
    group by MONTH (censusdate ),YEAR (censusdate), facilityid
    order by facilityid, MONTH (censusdate)
    """

# Rows of view_ods_Patient_weights_vitals per facility and month.
vitals_query = f"""
    select MONTH (date) as Month, YEAR (date) as Year, facilityid as FacilityID, count(*) as Vitals_count
    from view_ods_Patient_weights_vitals
    where
    YEAR (date)='{analysis_year}' and
    MONTH (date) between {start_month} and {end_month}
    group by MONTH (date),YEAR (date),facilityid
    order by facilityid, MONTH (date)
    """

# Diagnoses per facility and month, bucketed by onset date.
diagnosis_query = f"""
    select MONTH (onsetdate) as Month, YEAR (onsetdate) as Year, facilityid as FacilityID, count(*) as Diagnosis_count
    from view_ods_patient_diagnosis
    where
    YEAR (onsetdate)='{analysis_year}' and
    MONTH (onsetdate) between {start_month} and {end_month}
    group by MONTH (onsetdate),YEAR (onsetdate),facilityid
    order by facilityid, MONTH (onsetdate)
    """

# Progress notes per facility and month; only note types starting with 'eMAR'.
progress_notes_query = f"""
    select MONTH (createddate) as Month, YEAR (createddate) as Year, facilityid as FacilityID, count(*) as Progress_notes_count
    from view_ods_progress_note
    where
    YEAR (createddate)='{analysis_year}' and
    MONTH (createddate) between {start_month} and {end_month}
    and progressnotetype like 'eMAR%'
    group by MONTH (createddate),YEAR (createddate), facilityid
    order by facilityid, MONTH (createddate)
    """

# Alerts per facility and month.
alert_query = f"""
select MONTH (createddate) as Month, YEAR (createddate) as Year, facilityid as FacilityID, count(*) as Alerts_count
from view_ods_cr_alert voca
where
YEAR (createddate)='{analysis_year}' and
MONTH (createddate) between {start_month} and {end_month}
group by MONTH (createddate),YEAR (createddate), facilityid
order by facilityid, MONTH (createddate)
"""

# Physician orders per facility and month, limited to four order categories.
order_query = f"""
select MONTH (orderdate) as Month, YEAR (orderdate) as Year, facilityid as FacilityID, count(*) as Orders_count
from view_ods_physician_order_list_v2
where
YEAR (orderdate)='{analysis_year}' and
MONTH (orderdate) between {start_month} and {end_month}
and ordercategory in ('Diagnostic', 'Enteral - Feeding', 'Dietary - Diet', 'Dietary - Supplements')
group by MONTH (orderdate),YEAR (orderdate), facilityid
order by facilityid, MONTH (orderdate)
"""

# Hospital transfers per facility and month, bucketed by transfer date.
rehosp_query = f"""
select MONTH (dateoftransfer) as Month, YEAR (dateoftransfer) as Year, facilityid as FacilityID, count(*) as Rehosps_count
from view_ods_hospital_transfers_transfer_log_v2
where
YEAR (dateoftransfer)='{analysis_year}' and
MONTH (dateoftransfer) between {start_month} and {end_month}
group by MONTH (dateoftransfer),YEAR (dateoftransfer), facilityid
order by facilityid, MONTH (dateoftransfer)
"""



In [None]:
# Only for Avante DB direct access!!
# Incident rows per facility and month from view_ods_inc_incident, restricted
# to typeid 151 and 153 and to months start_month..end_month (inclusive) of 2020.
# NOTE(review): the plots label this series 'Falls_density', so 151/153 are
# presumably the fall incident types — confirm against the incident type table.
# NOTE(review): the year is a hard-coded literal inside the query text.
incidents_query = f"""
select MONTH(IncidentDate) as Month, YEAR(IncidentDate) as Year, FacilityID as FacilityID, count(*) as Incidents_count
FROM view_ods_inc_incident
WHERE 
YEAR (IncidentDate)='2020'
and MONTH (IncidentDate) between {start_month} and {end_month}
and typeid in (151, 153)
group by MONTH (IncidentDate),YEAR (IncidentDate), facilityid
order by facilityid, MONTH (IncidentDate)
"""

### Run the first of the following two cells if you are not doing incident density analysis; otherwise run the second one.

In [None]:
# Empty accumulator for the per-client results (variant without the incidents column).
main_df = pd.DataFrame(
    columns=[
        'Client',
        'Month',
        'Year',
        'FacilityID',
        'Patients_total',
        'Vitals_density',
        'Diagnosis_density',
        'Progress_notes_density',
    ]
)


In [None]:
# Empty accumulator for the per-client results (variant including the incidents column).
main_df = pd.DataFrame(
    columns=[
        'Client',
        'Month',
        'Year',
        'FacilityID',
        'Patients_total',
        'Vitals_density',
        'Diagnosis_density',
        'Progress_notes_density',
        'Incidents_density',
    ]
)


In [None]:

# Build the density table of every selected client and add it to main_df.
#
# Per client: run all count queries, outer-merge the results on
# Month / Year / FacilityID, divide every *_count column by Patients_total and
# rename it to *_density, then append the rows to main_df.

# One DbEngine (one boto3 session / secrets client) serves every client.
engine = DbEngine()
merge_keys = ['Month', 'Year', 'FacilityID']

for client in clients:
    print(f'--------------------Processing for {client}-------------------------------')
    # connecting with client engine
    client_engine = engine.get_sqldb_engine(clientdb_name=client)
    try:
        patient_census_df = pd.read_sql(patient_census_query, con=client_engine)
        # if censusactioncode is present as a column then removing unwanted actioncodes.
        # NOTE(review): patient_census_query as defined in this notebook returns only
        # Month, Year, FacilityID and Patients_total, so this branch never runs and the
        # excluded action codes are still counted — confirm whether the exclusion
        # belongs in the query itself.
        if 'censusactioncode' in patient_census_df.columns:
            patient_census_df = patient_census_df[
                ~patient_census_df['censusactioncode'].isin(not_included_censusactioncode)
            ]

        # reading vitals, diagnosis, progress notes, alerts, rehosps, orders and incidents.
        vitals_df = pd.read_sql(vitals_query, con=client_engine)
        diagnosis_df = pd.read_sql(diagnosis_query, con=client_engine)
        progress_notes_df = pd.read_sql(progress_notes_query, con=client_engine)
        alerts_df = pd.read_sql(alert_query, con=client_engine)
        rehosps_df = pd.read_sql(rehosp_query, con=client_engine)
        orders_df = pd.read_sql(order_query, con=client_engine)
        # NOTE(review): the cell defining incidents_query is marked "Only for Avante",
        # yet it is executed for every client here — verify before adding other clients.
        incidents_df = pd.read_sql(incidents_query, con=client_engine)
    finally:
        # Release the pooled connections even when a query fails.
        client_engine.dispose()

    # merging all the dfs on the basis of 'Month', 'Year', 'FacilityID'.
    client_df = reduce(
        lambda left, right: pd.merge(left, right, on=merge_keys, how='outer'),
        [patient_census_df, vitals_df, diagnosis_df, progress_notes_df,
         alerts_df, rehosps_df, orders_df, incidents_df],
    )
    client_df.insert(loc=0, column='Client', value=client)
    # filling the nan values left by the outer merge
    client_df = client_df.fillna(0)

    print('Converting data count into density.')
    # converting patient signal measurement counts to density; a zero
    # Patients_total yields inf/NaN here, which a later cell replaces with 0.
    count_columns = [col for col in client_df.columns if 'count' in col]
    for col in count_columns:
        client_df[col] = (client_df[col] / client_df['Patients_total']).round(3)
    client_df.columns = [
        col.replace('_count', '') + '_density' if 'count' in col else col
        for col in client_df.columns
    ]

    # appending data of all clients.
    print('Appending the data into the main dataframe.')
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    main_df = pd.concat([main_df, client_df], ignore_index=True)
    print(f'********************Processing for {client} completed********************', end='\n')



In [None]:
# Densities computed with a zero patient count come out as inf or NaN;
# both are set to 0 in place before the first rows are displayed.
main_df.replace(to_replace=np.inf, value=0, inplace=True)
main_df.fillna(value=0, inplace=True)
main_df.head()

In [None]:
# Print, for every client present in main_df, the facility ids that have data.
for client in main_df.Client.unique():
    client_rows = main_df[main_df['Client'] == client]
    facility_ids = client_rows['FacilityID'].unique()
    print(client, facility_ids)

###  Universal client and facilityid dict. Please select the appropriate client and facility for plotting graphs.

In [None]:

# client_facility_dict = {
#     'trio': [1, 7, 21, 42, 52, 55, 186, 194, 265, 266, 267, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283],
#     'dycora': [82, 107, 108, 111, 112, 114, 115, 116, 120, 121, 127, 176, 177, 302, 328, 355, 356, 357, 358, 359, 173],
#     'gulfshore': [16],
#     'infinity-infinity': [1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 
#                           26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 
#                           51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 68, 69, 70, 71, 72, 73, 74, 75, 76, 
#                           77, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98],
#     'infinity-benchmark': [28, 29, 30, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46],
#     'palmgarden': [3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17],
#     'avante': [ 1, 3, 4, 5, 6, 7, 8, 9, 10, 13, 21, 22, 23, 24, 25, 27]
    
# }


### select the correct client and its corresponding facility and the density segments for which you want to create the graph.


In [None]:

# Facilities to plot, keyed by client name. Uncomment or add entries as needed.
client_facility_dict = {
    # 'trio': [1, 7],
    # 'gulfshore':[16],
    # 'dycora':[107],
    'avante': [
        1, 3, 4, 5, 6, 7, 8, 9, 10, 13, 21,
    ],
}

# Density columns of main_df that get one graph each.
density_segments = [
    'Vitals_density',
    'Diagnosis_density',
    'Rehosps_density',
    'Incidents_density',
]

In [None]:
# Check whether values are present for the client and facility or not

# main_df[(main_df['Client']=='trio') & (main_df['FacilityID'].isin([1,7]))]

### graph knobs

In [None]:
# Figure size in inches as (width, height); (20, 8) is a wider alternative.
graph_dim = (8, 5)
# Width of every plotted line.
graph_linewidth = 4
# Axis labels and the title share one font size.
xlabel_fontsize = ylabel_fontsize = title_fontsize = 16
# Font size of the legend entries.
legend_size = 15

In [None]:

def graph_plotter():
    """
    Draw one month-wise line graph per entry of ``density_segments``.

    Each graph holds one line per (client, facility) pair listed in
    ``client_facility_dict``, labelled ``<client>_<facility>``. A line is
    plotted against the months that actually exist in ``main_df`` for that
    facility, sorted by month, so a facility with missing months no longer
    fails with an x/y length mismatch and is never drawn out of order.

    Reads the notebook globals ``main_df``, ``client_facility_dict``,
    ``density_segments`` and the graph knobs; returns nothing.
    """
    for segment in density_segments:
        plt.figure(figsize=graph_dim)
        plt.xlabel("Months", fontsize=xlabel_fontsize)
        plt.ylabel(f"{segment}", fontsize=ylabel_fontsize)
        plt.title(f"Monthwise {segment}", fontsize=title_fontsize)
        for client, facilities in client_facility_dict.items():
            for facility in facilities:
                in_facility = (main_df['Client'] == client) & (main_df['FacilityID'] == facility)
                # Month is unique per facility only while the queries cover a single year.
                rows = main_df.loc[in_facility, ['Month', segment]].sort_values('Month')
                plt.plot(
                    rows['Month'].tolist(),
                    rows[segment].tolist(),
                    label=client + '_' + str(facility),
                    linewidth=graph_linewidth,
                )
        plt.legend(prop={'size': legend_size})
        plt.show()
        print('\n\n')


graph_plotter()

### The next cell graphs rehosps vs falls (can currently be run only for Avante)

In [None]:

def graph_rehosps_vs_falls():
    """
    Draw one graph per (client, facility) comparing two density series.

    For every pair in ``client_facility_dict`` the 'Rehosps_density' column is
    plotted next to the 'Incidents_density' column (shown in the legend as
    'Falls_density'). Both lines use the months that actually exist in
    ``main_df`` for that facility, sorted by month, so missing months no
    longer cause an x/y length mismatch.

    Reads the notebook globals ``main_df``, ``client_facility_dict`` and the
    graph knobs; returns nothing.
    """
    # (main_df column, legend label) in plotting order.
    series = (
        ('Rehosps_density', 'Rehosps_density'),
        ('Incidents_density', 'Falls_density'),
    )
    for client, facilities in client_facility_dict.items():
        for facility in facilities:
            in_facility = (main_df['Client'] == client) & (main_df['FacilityID'] == facility)
            # Month is unique per facility only while the queries cover a single year.
            rows = main_df.loc[in_facility].sort_values('Month')
            months = rows['Month'].tolist()

            plt.figure(figsize=graph_dim)
            plt.xlabel("Months", fontsize=xlabel_fontsize)
            plt.ylabel("Density", fontsize=ylabel_fontsize)
            plt.title(f"Monthwise for Client {client} Facility {facility}", fontsize=title_fontsize)
            for column, label in series:
                plt.plot(months, rows[column].tolist(), label=label, linewidth=graph_linewidth)
            plt.legend(prop={'size': legend_size})
            plt.show()
            print('\n\n')


graph_rehosps_vs_falls()

### Debugging queries (Don't need to run)

In [None]:
# Debugging copy of the incidents query: incident rows per facility and month
# from view_ods_inc_incident, typeid 151 or 153, months start_month..end_month
# (inclusive) of the hard-coded year 2020. Running this cell rebinds
# incidents_query.
incidents_query = f"""
select MONTH(IncidentDate) as Month, YEAR(IncidentDate) as Year, FacilityID as FacilityID, count(*) as Incidents_count
FROM view_ods_inc_incident
WHERE 
YEAR (IncidentDate)='2020'
and MONTH (IncidentDate) between {start_month} and {end_month}
and typeid in (151, 153)
group by MONTH (IncidentDate),YEAR (IncidentDate), facilityid
order by facilityid, MONTH (IncidentDate)
"""

In [None]:
# Run the incidents query directly against the 'avante' database and display
# the raw counts (before any density conversion).
engine = DbEngine()
client_engine =  engine.get_sqldb_engine(clientdb_name='avante')
incidents_df = pd.read_sql(incidents_query, con = client_engine)
# Last expression of the cell: the notebook renders the frame.
incidents_df

In [None]:
# Debugging copy of the rehospitalisation query: transfer rows per facility and
# month from view_ods_hospital_transfers_transfer_log_v2, months
# start_month..end_month (inclusive) of the hard-coded year 2020. Running this
# cell rebinds rehosp_query.
rehosp_query = f"""
select MONTH (dateoftransfer) as Month, YEAR (dateoftransfer) as Year, facilityid as FacilityID, count(*) as Rehosps_count
from view_ods_hospital_transfers_transfer_log_v2  
where 
YEAR (dateoftransfer)='2020' and
MONTH (dateoftransfer) between {start_month} and {end_month}
group by MONTH (dateoftransfer),YEAR (dateoftransfer), facilityid
order by facilityid, MONTH (dateoftransfer)
"""

In [None]:
# Run the rehospitalisation query directly against the 'avante' database and
# display the raw counts (before any density conversion).
engine = DbEngine()
client_engine =  engine.get_sqldb_engine(clientdb_name='avante')
rehosp_df = pd.read_sql(rehosp_query, con = client_engine)
# Last expression of the cell: the notebook renders the frame.
rehosp_df

In [None]:
# Display the accumulated per-client table for inspection.
main_df