In [None]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns',None)
import json

import boto3
from eliot import log_message
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from sqlalchemy import text

In [None]:
!pip install plotly-express --quiet
!pip install matplotlib --quiet
!pip install seaborn --quiet
import plotly.express as px
import plotly
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
class DbEngine(object):
    """
    Build SQLAlchemy engines from credentials stored in AWS Secrets Manager.

    A boto3 Secrets Manager client is created on construction. Each
    ``get_*_engine`` method fetches the relevant secret, assembles a database
    URL from its fields and returns the engine produced by ``create_engine``.
    """

    def __init__(self, region_name='us-east-1'):
        """
        :param region_name: AWS region passed to the Secrets Manager client
        """
        self.session = boto3.session.Session()
        self.secrets_client = self.session.client(
            service_name='secretsmanager',
            region_name=region_name
        )

    @staticmethod
    def _build_url(**url_parts):
        """
        Assemble a SQLAlchemy URL from keyword components.

        Newer SQLAlchemy releases expose ``URL.create`` and no longer support
        calling ``URL(...)`` directly; older releases only have the
        constructor. Use whichever one the installed version provides.
        """
        url_factory = getattr(URL, 'create', URL)
        return url_factory(**url_parts)

    def get_secrets(self, secret_name):
        """
        Fetch a secret and decode its JSON payload.

        :param secret_name: id of the secret in AWS Secrets Manager
        :return: dict parsed from the secret's ``SecretString``
        """
        log_message(message_type='info', action_type='get_secrets', secret_name=secret_name)
        secret_value = self.secrets_client.get_secret_value(SecretId=secret_name)
        return json.loads(secret_value['SecretString'])

    def get_postgresdb_engine(self):
        """
        Connect to the SaivaDB Postgres database.

        Credentials are read from the ``prod-saivadb`` secret.
        :return: Saivadb Postgres engine
        """
        log_message(message_type='info', action_type='connect_to_postgresdb', client='SaivaDB')
        # Fetch credentials from AWS Secrets Manager
        postgresdb_info = self.get_secrets(secret_name='prod-saivadb')
        # Create DB URL
        saivadb_url = self._build_url(
            drivername='postgresql',
            username=postgresdb_info['username'],
            password=postgresdb_info['password'],
            host=postgresdb_info['host'],
            port=postgresdb_info['port'],
            database=postgresdb_info['dbname'],
        )
        # Return Postgres Engine
        return create_engine(saivadb_url, echo=False)

    def get_sqldb_engine(self, clientdb_name):
        """
        Connect to a client SQL Server database.

        Avante db is in client VPN hence we use different credentials: the
        ``avantedb`` secret is used as-is. Every other client uses the
        ``dev-sqlserver`` secret with its ``dbname`` replaced by
        ``clientdb_name``.
        :param clientdb_name: client database name
        :return: Client SQL engine
        """
        log_message(message_type='info', action_type='connect_to_sqldb', client=clientdb_name)
        # Fetch credentials from AWS Secrets Manager
        if clientdb_name == 'avante':
            sqldb_info = self.get_secrets(secret_name='avantedb')
        else:
            sqldb_info = self.get_secrets(secret_name='dev-sqlserver')
            sqldb_info['dbname'] = clientdb_name

        # Create DB URL
        client_sqldb_url = self._build_url(
            drivername='mssql+pyodbc',
            username=sqldb_info['username'],
            password=sqldb_info['password'],
            host=sqldb_info['host'],
            port=sqldb_info['port'],
            database=sqldb_info['dbname'],
            query={'driver': 'ODBC Driver 17 for SQL Server'},
        )
        # Return Sql Engine
        return create_engine(client_sqldb_url, echo=False)

    def verify_connectivity(self, engine):
        """
        Run ``select 1`` against ``engine`` to verify connectivity.

        The query runs on an explicit connection with a ``text()`` statement
        rather than through ``engine.execute`` with a raw string. It is also
        executed outside an ``assert`` so the check still runs under
        ``python -O``.
        :param engine: SQLAlchemy engine to probe
        :raises AssertionError: if the probe query returns no rows
        """
        with engine.connect() as connection:
            rows = connection.execute(text('select 1')).fetchall()
        if not rows:
            raise AssertionError('connectivity check returned no rows')




In [None]:
# Create the credential helper (this opens a boto3 Secrets Manager client) and
# use it to build the Postgres engine that the queries below are run against.
engine = DbEngine()
client_engine =  engine.get_postgresdb_engine()

In [None]:
# Per-client date window for the analysis: client name -> [start date, end date].
# The fetch loop reads element 0 as `deploy_v3_start_date` and element 1 as
# `deploy_v3_end_date`, and passes both to `query_generator`.
client_info_dict = {
#     'avante':['2021-06-24', '2022-01-23'],
    'champion':['2021-07-29', '2022-01-23'],
    'marquis':['2021-10-14', '2022-01-23'],
    'midwest':['2021-06-24', '2022-01-23'],
    'mmh':['2021-06-24', '2022-01-23'],
    'phcp':['2021-10-14', '2022-01-23'],
    'trio':['2021-10-05', '2022-02-15'],
    'uch':['2021-06-24', '2022-01-23'],
    'vintage':['2021-10-14', '2022-01-23'],
    'coxsunshine':['2021-09-24', '2022-01-23'],
    'mmi':['2021-09-27', '2022-01-23']
    
}

# client_info_dict = {
#     'marquis':['2021-10-01', '2022-03-31'],
    
# }

In [None]:
def query_generator(client,deploy_v3_start_date,deploy_v3_end_date, lookback_days=10):
    """
    Build the SQL that pairs unplanned hospital transfers with the daily
    predictions made for the same patient shortly before the transfer.

    The query joins ``public.hospital_transfers`` to ``facility_patient``,
    ``daily_predictions`` and ``facility``. It keeps predictions whose census
    date is on or before the transfer date and at most ``lookback_days``
    calendar days earlier, then groups by transfer/prediction attributes.

    NOTE: the arguments are interpolated directly into the SQL text, so this
    must only be called with trusted values (e.g. the hard-coded client
    configuration), never with user-supplied input.

    NOTE(review): the ``dp.censusdate between ...`` condition sits in the WHERE
    clause, so transfers without any matching prediction row are filtered out
    even though ``daily_predictions`` is left-joined -- confirm this is the
    intended population.

    :param client: client name matched against ``ht.client``
    :param deploy_v3_start_date: lower bound (inclusive) for ``dp.censusdate``
    :param deploy_v3_end_date: upper bound (inclusive) for ``dp.censusdate``
    :param lookback_days: maximum number of days between census date and
        transfer date; defaults to 10, the previously hard-coded value
    :return: the SQL query as a string
    :raises ValueError: if ``lookback_days`` is negative or not an integer
    """
    # Coerce to int so only a plain number can reach the SQL text.
    lookback_days = int(lookback_days)
    if lookback_days < 0:
        raise ValueError('lookback_days must be non-negative')

    query = f"""
    with rh as (
        select ht.*,
        fa.facilityname,
        fp.masterpatientid,
        dp.modelid,
        dp.predictionrank,
        dp.censusdate,
        dp.show_in_report,
        fp.patientmrn,
        fp.firstname,
        fp.lastname
        from public.hospital_transfers ht
            left join public.facility_patient fp
            on ht.client = fp.client
            and ht.facilityid = fp.facilityid
            and ht.patientid = fp.patientid
                left join daily_predictions dp
                on ht.client = dp.client
                and ht.facilityid = dp.facilityid
                and (date(ht.dateoftransfer) - date(dp.censusdate)) <= {lookback_days}
                and date(dp.censusdate) <= date(ht.dateoftransfer)
                and fp.masterpatientid = dp.masterpatientid
                left join facility fa
                on fa.facilityid = ht.facilityid
                and fa.client = ht.client
        where (dp.published = True or dp.published is null)
          and ht.dateoftransfer >= '2020-01-01 00:00:00'
          and (dp.experiment_group = True or dp.experiment_group is null)
          and fa.is_active=true
          and ht.planned='No'
          and ht.client='{client}'
          and dp.censusdate between '{deploy_v3_start_date}' and '{deploy_v3_end_date}'
          and (ht.outcome !='ED Visit Only' or ht.outcome is null)
          and (lower(ht.payerdescription) NOT LIKE '%hospice%'
          or ht.payerdescription is null)

        )
        SELECT rh.client,
               rh.facilityid,
               rh.facilityname,
               rh.modelid,
               rh.patientid,
               rh.masterpatientid,
               rh.patientmrn,
               rh.lastname,
               rh.firstname,
               rh.censusdate,
               rh.dateoftransfer,
               rh.lastadmissiondate,
               rh.planned,
               rh.transferreason,
               rh.otherreasonfortransfer,
               rh.outcome,
               rh.transferredto,
               rh.lengthofstay,
               rh.transferredwithin30daysofadmission,
               rh.payertype,
               rh.payerdescription,
               rh.predictionrank as rank_cutoff,
               bool_or(rh.show_in_report) as show_in_report,
               (CASE
                    WHEN bool_or(rh.show_in_report) IS NULL
                        THEN 0
                    ELSE count(*)
                   END
                   ) as num_predictions
        FROM rh
        GROUP BY rh.client, rh.facilityid, rh.facilityname, rh.modelid,
                 rh.patientid, rh.masterpatientid, rh.patientmrn, rh.lastname, rh.firstname, rh.censusdate, rh.dateoftransfer,
                 rh.lastadmissiondate, rh.planned, rh.transferreason, rh.otherreasonfortransfer, rh.outcome,
                 rh.transferredto, rh.lengthofstay, rh.transferredwithin30daysofadmission, rh.payertype, rh.payerdescription,rh.predictionrank
    """
    return query

In [None]:
# Fetch the transfer/prediction rows for every configured client and collect
# one DataFrame per client in `combined_list`.
combined_list = []
for client, (deploy_v3_start_date, deploy_v3_end_date) in client_info_dict.items():
    query = query_generator(client, deploy_v3_start_date, deploy_v3_end_date)
    base_df = pd.read_sql(text(query), con=client_engine)
    base_df = base_df.sort_values(
        by=['client', 'facilityid', 'facilityname', 'masterpatientid',
            'censusdate', 'dateoftransfer', 'rank_cutoff']
    )
    # The SQL filter compares calendar dates (date(...) - date(...)), so do the
    # same here: subtracting raw timestamps lets the time of day shift a row
    # into the wrong bucket (or a negative one). pd.to_datetime also covers
    # columns that come back as plain date objects, where `.dt` would fail.
    transfer_day = pd.to_datetime(base_df['dateoftransfer']).dt.normalize()
    census_day = pd.to_datetime(base_df['censusdate']).dt.normalize()
    base_df['days_difference'] = (transfer_day - census_day).dt.days
    print(f'processed for client {client} --> dataframe shape {base_df.shape}')
    combined_list.append(base_df)
    


In [None]:
# Stack the per-client frames into a single DataFrame and preview the first rows.
# The original per-client row index is kept (no ignore_index), so index labels
# may repeat across clients.
combined_df = pd.concat(combined_list)
combined_df.head()

In [None]:
# Number of distinct facility ids present in the combined data.
combined_df.facilityid.nunique()

In [None]:
# (rows, columns) of the combined data.
combined_df.shape

In [None]:
# Row count for each census-to-transfer day gap.
combined_df['days_difference'].value_counts()

In [None]:
# Uncomment the line below to compute recall only for transfers within 30 days of admission (LOS <= 30).
# combined_df = combined_df[combined_df['transferredwithin30daysofadmission']==1]

In [None]:
# Count rows per (client, show_in_report, days_difference).
# dropna=False (pandas >= 1.1) keeps rows whose show_in_report is null; by
# default groupby drops rows with a NaN key, which would silently remove them
# from the "ranked" denominator used for recall below.
df = (
    combined_df
    .groupby(['client', 'show_in_report', 'days_difference'], dropna=False)
    .size()
    .reset_index(name='value')
)
df

In [None]:
# For every client and every day gap d (10 down to 0) compute:
#   patient_reported - rows with show_in_report == True
#   patient_ranked   - all rows
#   recall           - 100 * reported / ranked, rounded to 2 decimals
# Each entry of `final_list` is [client, d10 stats..., d0 stats...].
final_list = []
day_range = list(range(10, -1, -1))
for client in df['client'].unique():
    client_rows = df.loc[df['client'] == client]
    day_wise_stats = []
    for day in day_range:
        day_rows = client_rows.loc[client_rows['days_difference'] == day]
        patient_ranked = day_rows['value'].sum()
        patient_reported = day_rows.loc[day_rows['show_in_report'].eq(True), 'value'].sum()
        # A client may have no rows for a given day gap; report NaN instead of
        # dividing by zero.
        if patient_ranked:
            recall = round((100 * patient_reported) / patient_ranked, 2)
        else:
            recall = float('nan')
        day_wise_stats.extend([patient_reported, patient_ranked, recall])
    final_list.append([client] + day_wise_stats)


In [None]:
# Column names for the per-day statistics, in the same order the values were
# appended for each client: reported, ranked, recall for every day in day_range.
stat_columns = [
    f'd{day}_{suffix}'
    for day in day_range
    for suffix in ('patient_reported', 'patient_ranked', 'recall')
]



In [None]:
# One row per client: the client name followed by the per-day
# reported / ranked / recall columns named in `stat_columns`.
final_df = pd.DataFrame(final_list,columns=['client']+stat_columns)
final_df

In [None]:
# Keep only the recall columns, then pivot so that rows are the d-X recall
# labels and columns are clients (the shape px.line expects below).
final_df = final_df[['client'] + [col for col in final_df.columns if 'recall' in col]]
# Moving `client` into the index before transposing keeps the recall values
# numeric; transposing with the string column in place would turn every column
# into object dtype. The previous `reset_index(drop=True)` call discarded its
# result, so it had no effect and is dropped.
transposed_df = final_df.set_index('client').T
transposed_df.columns.name = None
transposed_df

In [None]:
# Line chart of recall per client: x axis is the d-X label (the frame's index),
# one trace per client column, y axis anchored at zero.
fig = (
    px.line(
        transposed_df,
        x=transposed_df.index,
        y=transposed_df.columns,
        markers=True,
    )
    .update_layout(
        title="Recall at d-X days.",
        xaxis_title="d-X days",
        yaxis_title="Recall",
        legend_title="Clients",
    )
    .update_yaxes(rangemode="tozero")
)
fig.show()

In [None]:
# Export the figure with plotly's offline plotter to
# ./Recall_at_dminusX_days.html (relative to the current working directory).
import os
plotly.offline.plot(fig, filename=os.path.join('.','Recall_at_dminusX_days.html'))
