In [1]:
import sys
import os
import pandas as pd
sys.path.insert(0, '/src')
from eliot import to_file

import json

import boto3
from eliot import log_message
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL

import numpy as np
to_file(sys.stdout)

In [2]:
!pip install plotly-express --quiet
!pip install matplotlib --quiet
!pip install seaborn --quiet

In [3]:
import matplotlib.patches as mpatches
import plotly.express as px
import plotly
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)


In [5]:

CLIENT = 'avante'
TRAIN_START_DATE = '2021-01-15'
TEST_END_DATE = '2021-10-15'
FACILITYIDS = (1, 3, 4, 5, 7, 13, 16, 19, 20, 21, 23, 26, 29, 31, 32, 33, 36)
ENABLE_ALL_FACILITIES = "true"
env = os.environ.get('SAIVA_ENV','prod')
date_range = f'{TRAIN_START_DATE} to {TEST_END_DATE}'

print(CLIENT)
print(env)
print(TRAIN_START_DATE, TEST_END_DATE)
print(FACILITYIDS)

avante
prod
2021-01-15 2021-10-15
(1, 3, 4, 5, 7, 13, 16, 19, 20, 21, 23, 26, 29, 31, 32, 33, 36)


In [6]:
class DbEngine(object):
    """
    Fetch the credentials from AWS Secrets Manager.
    :return: DB connection to the respective database
    """

    def __init__(self, region_name='us-east-1'):
        self.session = boto3.session.Session()
        self.secrets_client = self.session.client(
            service_name='secretsmanager',
            region_name=region_name
        )

    def get_secrets(self, secret_name):
        """
        :return: Based on the environment get secrets for
        Client SQL db & Postgres Saivadb
        """
        log_message(message_type='info', action_type='get_secrets', secret_name=secret_name)
        db_info = json.loads(
            self.secrets_client.get_secret_value(SecretId=secret_name)[
                'SecretString'
            ]
        )
        return db_info

    def get_postgresdb_engine(self):
        """
        Based on the environment connects to the respective database
        :param client: client name
        :return: Saivadb Postgres engine
        """
        log_message(message_type='info', action_type='connect_to_postgresdb', client='SaivaDB')
        # Fetch credentials from AWS Secrets Manager
        postgresdb_info = self.get_secrets(secret_name=f'{env}-saivadb')
        # Create DB URL
        saivadb_url = URL(
            drivername='postgresql',
            username=postgresdb_info['username'],
            password=postgresdb_info['password'],
            host=postgresdb_info['host'],
            port=postgresdb_info['port'],
            database=postgresdb_info['dbname'],
        )
        # Return Postgres Engine
        return create_engine(saivadb_url, echo=False)
    
    def get_sqldb_engine(self, clientdb_name):
        """
        Based on the environment connects to the respective database.
        Avante db is in client VPN hence we use different credentials.
        :param client: client name
        :return: Client SQL engine
        """
        log_message(message_type='info', action_type='connect_to_sqldb', client=clientdb_name)
        # Fetch credentials from AWS Secrets Manager
        if clientdb_name == 'avante':
            sqldb_info = self.get_secrets(secret_name=f'avantedb')
        else:
            sqldb_info = self.get_secrets(secret_name=f'{env}-sqlserver')
            sqldb_info['dbname'] = clientdb_name

        # Create DB URL
        client_sqldb_url = URL(
            drivername='mssql+pyodbc',
            username=sqldb_info['username'],
            password=sqldb_info['password'],
            host=sqldb_info['host'],
            port=sqldb_info['port'],
            database=sqldb_info['dbname'],
            query={'driver': 'ODBC Driver 17 for SQL Server'},
        )
        # Return Sql Engine
        return create_engine(client_sqldb_url, echo=False)
    
    def verify_connectivity(self, engine):
        assert engine.execute('select 1').fetchall() is not None  # verify connectivity



In [7]:
engine = DbEngine()
saiva_engine = engine.get_postgresdb_engine()
client_sql_engine = engine.get_sqldb_engine(clientdb_name=CLIENT)

{"action_type": "connect_to_postgresdb", "client": "SaivaDB", "timestamp": 1635822844.0311444, "task_uuid": "460f94e9-2f8b-4a83-a14b-6ca053d1970b", "task_level": [1], "message_type": "info"}
{"action_type": "get_secrets", "secret_name": "prod-saivadb", "timestamp": 1635822844.031853, "task_uuid": "ca932fc6-9a5d-4270-b693-53ed04549a79", "task_level": [1], "message_type": "info"}
{"action_type": "connect_to_sqldb", "client": "avante", "timestamp": 1635822844.2046854, "task_uuid": "fb96e7ab-0ef3-4875-b03e-11c3f052b330", "task_level": [1], "message_type": "info"}
{"action_type": "get_secrets", "secret_name": "avantedb", "timestamp": 1635822844.2053769, "task_uuid": "2eb54d3e-2f8c-4e1b-a624-e40bd7db3048", "task_level": [1], "message_type": "info"}


## Verify whether the lastadmissiondate present in transfers table in present in admission table

In [8]:
# query = f"""
# select al.patientid, al.facilityid, al.dateofadmission, al.admissionstatus, al.ToFromTypeDescription, 
# fp.masterpatientid, fp.recentadmissiondate, fp.initialadmissiondate, fp.PatientDeleted 
# from view_ods_hospital_transfers_admission_log al 
# left join view_ods_facility_patient fp on (al.patientid = fp.patientid and al.facilityid = fp.facilityid)
# and al.facilityid in {FACILITYIDS}
# """

# adf = pd.read_sql(query, con=client_sql_engine)
# adf['dateofadmission'] = pd.to_datetime(adf['dateofadmission']).dt.date
# adf.head()

In [9]:
# adf.query('(masterpatientid == 483241) ')    

In [10]:
# rdf.query('(masterpatientid == 483241)')[['patientid','masterpatientid', 'lastadmissiondate','lengthofstay','show_in_report','best_exp_rank']]

## Harsh RTH from SAIVADB

In [11]:
query = f"""with rh as (
    select ht.*,
           fa.facilityname,
           fp.masterpatientid,
           dp.modelid,
           dp.group_rank,
           dp.show_in_report,
           fp.patientmrn,
           fp.firstname,
           fp.lastname
    from public.hospital_transfers ht
             left join public.facility_patient fp
                       on ht.client = fp.client
                           and ht.facilityid = fp.facilityid
                           and ht.patientid = fp.patientid
             left join daily_predictions dp
                       on ht.client = dp.client
                           and ht.facilityid = dp.facilityid
                           and (date(ht.dateoftransfer) - date(dp.censusdate)) <= 3
                           and (date(ht.dateoftransfer) - date(dp.censusdate)) > 0
                           and date(dp.censusdate) <= date(ht.dateoftransfer)
                           and fp.masterpatientid = dp.masterpatientid
             left join facility fa
                       on fa.facilityid = ht.facilityid
                           and fa.client = ht.client
    where ht.client = '{CLIENT}'
      and ht.dateoftransfer between '{TRAIN_START_DATE}' and '{TEST_END_DATE}'
      and (lower(ht.payerdescription) NOT LIKE '%%hospice%%' or ht.payerdescription is null)
      and (ht.outcome <> 'ED Visit Only' or ht.outcome is null)
      and ht.planned = 'No'
      and fa.is_active=True
      and (ht.facilityid IN {FACILITYIDS} OR {ENABLE_ALL_FACILITIES})
)
SELECT rh.client,
       rh.facilityid,
       rh.facilityname,
       rh.patientid,
       rh.masterpatientid,
       rh.patientmrn,
       rh.lastname,
       rh.firstname,
       rh.dateoftransfer,
       rh.planned,
       rh.transferreason,
       rh.otherreasonfortransfer,
       rh.outcome,
       rh.transferredto,
       rh.lengthofstay,
       rh.lastadmissiondate,
       rh.payertype,
       rh.payerdescription,
       min(group_rank) as best_exp_rank,
       bool_or(rh.show_in_report) as show_in_report,
       string_agg(distinct rh.modelid, ',') as modelids,
       -- count of how many predictions were made for that day (the number of rows that were grouped)
       -- have to special case for when we made no predictions because there would be still be 1 row
       (CASE
            WHEN bool_or(rh.show_in_report) IS NULL
                THEN 0
            ELSE count(*)
           END
           )  as num_predictions
FROM rh
GROUP BY rh.client, rh.facilityid, rh.facilityname,
         rh.patientid, rh.masterpatientid, rh.patientmrn, rh.lastname, rh.firstname, rh.dateoftransfer,
         rh.planned, rh.transferreason, rh.otherreasonfortransfer, rh.outcome,
         rh.transferredto, rh.lengthofstay, rh.payertype, rh.payerdescription, rh.lastadmissiondate
"""

rdf = pd.read_sql(query, con=saiva_engine)
print(rdf.shape)
rdf.head()

(1199, 22)


Unnamed: 0,client,facilityid,facilityname,patientid,masterpatientid,patientmrn,lastname,firstname,dateoftransfer,planned,transferreason,otherreasonfortransfer,outcome,transferredto,lengthofstay,lastadmissiondate,payertype,payerdescription,best_exp_rank,show_in_report,modelids,num_predictions
0,avante,1,Avante at Boca Raton,177103,75244.0,7550,FRANK,JERROLD,2021-07-19,No,Other,Pacemaker,"Admitted, Inpatient",Holy Cross Hospital,800.0,2019-05-11 18:58:00,Medicaid,MCD SUNSHINE,40.0,False,ebce6fa9bdcb4040be48b756d63e4c8a,3
1,avante,1,Avante at Boca Raton,330601,119814.0,8827,JEAM,ONEL,2021-02-02,No,"Shortness of Breath (bronchitis, pneumonia)",,"Admitted, Inpatient",Boca Raton Hospital,57.0,2020-12-07 14:55:00,Medicaid,Medicaid Pending,7.0,True,d0c497c8b9b04f4d9e1e1e0c9297cc1f,3
2,avante,1,Avante at Boca Raton,330601,119814.0,8827,JEAM,ONEL,2021-02-10,No,Chest Pain,,"Admitted, Inpatient",Boca Raton Hospital,4.0,2021-02-06 17:30:00,Medicaid,Medicaid Pending,2.0,True,d0c497c8b9b04f4d9e1e1e0c9297cc1f,3
3,avante,1,Avante at Boca Raton,330601,119814.0,8827,JEAM,ONEL,2021-02-23,No,Other,Respiratory Distress,"Admitted, Status Uncertain",Boca Raton Hospital,6.0,2021-02-17 17:00:00,Medicaid,Medicaid Pending,1.0,True,d0c497c8b9b04f4d9e1e1e0c9297cc1f,3
4,avante,1,Avante at Boca Raton,330601,119814.0,8827,JEAM,ONEL,2021-03-10,No,"Shortness of Breath (bronchitis, pneumonia)",,"Admitted, Inpatient",Boca Raton Hospital,9.0,2021-03-01 15:14:00,Medicaid,Medicaid Pending,3.0,True,d0c497c8b9b04f4d9e1e1e0c9297cc1f,3


In [12]:
def pre_process(rdf):
    rdf['dateoftransfer'] = pd.to_datetime(rdf['dateoftransfer']).dt.date
    rdf['lastadmissiondate'] = pd.to_datetime(rdf['lastadmissiondate']).dt.date
#     rdf['lengthofstay'] = (rdf['dateoftransfer'] - rdf['lastadmissiondate']).dt.days 
    rdf = rdf.sort_values(by=['facilityid','dateoftransfer'])
    rdf['lengthofstay'] = rdf['lengthofstay'].fillna(0)
    
    return rdf

In [13]:
rdf = pre_process(rdf)
rdf.head()

Unnamed: 0,client,facilityid,facilityname,patientid,masterpatientid,patientmrn,lastname,firstname,dateoftransfer,planned,transferreason,otherreasonfortransfer,outcome,transferredto,lengthofstay,lastadmissiondate,payertype,payerdescription,best_exp_rank,show_in_report,modelids,num_predictions
45,avante,1,Avante at Boca Raton,2597079,279717.0,10235,Collins,William,2021-01-15,No,"Shortness of Breath (bronchitis, pneumonia)",,"Admitted, Inpatient",Boca Raton Hospital,51.0,2020-11-25,Medicare A,Medicare A,54.0,False,d0c497c8b9b04f4d9e1e1e0c9297cc1f,3
52,avante,1,Avante at Boca Raton,2799863,281387.0,10344,Bledsoe,Gladys,2021-01-17,No,"Shortness of Breath (bronchitis, pneumonia)",,"Admitted, Inpatient",Boca Raton Hospital,12.0,2021-01-05,Medicare A,Medicare A,12.0,True,d0c497c8b9b04f4d9e1e1e0c9297cc1f,3
54,avante,1,Avante at Boca Raton,2837413,281589.0,10357,Amato,James,2021-01-18,No,"Shortness of Breath (bronchitis, pneumonia)",,"Admitted, Inpatient",Boca Raton Hospital,6.0,2021-01-12,Medicare A,Medicare A,12.0,True,d0c497c8b9b04f4d9e1e1e0c9297cc1f,3
13,avante,1,Avante at Boca Raton,879997,263428.0,9776,Galanos,Susan,2021-01-19,No,Unresponsive,,"Admitted, Inpatient",Boca Raton Hospital,456.0,2019-10-21,Medicaid,MCD STAYWELL,77.0,False,d0c497c8b9b04f4d9e1e1e0c9297cc1f,3
30,avante,1,Avante at Boca Raton,2256634,276991.0,10135,Powell,Timothy,2021-01-19,No,"Abnormal Vital Signs (low/high BP, high respiratory rate)",,"Admitted, Inpatient",Boca Raton Hospital,93.0,2020-10-18,Medicaid,MCD SUNSHINE,,False,,3


In [14]:
#check computed lengthofstay and original lengthofstay

# rdf['comparision'] = np.where(rdf["new_lengthofstay"] == rdf["lengthofstay"], True, False)

# rdf.query('comparision == False')[['patientid','dateoftransfer','lastadmissiondate','lengthofstay','new_lengthofstay','transferreason','outcome']].head()

# rdf.query('comparision == False').shape



In [15]:
# day_diff = (pd.to_datetime(TEST_END_DATE) - pd.to_datetime(TRAIN_START_DATE)).days
# obj = [0 for i in range(0,120)]
# rth_matrix = [obj for i in range(0,day_diff)]
# rth_list = obj

In [16]:
# def get_day_no(date):
#     return (pd.to_datetime(date) - pd.to_datetime(TRAIN_START_DATE)).days

def get_stay_length(staylength):
    if staylength > 120:
        return 120
    else:
        return staylength

In [17]:

def get_metrics_df(rdf):
    # here each index indicates LOS and value indicates the count of transfer for that LOS
    rth = [0 for i in range(0,121)]  ## Inclcudes both ranked & Unranked 
    caught_rth = [0 for i in range(0,121)]
    missed_rth = [0 for i in range(0,121)]
    unranked_rth = [0 for i in range(0,121)] 
    ranked_rth = [0 for i in range(0,121)] 

    for index, row in rdf.iterrows():
        j = int(get_stay_length(row['lengthofstay']))
        rth[j] += 1
        if pd.notna(row['best_exp_rank']) and row['show_in_report'] == True:
            caught_rth[j] += 1
        if pd.notna(row['best_exp_rank']) and row['show_in_report'] == False:
            missed_rth[j] += 1
        if pd.isna(row['best_exp_rank']):
            unranked_rth[j] += 1
        else:
            ranked_rth[j] += 1

    # create a dataframe from the above 4 lists
    df = pd.DataFrame ({"rth": rth, "caught_rth": caught_rth, "missed_rth": missed_rth, "ranked_rth":ranked_rth, "unranked_rth": unranked_rth})

    ## percentages 

    df['rth_p'] = (df['rth']/sum(rth)) # * 100
    df['caught_rth_p'] = (df['caught_rth']/sum(caught_rth)) * 100
    df['missed_rth_p'] = (df['missed_rth']/sum(missed_rth)) * 100
    df['unranked_rth_p'] = (df['unranked_rth']/sum(unranked_rth)) * 100

    return df, rth,caught_rth, missed_rth, unranked_rth, ranked_rth    

In [18]:
df,rth,caught_rth, missed_rth, unranked_rth, ranked_rth = get_metrics_df(rdf)

# Compute Harsh recall 
cumsum_df = df[['caught_rth','rth']].cumsum()
cumsum_df['fraction_rth'] = cumsum_df['caught_rth'] / cumsum_df['rth'] ## harsh def for recall


reverse_cumsum_df = pd.DataFrame()
reverse_cumsum_df['caught_rth'] = df.loc[::-1, 'caught_rth'].cumsum()[::-1]
reverse_cumsum_df['rth'] = df.loc[::-1, 'rth'].cumsum()[::-1]
reverse_cumsum_df['reverse_fraction_rth'] = reverse_cumsum_df['caught_rth'] / reverse_cumsum_df['rth']  ## harsh def

df['reverse_fraction_rth'] = reverse_cumsum_df['reverse_fraction_rth']
df['fraction_rth'] = cumsum_df['fraction_rth']


rth_p_cumsum_df = df[['rth_p']].cumsum()
df['rth_p_cumsum'] = rth_p_cumsum_df['rth_p']

rth_p_reverse_cumsum_df = pd.DataFrame()
rth_p_reverse_cumsum_df['rth_p'] = df.loc[::-1, 'rth_p'].cumsum()[::-1]
df['rth_p_reverse_cumsum'] = rth_p_reverse_cumsum_df['rth_p']

df['rth_pct'] = df['rth_p'].map("{:.2%}".format)
# df['rth_p_cumsum'] = df['rth_p_cumsum'].map("{:.2%}".format)
# df['rth_p_reverse_cumsum'] = df['rth_p_reverse_cumsum'].map("{:.2%}".format)

# df["rth_desc"] = df["rth_pct"] + ", Cumusum:" + df["rth_p_cumsum"] + ", Reverse Cumsum:" +  df["rth_p_reverse_cumsum"]

df['rth_diff'] = df['fraction_rth'] - df['reverse_fraction_rth']

df.head()

Unnamed: 0,rth,caught_rth,missed_rth,ranked_rth,unranked_rth,rth_p,caught_rth_p,missed_rth_p,unranked_rth_p,reverse_fraction_rth,fraction_rth,rth_p_cumsum,rth_p_reverse_cumsum,rth_pct,rth_diff
0,8,1,2,3,5,0.006672,0.178891,0.383877,4.201681,0.466222,0.125,0.006672,1.0,0.67%,-0.341222
1,52,3,5,8,44,0.043369,0.536673,0.959693,36.97479,0.468514,0.066667,0.050042,0.993328,4.34%,-0.401847
2,44,10,17,27,17,0.036697,1.788909,3.262956,14.285714,0.48727,0.134615,0.086739,0.949958,3.67%,-0.352654
3,37,23,12,35,2,0.030859,4.11449,2.303263,1.680672,0.497717,0.262411,0.117598,0.913261,3.09%,-0.235306
4,26,21,5,26,0,0.021685,3.756708,0.959693,0.0,0.493384,0.347305,0.139283,0.882402,2.17%,-0.146078


In [19]:
cumsum_df.tail()

Unnamed: 0,caught_rth,rth,fraction_rth
116,502,930,0.539785
117,502,930,0.539785
118,502,931,0.539205
119,504,935,0.539037
120,559,1199,0.466222


In [20]:
reverse_cumsum_df.tail()

Unnamed: 0,caught_rth,rth,reverse_fraction_rth
116,57,271,0.210332
117,57,269,0.211896
118,57,269,0.211896
119,57,268,0.212687
120,55,264,0.208333


In [21]:
df.head(10)

Unnamed: 0,rth,caught_rth,missed_rth,ranked_rth,unranked_rth,rth_p,caught_rth_p,missed_rth_p,unranked_rth_p,reverse_fraction_rth,fraction_rth,rth_p_cumsum,rth_p_reverse_cumsum,rth_pct,rth_diff
0,8,1,2,3,5,0.006672,0.178891,0.383877,4.201681,0.466222,0.125,0.006672,1.0,0.67%,-0.341222
1,52,3,5,8,44,0.043369,0.536673,0.959693,36.97479,0.468514,0.066667,0.050042,0.993328,4.34%,-0.401847
2,44,10,17,27,17,0.036697,1.788909,3.262956,14.285714,0.48727,0.134615,0.086739,0.949958,3.67%,-0.352654
3,37,23,12,35,2,0.030859,4.11449,2.303263,1.680672,0.497717,0.262411,0.117598,0.913261,3.09%,-0.235306
4,26,21,5,26,0,0.021685,3.756708,0.959693,0.0,0.493384,0.347305,0.139283,0.882402,2.17%,-0.146078
5,36,25,11,36,0,0.030025,4.472272,2.111324,0.0,0.485465,0.408867,0.169308,0.860717,3.00%,-0.076598
6,30,22,8,30,0,0.025021,3.935599,1.535509,0.0,0.477912,0.450644,0.194329,0.830692,2.50%,-0.027268
7,34,27,6,33,1,0.028357,4.830054,1.151631,0.840336,0.469979,0.494382,0.222686,0.805671,2.84%,0.024403
8,29,20,7,27,2,0.024187,3.577818,1.34357,1.680672,0.458155,0.513514,0.246872,0.777314,2.42%,0.055359
9,25,18,6,24,1,0.020851,3.220036,1.151631,0.840336,0.45072,0.529595,0.267723,0.753128,2.09%,0.078875


## Caught, Missed & Unranked RTHs

In [22]:
temp_df = df[:-1]
graph_height = 750

In [38]:
fig = px.bar(
    df, 
    x=['caught_rth','missed_rth','unranked_rth'], 
    y=list(df.index),
    height=graph_height, orientation='h',
    title=f'Caught, Missed & Unranked RTH in {CLIENT} all facilities, {date_range}',
    labels={
        'y':'Length Of Stay', 
        'caught_rth': 'RTH Count'
    }, 
    color_discrete_sequence=['green', 'yellow', 'red']
)
fig['layout']['yaxis']['autorange'] = "reversed"
fig['layout']['xaxis']['title'] = "RTH"
fig.show()

##  RTH

In [39]:
fig = px.bar(
    df, 
    x="rth", 
    y=list(df.index), 
    hover_data=['rth_pct', 'rth_p_cumsum', 'rth_p_reverse_cumsum'],
    labels={
        'y':'Length Of Stay', 
        'rth': 'RTH Count', 
        'rth_pct': 'RTH Percentage',
        'rth_p_cumsum': 'RTH Percentage Cumsum',
        'rth_p_reverse_cumsum': 'RTH Percentage Reverse Cumsum',
        'rth_desc': 'Description'
    }, 
    height=graph_height, orientation='h',
    title=f"RTHs in {CLIENT} all facilities, {date_range}"
)
fig['layout']['yaxis']['autorange'] = "reversed"
fig.update_traces( textposition='outside')

fig.show()

In [25]:
fig = px.line(
    df, y=["rth_p_cumsum", "rth_p_reverse_cumsum"], x=list(df.index), 
    labels={
        'x':'Length Of Stay', 
    }, 
    height=750, 
    title=f"Cumulative RTH Percentage in {CLIENT} all facilities, {date_range}"
)
fig['layout']['yaxis']['title'] = "RTH Percentage"
fig['layout']['yaxis']['tickformat'] = ',.0%'
fig['layout']['yaxis']['range'] = [0,1]


new_names= ['RTH Percentage at LOS <= x axis', 'RTH Percentage at LOS >= x axis']
for i, new_name in enumerate(new_names):
    fig.data[i].name = new_name

fig.show()

## Caught RTH

In [40]:
fig = px.bar(
    temp_df, 
    x='caught_rth', 
    y=list(temp_df.index),
    height=graph_height, orientation='h',
    title=f'Caught RTHs in {CLIENT} all facilities, {date_range}',
    labels={
        'y':'Length Of Stay', 
        'caught_rth': 'Caught RTH'
    }, 
    color_discrete_sequence=['green']
)
fig['layout']['yaxis']['autorange'] = "reversed"
fig.show()

## Missed RTH

In [41]:
fig = px.bar(
    temp_df, 
    x='missed_rth', 
    y=list(temp_df.index),
    height=graph_height, orientation='h',
    title=f'Missed RTHs in {CLIENT} all facilities, {date_range}',
    labels={
        'y':'Length Of Stay', 
        'missed_rth': 'Missed RTH'
    }, 
    color_discrete_sequence=['yellow']
)
fig['layout']['yaxis']['autorange'] = "reversed"
fig.show()

## UnRanked RTH

In [28]:
fig = px.bar(
    df, 
    x='unranked_rth', 
    y=list(df.index),
    height=graph_height, orientation='h',
    title=f'Unranked RTHs in {CLIENT} all facilities, {date_range}',
    labels={
        'y':'Length Of Stay', 
        'unranked_rth': 'Unranked RTH'
    }, 
    color_discrete_sequence=['red']
)
fig['layout']['yaxis']['autorange'] = "reversed"
fig.show()

## Harsh Recall "<=0” “<=1”, “<=2”, …, “<=119"

In [29]:
y = [f'<={i}' for i in range(0,121)]
y[120] = 'All'

fig = px.bar(
    df, x="fraction_rth", y=y, 
    labels={
        'y':'Length Of Stay', 
        'fraction_rth': 'Recall'
    }, 
    height=graph_height, orientation='h',
    title=f"Recall (harsh def) in {CLIENT} all facilities, {date_range}"
)
fig['layout']['yaxis']['autorange'] = "reversed"
fig['layout']['xaxis']['tickformat'] = ',.0%'
fig['layout']['xaxis']['range'] = [0,1]
fig.update_traces( textposition='outside')

fig.show()


## Harsh recall ">=1”, “>=2”, “>=3”, … “>=119”, “>=120”

In [30]:
y = [f'>={i}' for i in range(0,121)]
y[0] = 'All'

plt.show()

fig = px.bar(
    df, x="reverse_fraction_rth", y=y, 
    labels={
        'y':'Length Of Stay', 
        'reverse_fraction_rth': 'Recall'
    }, 
    height=graph_height, orientation='h',
    title=f"Recall (harsh def) in {CLIENT} all facilities, {date_range}"
)
fig['layout']['yaxis']['autorange'] = "reversed"
fig['layout']['xaxis']['tickformat'] = ',.0%'
fig['layout']['xaxis']['range'] = [0,1]
fig.update_traces( textposition='outside')

fig.show()

## Combined Cumulative Recall

In [31]:
fig = px.line(
    df, y=["fraction_rth", "reverse_fraction_rth"], x=list(df.index), 
    labels={
        'x':'Length Of Stay', 
    }, 
    height=750, 
    title=f"Recall (harsh def) in {CLIENT} all facilities, {date_range}"
)
fig['layout']['yaxis']['title'] = "Recall"
fig['layout']['yaxis']['tickformat'] = ',.0%'
fig['layout']['yaxis']['range'] = [0,1]
new_names= ['Recall at LOS <= x axis', 'Recall at LOS >= x axis']
for i, new_name in enumerate(new_names):
    fig.data[i].name = new_name

fig.show()

##  Row with largest difference between Cumsum and Reverse Cumsum recall

In [32]:
idx = df['rth_diff'].idxmax()
print(f'Index: {idx}')
df.loc[idx]

Index: 82


rth                     1          
caught_rth              0          
missed_rth              1          
ranked_rth              1          
unranked_rth            0          
rth_p                   0.000834028
caught_rth_p            0          
missed_rth_p            0.191939   
unranked_rth_p          0          
reverse_fraction_rth    0.21021    
fraction_rth            0.564014   
rth_p_cumsum            0.723103   
rth_p_reverse_cumsum    0.277731   
rth_pct                 0.08%      
rth_diff                0.353804   
Name: 82, dtype: object

## ==================== Lenient way for Recall ==================== 

In [33]:
query = f"""with rh as (
    select ht.*,
           fa.facilityname,
           fp.masterpatientid,
           dp.modelid,
           dp.group_rank,
           dp.show_in_report,
           fp.patientmrn,
           fp.firstname,
           fp.lastname
    from public.hospital_transfers ht
             left join public.facility_patient fp
                       on ht.client = fp.client
                           and ht.facilityid = fp.facilityid
                           and ht.patientid = fp.patientid
             left join daily_predictions dp
                       on ht.client = dp.client
                           and ht.facilityid = dp.facilityid
                           and (date(ht.dateoftransfer) - date(dp.censusdate)) <= 3
                           and date(dp.censusdate) <= date(ht.dateoftransfer)
                           and fp.masterpatientid = dp.masterpatientid
             left join facility fa
                       on fa.facilityid = ht.facilityid
                           and fa.client = ht.client
    where ht.client = '{CLIENT}'
      and ht.dateoftransfer between '{TRAIN_START_DATE}' and '{TEST_END_DATE}'
      and (lower(ht.payerdescription) NOT LIKE '%%hospice%%' or ht.payerdescription is null)
      and (ht.outcome <> 'ED Visit Only' or ht.outcome is null)
      and ht.planned = 'No'
      and fa.is_active=True
      and (ht.facilityid IN {FACILITYIDS} OR {ENABLE_ALL_FACILITIES})
)
SELECT rh.client,
       rh.facilityid,
       rh.facilityname,
       rh.patientid,
       rh.masterpatientid,
       rh.patientmrn,
       rh.lastname,
       rh.firstname,
       rh.dateoftransfer,
       rh.planned,
       rh.transferreason,
       rh.otherreasonfortransfer,
       rh.outcome,
       rh.transferredto,
       rh.lengthofstay,
       rh.lastadmissiondate,
       rh.payertype,
       rh.payerdescription,
       min(group_rank) as best_exp_rank,
       bool_or(rh.show_in_report) as show_in_report,
       string_agg(distinct rh.modelid, ',') as modelids,
       -- count of how many predictions were made for that day (the number of rows that were grouped)
       -- have to special case for when we made no predictions because there would be still be 1 row
       (CASE
            WHEN bool_or(rh.show_in_report) IS NULL
                THEN 0
            ELSE count(*)
           END
           )  as num_predictions
FROM rh
GROUP BY rh.client, rh.facilityid, rh.facilityname,
         rh.patientid, rh.masterpatientid, rh.patientmrn, rh.lastname, rh.firstname, rh.dateoftransfer,
         rh.planned, rh.transferreason, rh.otherreasonfortransfer, rh.outcome,
         rh.transferredto, rh.lengthofstay, rh.payertype, rh.payerdescription, rh.lastadmissiondate
"""

l_rdf = pd.read_sql(query, con=saiva_engine)

print(l_rdf.shape)
l_rdf.head()

(1199, 22)


Unnamed: 0,client,facilityid,facilityname,patientid,masterpatientid,patientmrn,lastname,firstname,dateoftransfer,planned,transferreason,otherreasonfortransfer,outcome,transferredto,lengthofstay,lastadmissiondate,payertype,payerdescription,best_exp_rank,show_in_report,modelids,num_predictions
0,avante,1,Avante at Boca Raton,177103,75244.0,7550,FRANK,JERROLD,2021-07-19,No,Other,Pacemaker,"Admitted, Inpatient",Holy Cross Hospital,800.0,2019-05-11 18:58:00,Medicaid,MCD SUNSHINE,39.0,False,ebce6fa9bdcb4040be48b756d63e4c8a,4
1,avante,1,Avante at Boca Raton,330601,119814.0,8827,JEAM,ONEL,2021-02-02,No,"Shortness of Breath (bronchitis, pneumonia)",,"Admitted, Inpatient",Boca Raton Hospital,57.0,2020-12-07 14:55:00,Medicaid,Medicaid Pending,7.0,True,d0c497c8b9b04f4d9e1e1e0c9297cc1f,4
2,avante,1,Avante at Boca Raton,330601,119814.0,8827,JEAM,ONEL,2021-02-10,No,Chest Pain,,"Admitted, Inpatient",Boca Raton Hospital,4.0,2021-02-06 17:30:00,Medicaid,Medicaid Pending,2.0,True,d0c497c8b9b04f4d9e1e1e0c9297cc1f,4
3,avante,1,Avante at Boca Raton,330601,119814.0,8827,JEAM,ONEL,2021-02-23,No,Other,Respiratory Distress,"Admitted, Status Uncertain",Boca Raton Hospital,6.0,2021-02-17 17:00:00,Medicaid,Medicaid Pending,1.0,True,d0c497c8b9b04f4d9e1e1e0c9297cc1f,4
4,avante,1,Avante at Boca Raton,330601,119814.0,8827,JEAM,ONEL,2021-03-10,No,"Shortness of Breath (bronchitis, pneumonia)",,"Admitted, Inpatient",Boca Raton Hospital,9.0,2021-03-01 15:14:00,Medicaid,Medicaid Pending,3.0,True,d0c497c8b9b04f4d9e1e1e0c9297cc1f,4


In [34]:
l_rdf = pre_process(l_rdf)
l_df,l_rth,l_caught_rth, l_missed_rth, l_unranked_rth, l_ranked_rth  = get_metrics_df(l_rdf)

# Compute Lenient recall 
l_cumsum_df = l_df[['caught_rth','ranked_rth']].cumsum()
l_cumsum_df['fraction_rth'] = l_cumsum_df['caught_rth'] / l_cumsum_df['ranked_rth'] ## Lenient def for recall

print('********************1')

l_reverse_cumsum_df = pd.DataFrame()
l_reverse_cumsum_df['caught_rth'] = l_df.loc[::-1, 'caught_rth'].cumsum()[::-1]
l_reverse_cumsum_df['ranked_rth'] = l_df.loc[::-1, 'ranked_rth'].cumsum()[::-1]
l_reverse_cumsum_df['reverse_fraction_rth'] = l_reverse_cumsum_df['caught_rth'] / l_reverse_cumsum_df['ranked_rth']  ## Lenient def

l_df['reverse_fraction_rth'] = l_reverse_cumsum_df['reverse_fraction_rth']
l_df['fraction_rth'] = l_cumsum_df['fraction_rth']

l_df.head()

********************1


Unnamed: 0,rth,caught_rth,missed_rth,ranked_rth,unranked_rth,rth_p,caught_rth_p,missed_rth_p,unranked_rth_p,reverse_fraction_rth,fraction_rth
0,8,1,2,3,5,0.006672,0.154799,0.420168,6.493506,0.575758,0.333333
1,52,11,21,32,20,0.043369,1.702786,4.411765,25.974026,0.576408,0.342857
2,44,25,18,43,1,0.036697,3.869969,3.781513,1.298701,0.583257,0.474359
3,37,30,6,36,1,0.030859,4.643963,1.260504,1.298701,0.583333,0.587719
4,26,21,5,26,0,0.021685,3.250774,1.05042,0.0,0.574405,0.628571


## Lenient Recall "<=0” “<=1”, “<=2”, …, “<=119"

In [35]:
y = [f'<={i}' for i in range(0,121)]
y[120] = 'All'

fig = px.bar(
    l_df, x="fraction_rth", y=y, 
    labels={
        'y':'Length Of Stay', 
        'fraction_rth': 'Recall'
    }, 
    height=750, orientation='h',
    title=f"Recall (lenient def) in {CLIENT} all facilities, {date_range}"
)
fig['layout']['yaxis']['autorange'] = "reversed"
fig['layout']['xaxis']['tickformat'] = ',.0%'
fig['layout']['xaxis']['range'] = [0,1]
fig.update_traces( textposition='outside')

fig.show()


## Lenient Recall ">=1”, “>=2”, “>=3”, … “>=119”, “>=120”

In [36]:
y = [f'>={i}' for i in range(0,121)]
y[0] = 'All'

y = [f'>={i}' for i in range(0,121)]
y[0] = 'All'

plt.show()

fig = px.bar(
    l_df, x="reverse_fraction_rth", y=y, 
    labels={
        'y':'Length Of Stay', 
        'reverse_fraction_rth': 'Recall'
    }, 
    height=750, orientation='h',
    title=f"Recall(lenient def) in {CLIENT} all facilities, {date_range}"
)
fig['layout']['yaxis']['autorange'] = "reversed"
fig['layout']['xaxis']['tickformat'] = ',.0%'
fig['layout']['xaxis']['range'] = [0,1]
fig.update_traces( textposition='outside')

fig.show()

## Cumulative Combined Recall

In [37]:
fig = px.line(
    l_df, y=["fraction_rth", "reverse_fraction_rth"], x=list(l_df.index), 
    labels={
        'x':'Length Of Stay', 
    }, 
    height=750, 
    title=f"Recall (lenient def) in {CLIENT} all facilities, {date_range}"
)
fig['layout']['yaxis']['title'] = "Recall"
fig['layout']['yaxis']['tickformat'] = ',.0%'
fig['layout']['yaxis']['range'] = [0,1]
new_names= ['Recall at LOS <= x axis', 'Recall at LOS >= x axis']
for i, new_name in enumerate(new_names):
    fig.data[i].name = new_name

fig.show()