In [None]:
import pandas as pd
import boto3
import json
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL

In [None]:
# Adjust the start date, end date, client and facilities accordingly.
# get_query() interpolates these values directly into the SQL text.

# Inclusive bounds compared against ht.dateoftransfer (>= START_DATE, <= END_DATE).
START_DATE = '2020-09-04'
END_DATE = '2020-09-21'
# Compared against ht.client.
CLIENT = 'trio'
# Written as a parenthesised SQL list because it is pasted verbatim after "ht.facilityid in".
FACILITY_IDS = '(1, 7, 21, 42, 52, 55, 186, 194, 265, 273, 274, 275, 276, 277, 278, 279)'


In [None]:
# Select the model ids of prod, staging and dev accordingly.
# get_postgresdb_engine(env) copies the tuple matching `env` into the global
# MODEL_IDS, which get_query() then uses to filter daily_predictions.modelid.

PROD_MODEL_IDS = ('73861fd9a0a5485cb3deccf816a15c7b',
                  '31edea3de43f4721bb925c5f146a3189',
                  'e9f2b07d26984dcfbdaff0a86f033e36',
                  'daac8b0f079d487d96046e9dff6efe84',
                  'c77d3159cc044c14bf15da77eb889a17')

STAGING_MODEL_IDS = ('71f1c512d7ee4c18994f5426dda67172',
                     '270cc0c6b3d240c88ac15ecab24e6790',
                     '49d6cc5035354a958f405db8cd7f8beb',
                     '647740c9d0df4de8963f8cf8ce03f909')

DEV_MODEL_IDS =     ('418e412613f84b308ef88c522decbcbc',
                     '44f99f4cebf64128bd430382cf7c0a14',
                     '5b21bc80dec24ed28ecd54f5219139c6',
                     '754618bc685547568f7d12bf7e11c6fd',
                     '17898acf4ba74d1698b3acac6cb29992',
                     'b9fb5010ecc2421ab5e3e7fdf8835e0a',
                     'd3c0d3d335ec483da652665221aabf04',
                     'a06f7d2408324abdac6cbca9fc1b7e7d')

In [None]:
# Create the AWS Secrets Manager client (region us-east-1) that get_secrets()
# uses to look up database credentials. No database connection is opened here.

session = boto3.session.Session()
secrets_client = session.client( service_name='secretsmanager', region_name='us-east-1')

def get_secrets(secret_name):
    """
    Fetch one secret from AWS Secrets Manager and decode it as JSON.

    :param secret_name: identifier passed as ``SecretId`` to ``get_secret_value``
    :return: the parsed content of the secret's ``SecretString`` field
    """
    response = secrets_client.get_secret_value(SecretId=secret_name)
    secret_payload = response['SecretString']
    return json.loads(secret_payload)


def get_postgresdb_engine(env):
    """
    Build a SQLAlchemy engine for the ``{env}-saivadb`` Postgres database.

    Side effect: rebinds the module-level ``MODEL_IDS`` to the model-id tuple
    that belongs to ``env``; get_query() reads that global when building SQL.

    :param env: environment name, one of 'prod', 'staging' or 'dev'
    :return: SQLAlchemy engine created from the credentials stored in the
             ``{env}-saivadb`` secret
    :raises ValueError: if ``env`` is not one of the supported environments
    """
    global MODEL_IDS

    # Resolve the model ids first so an unsupported env fails fast, before any
    # call to Secrets Manager, and never leaves MODEL_IDS pointing at the ids
    # of a previously selected environment.
    if env == 'prod':
        model_ids = PROD_MODEL_IDS
    elif env == 'staging':
        model_ids = STAGING_MODEL_IDS
    elif env == 'dev':
        model_ids = DEV_MODEL_IDS
    else:
        raise ValueError(
            f"Unsupported env {env!r}; expected 'prod', 'staging' or 'dev'"
        )

    # Fetch credentials from AWS Secrets Manager
    postgresdb_info = get_secrets(secret_name=f'{env}-saivadb')

    # Create DB URL. SQLAlchemy 1.4+ expects URL.create(); calling URL(...)
    # directly is deprecated there, while older releases only offer the
    # constructor. Both accept the same keyword arguments.
    build_url = getattr(URL, 'create', None) or URL
    saivadb_url = build_url(
        drivername='postgresql',
        username=postgresdb_info['username'],
        password=postgresdb_info['password'],
        host=postgresdb_info['host'],
        port=postgresdb_info['port'],
        database=postgresdb_info['dbname'],
    )

    MODEL_IDS = model_ids
    return create_engine(saivadb_url, echo=False)


In [None]:
def _sql_in_list(values):
    """Render ``values`` as a parenthesised SQL list usable after ``in``.

    A string is returned unchanged (it is taken to be an already formatted
    list such as ``'(1, 2)'``). Any other iterable is rendered element by
    element: strings are single-quoted with embedded quotes doubled, other
    values go through ``str()``.

    :raises ValueError: if the iterable is empty (``in ()`` is not valid SQL)
    """
    if isinstance(values, str):
        return values
    rendered = []
    for value in values:
        if isinstance(value, str):
            rendered.append("'" + value.replace("'", "''") + "'")
        else:
            rendered.append(str(value))
    if not rendered:
        raise ValueError('an SQL "in" list needs at least one value')
    return '(' + ', '.join(rendered) + ')'


def get_query(model_ids=None, client=None, facility_ids=None,
              start_date=None, end_date=None):
    """
    Build the SQL that lists hospital transfers together with the best
    (lowest) ``experiment_group_rank`` predicted for the patient shortly
    before the transfer.

    Each hospital_transfers row is left-joined to facility_patient and then to
    daily_predictions rows of the selected models whose census date is on the
    transfer date or at most 3 days earlier. ``rank_less_than_equal_15`` is 1
    when that best rank is <= 15 and 0 otherwise (including when no prediction
    matched).

    Every argument is optional; an argument left as ``None`` falls back to the
    notebook-level constant of the same (upper-case) name.

    :param model_ids: iterable of model ids, default ``MODEL_IDS``
    :param client: client name, default ``CLIENT``
    :param facility_ids: iterable of ids or a preformatted ``'(1, 2)'`` string,
                         default ``FACILITY_IDS``
    :param start_date: inclusive lower bound on dateoftransfer, default ``START_DATE``
    :param end_date: upper bound on dateoftransfer, default ``END_DATE``
    :return: the SQL statement as a string
    """
    # Globals are only read for arguments that were not supplied.
    model_ids = MODEL_IDS if model_ids is None else model_ids
    client = CLIENT if client is None else client
    facility_ids = FACILITY_IDS if facility_ids is None else facility_ids
    start_date = START_DATE if start_date is None else start_date
    end_date = END_DATE if end_date is None else end_date

    # Interpolating the Python tuple repr breaks for one-element tuples
    # ("('abc',)" is not valid SQL), so the lists are rendered explicitly.
    model_ids_sql = _sql_in_list(model_ids)
    facility_ids_sql = _sql_in_list(facility_ids)

    # NOTE(review): the values are pasted into the SQL text, so they must come
    # from trusted notebook configuration, never from user input.
    # NOTE(review): if dateoftransfer carries a time of day, "<= end_date"
    # excludes transfers later on the end date itself - confirm the column type.
    query = f"""
    select fp.client,
    fp.facilityid,
    fp.patientid,
    fp.masterpatientid,
    fp.patientmrn,
    ht.dateoftransfer,
    dp.modelid,
    min(experiment_group_rank) as best_exp_rank,
    CASE
        WHEN min(experiment_group_rank) <= 15
            THEN 1
            ELSE 0
    END   as rank_less_than_equal_15,
    ht.planned,
    ht.transferreason,
    ht.otherreasonfortransfer,
    ht.outcome,
    ht.transferredto,
    ht.lengthofstay
    from hospital_transfers ht
    left join public.facility_patient fp
    on ht.client = fp.client
    and ht.facilityid = fp.facilityid
    and ht.patientid = fp.patientid
    left join daily_predictions dp
    on ht.client = dp.client
         and ht.facilityid = dp.facilityid
         and (date(ht.dateoftransfer) - date(dp.censusdate)) <= 3
         and dp.modelid in {model_ids_sql}
         and date(dp.censusdate) <= date(ht.dateoftransfer)
         and fp.masterpatientid = dp.masterpatientid
    where ht.client = '{client}'
    and ht.facilityid in {facility_ids_sql}
    and ht.dateoftransfer >= '{start_date}'
    and ht.dateoftransfer <= '{end_date}'
    group by fp.client, fp.facilityid, fp.patientid, fp.masterpatientid, fp.patientmrn,
           ht.dateoftransfer, dp.modelid,
           ht.planned, ht.transferreason, ht.otherreasonfortransfer,
           ht.outcome, ht.transferredto, ht.lengthofstay
    """
    return query

In [None]:
def prod_output():
    """
    Run the hospital-transfer query against the prod database.

    :return: DataFrame holding the rows returned by the get_query() SQL
    """
    # get_postgresdb_engine() also rebinds the global MODEL_IDS, so it has to
    # run before get_query() builds the SQL.
    saiva_engine = get_postgresdb_engine('prod')
    try:
        prod_query = get_query()
        return pd.read_sql(prod_query, saiva_engine)
    finally:
        # Close the engine's pooled connections instead of leaving them open
        # for the lifetime of the kernel.
        saiva_engine.dispose()


prod = prod_output()
prod.head()

In [None]:
def staging_output():
    """
    Run the hospital-transfer query against the staging database.

    :return: DataFrame holding the rows returned by the get_query() SQL
    """
    # get_postgresdb_engine() also rebinds the global MODEL_IDS, so it has to
    # run before get_query() builds the SQL.
    saiva_engine = get_postgresdb_engine('staging')
    try:
        staging_query = get_query()
        return pd.read_sql(staging_query, saiva_engine)
    finally:
        # Close the engine's pooled connections instead of leaving them open
        # for the lifetime of the kernel.
        saiva_engine.dispose()


staging = staging_output()
staging.head()

In [None]:
def dev_output():
    """
    Run the hospital-transfer query against the dev database.

    :return: DataFrame holding the rows returned by the get_query() SQL
    """
    # get_postgresdb_engine() also rebinds the global MODEL_IDS, so it has to
    # run before get_query() builds the SQL.
    saiva_engine = get_postgresdb_engine('dev')
    try:
        dev_query = get_query()
        return pd.read_sql(dev_query, saiva_engine)
    finally:
        # Close the engine's pooled connections instead of leaving them open
        # for the lifetime of the kernel.
        saiva_engine.dispose()


dev = dev_output()
dev.head()

In [None]:
# Number of rows returned by the dev query.
dev.shape[0]

In [None]:
# Number of rows returned by the prod query.
prod.shape[0]

In [None]:
# Sanity check: the dev and prod result sets must have the same number of rows.
# `assert` is a statement, so it is written without call-style parentheses, and
# the message reports both counts when the check fails.
assert len(dev) == len(prod), (
    f'row count mismatch: dev has {len(dev)} rows, prod has {len(prod)} rows'
)

In [None]:
# This function computes per-facility statistics on the recall.

def result_generate(df, env):
    """
    Summarise, per facility, how many transfers were "captured".

    A row counts as captured when ``rank_less_than_equal_15 == 1``. Every
    facility that appears in ``df`` is reported; a facility without any
    captured row gets a captured count and recall of 0 and NaN rank statistics.

    :param df: DataFrame with at least the columns ``facilityid``,
               ``best_exp_rank`` and ``rank_less_than_equal_15``
    :param env: label used as prefix for every generated column name
    :return: DataFrame with one row per facility and the columns
             ``facilityid``, ``{env}_total_transfers``,
             ``{env}_captured_count``, ``{env}_captured_mean_rank``,
             ``{env}_captured_median_rank``, ``{env}_captured_std_rank``
             and ``{env}_recall`` (captured count / total, rounded to 2 places)
    """
    total_col = f'{env}_total_transfers'
    # Aggregation name -> output column name. The explicit mapping replaces
    # renaming through the column index levels, which only worked because the
    # sorted level order happened to match the requested order.
    stat_columns = {
        'count': f'{env}_captured_count',
        'mean': f'{env}_captured_mean_rank',
        'median': f'{env}_captured_median_rank',
        'std': f'{env}_captured_std_rank',
    }
    count_col = stat_columns['count']

    # Total number of rows (transfers) per facility.
    total_transfers_df = (
        df.groupby('facilityid')
        .size()
        .reset_index(name=total_col)
        .sort_values('facilityid')
    )

    # Rank statistics over the captured rows only.
    captured_rows = df[df['rank_less_than_equal_15'] == 1]
    captured_df = (
        captured_rows.groupby('facilityid')['best_exp_rank']
        .agg(list(stat_columns))
        .rename(columns=stat_columns)
        .reset_index()
    )

    # Left merge: an inner merge would drop facilities with zero captured
    # transfers, hiding exactly the facilities whose recall is 0.
    merged_df = pd.merge(total_transfers_df, captured_df, how='left', on='facilityid')
    merged_df[count_col] = merged_df[count_col].fillna(0).astype(int)

    merged_df[f'{env}_recall'] = (merged_df[count_col] / merged_df[total_col]).round(2)
    # Mean and std are rounded for display; the median is left untouched.
    for stat in ('mean', 'std'):
        merged_df[stat_columns[stat]] = merged_df[stat_columns[stat]].round(2)
    return merged_df

In [None]:
# Per-facility recall statistics for the prod result set.
prod_results = result_generate(df=prod, env='prod')
prod_results

In [None]:
# Per-facility recall statistics for the dev result set.
dev_results = result_generate(df=dev, env='dev')
dev_results

In [None]:
# Put the prod and dev statistics side by side; the inner merge keeps only
# facilities that appear in both result sets.
merged_results = pd.merge(prod_results, dev_results, how='inner', on='facilityid')
# Column-wise comparison instead of a row-by-row apply(): same boolean result
# (NaN on either side compares as False) without a Python call per row.
merged_results['is_dev_better_or_equal'] = (
    merged_results['dev_recall'] >= merged_results['prod_recall']
)
merged_subset = merged_results[['facilityid', 'prod_total_transfers', 'dev_total_transfers', 'prod_recall', 'dev_recall', 'is_dev_better_or_equal']]
merged_subset

In [None]:
# Shade every cell of a row light green when dev recall is at least prod recall.
merged_subset.style.apply(
    lambda row: ['background: lightgreen' if row.is_dev_better_or_equal else ''] * len(row),
    axis=1,
)