In [None]:
!pip install matplotlib seaborn

In [None]:
import pandas as pd
import numpy as np
import gc
import sys
from pathlib import Path
sys.path.insert(0, '/src')

from utils.database import DbEngine
from utils.load_data import DataLoader, PatientCensus
from datetime import timedelta
import timeit
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import random
import plotly.express as px
import plotly.figure_factory as ff

from pylab import *
from sklearn.cluster import KMeans
from scipy.stats import gaussian_kde
from sklearn.neighbors import KernelDensity

# Show frames without truncating rows, columns, line width or cell contents.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None is the supported "unlimited" value; the old -1 sentinel was deprecated in
# pandas 1.0 and raises ValueError on pandas 2.x.
pd.set_option('display.max_colwidth', None)

In [None]:
import os

# Study configuration: client database to analyse and the transfer window.
# NOTE(review): the former `clientClass = get_client_class(client=CLIENT)` line was
# removed. `get_client_class` is not imported anywhere in this notebook (NameError on
# a fresh kernel) and `clientClass` was never read. Re-add it with its import if needed.
CLIENT = 'trio'
START_DATE, END_DATE = '2021-01-01', '2021-05-18'

# Echo the effective settings so the rendered notebook records what was run.
print(CLIENT)
print(os.environ.get('SAIVA_ENV','dev'))
print(START_DATE, END_DATE)

In [None]:
# Database handles used by the rest of the notebook:
#   saiva_engine      - passed to the hospital_transfers / daily_predictions queries
#   client_sql_engine - passed to the client census query and to DataLoader
engine = DbEngine()
saiva_engine = engine.get_postgresdb_engine()
client_sql_engine = engine.get_sqldb_engine(clientdb_name=CLIENT)

In [None]:
# verify connectivity
# NOTE(review): only the client engine is checked here; saiva_engine is not.
engine.verify_connectivity(client_sql_engine)

In [None]:

# Hospital transfers for CLIENT inside [START_DATE, END_DATE], each row tagged with
# the patient's masterpatientid. The LEFT JOIN keeps transfers that have no matching
# facility_patient row, so masterpatientid can be NULL in the result.
# NOTE(review): if dateoftransfer carries a time of day, `between ... and '{END_DATE}'`
# stops at midnight at the start of END_DATE - confirm this is intended.
query=f"""
        select fp.masterpatientid,ht.* from hospital_transfers ht
        left join public.facility_patient fp
                       on ht.client = fp.client
                           and ht.facilityid = fp.facilityid
                           and ht.patientid = fp.patientid
            where ht.dateoftransfer between '{START_DATE}' and '{END_DATE}'
            and ht.client='{CLIENT}'
        """

transfer_df = pd.read_sql(query, con=saiva_engine)

# Distinct patients with at least one transfer (may include a NULL entry, see above).
transfered_masterpatient_ids = tuple(transfer_df['masterpatientid'].unique())
print(transfer_df.shape)
print(f'Unique Masterpatients: {len(transfered_masterpatient_ids)}')
transfer_df.head(2)

In [None]:
# NON-TRANSFERRED ("safe") PATIENTS
# Daily census rows in the study window for every patient who does not appear in
# transfer_df.

# Distinct, non-null ids only. The left join behind transfer_df can leave
# masterpatientid empty, and a NaN/None rendered into the SQL text is invalid.
# .tolist() yields plain Python scalars so repr() below is SQL-friendly.
transfered_masterpatient_ids = tuple(
    transfer_df['masterpatientid'].dropna().drop_duplicates().tolist()
)

# Build the exclusion list explicitly instead of interpolating the tuple itself:
# a one-element tuple renders as "(x,)" and an empty one as "()", both invalid SQL.
# With no transfers at all the filter is simply omitted.
excluded_ids_sql = ', '.join(repr(pid) for pid in transfered_masterpatient_ids)
exclude_transfers_clause = (
    f'and fp.masterpatientid not in ({excluded_ids_sql})'
    if transfered_masterpatient_ids
    else ''
)

query=f"""
        select dc.censusdate,fp.* from view_ods_daily_census_v2 dc 
        JOIN view_ods_facility_patient fp 
        ON (fp.patientid = dc.patientid and fp.facilityid = dc.facilityid)
            where dc.censusdate between '{START_DATE}' and '{END_DATE}'
            {exclude_transfers_clause}
        """

safe_df = pd.read_sql(query, con=client_sql_engine)

safe_masterpatient_ids = tuple(safe_df['masterpatientid'].unique())
print(f'Unique Masterpatients: {len(safe_masterpatient_ids)}')

print(safe_df.shape)
safe_df.head(2)

### ========================================================================

In [None]:
# One row per transfer (per model) for CLIENT, annotated with the predictions made in
# the 0-3 days before the transfer:
#   best_exp_rank   - smallest group_rank among those predictions
#   show_in_report  - true if any of them was flagged show_in_report
#   num_predictions - number of joined predictions (0 when none matched)
# Hospice payers are excluded. `%%` is an escaped literal `%` for the LIKE pattern.
# NOTE(review): unlike the transfer_df query, this one has no END_DATE upper bound
# (only `dateoftransfer >= START_DATE`) - confirm that is intended.
query=f"""
with rh as (
    select ht.*,
           fa.facilityname,
           fp.masterpatientid,
           dp.modelid,
           mm.modeldescription,
           dp.group_rank,
           dp.show_in_report,
           fp.patientmrn,
           fp.firstname,
           fp.lastname
    from public.hospital_transfers ht
             left join public.facility_patient fp
                       on ht.client = fp.client
                           and ht.facilityid = fp.facilityid
                           and ht.patientid = fp.patientid
             left join daily_predictions dp
                       on ht.client = dp.client
                           and ht.facilityid = dp.facilityid
                           and (date(ht.dateoftransfer) - date(dp.censusdate)) <= 3
                           and date(dp.censusdate) <= date(ht.dateoftransfer)
                           and fp.masterpatientid = dp.masterpatientid
             left join model_metadata mm
                       on dp.modelid = mm.modelid
             left join facility fa
                       on fa.facilityid = ht.facilityid
                           and fa.client = ht.client
    where (dp.published = True or dp.published is null)
      and ht.dateoftransfer >= '{START_DATE}'
      and (dp.experiment_group = True or dp.experiment_group is null)
      and (lower(ht.payerdescription) NOT LIKE '%%hospice%%' or ht.payerdescription is null)
      and ht.client = '{CLIENT}'
)
SELECT 
       rh.facilityid,
       rh.facilityname,
       rh.dateoftransfer,
       rh.masterpatientid,
       rh.patientmrn,
       rh.lengthofstay,      
       rh.transferreason,
       rh.otherreasonfortransfer,
       rh.lastname,
       rh.firstname,
       rh.planned,
       rh.modeldescription,
       rh.outcome,
       rh.patientid,
       rh.transferredto,
       rh.client,
       rh.payertype,
       rh.payerdescription,
       min(group_rank) as best_exp_rank,        
       bool_or(rh.show_in_report) as show_in_report,
       -- count of how many predictions were made for that day (the number of rows that were grouped)
       -- have to special case for when we made no predictions because there would be still be 1 row
       (CASE
            WHEN bool_or(rh.show_in_report) IS NULL
                THEN 0
            ELSE count(*)
           END
           ) as num_predictions 
FROM rh
GROUP BY rh.client, rh.facilityid, rh.facilityname, rh.modelid, rh.modeldescription,
         rh.patientid, rh.masterpatientid, rh.patientmrn, rh.lastname, rh.firstname, rh.dateoftransfer,
         rh.planned, rh.transferreason, rh.otherreasonfortransfer, rh.outcome,
         rh.transferredto, rh.lengthofstay, rh.payertype, rh.payerdescription
        """

# Run the transfers-with-predictions query, then keep only rows whose facility
# could be resolved (facilityname present).
all_transfer_rows = pd.read_sql(query, con=saiva_engine)
df = all_transfer_rows[all_transfer_rows['facilityname'].notna()]

unique_patient_count = len(df["masterpatientid"].unique())
print(f'Unique Masterpatients: {unique_patient_count}')
print(df.shape)

In [None]:
# Split transfers by whether any prediction was joined to them:
#   missed_df    - num_predictions == 0
#   predicted_df - num_predictions > 0
missed_df = df[df['num_predictions'] == 0]
predicted_df = df[df['num_predictions'] > 0]
print(missed_df.shape)
print(predicted_df.shape)

# Distinct non-null patient ids of each subset, most frequently transferred first
# (value_counts sorts descending and drops NaN by default).
missed_masterpatientids = tuple(missed_df['masterpatientid'].value_counts().index)
predicted_masterpatientids = tuple(predicted_df['masterpatientid'].value_counts().index)


In [None]:
missed_df['transferreason'].value_counts( normalize=False, sort=True, ascending=False, bins=None, dropna=True).nlargest(10)

In [None]:
predicted_df['transferreason'].value_counts( normalize=False, sort=True, ascending=False, bins=None, dropna=True).nlargest(10)

In [None]:
missed_df['transferreason'].unique()

In [None]:
predicted_df['transferreason'].unique()

## ===========================================================

In [None]:
condition = (missed_df['dateoftransfer'] == '2021-04-07')

_df = missed_df[condition].sort_values(by=['dateoftransfer'], ascending=False)
missed_masterpatientids = tuple(_df['masterpatientid'])
_df.head(5)

In [None]:
condition = (predicted_df['dateoftransfer'] == '2021-04-07') 

_df = predicted_df[condition].sort_values(by=['dateoftransfer'], ascending=False)
predicted_masterpatientids = tuple(_df['masterpatientid'])
_df.head(5)

### =================================================================

In [None]:
# Load client data for the two cohorts over the whole study period:
#   tp_data - patients with at least one transfer
#   sf_data - patients with census rows but no transfer ("safe")
# Later cells read .demo_df, .alt_df, .dg_df, .ord_df and .vital_df from these objects.
tp_data = DataLoader(client_sql_engine=client_sql_engine, 
                     masterpatientid_list=transfered_masterpatient_ids)
sf_data = DataLoader(client_sql_engine=client_sql_engine, 
                     masterpatientid_list=safe_masterpatient_ids)


In [None]:
census_date = pd.to_datetime('2021-01-14')


def _age_on_census_date(born):
    """Return completed years between `born` and the notebook-level `census_date`."""
    # True (counts as 1) when this year's birthday has not been reached yet.
    birthday_pending = (census_date.month, census_date.day) < (born.month, born.day)
    return census_date.year - born.year - birthday_pending


# Adds an 'age' column in place to both cohorts' demographics frames.
tp_data.demo_df['age'] = tp_data.demo_df['dateofbirth'].apply(_age_on_census_date)
sf_data.demo_df['age'] = sf_data.demo_df['dateofbirth'].apply(_age_on_census_date)

## Share of safe / transferred patients by state

In [None]:
# tp_data.demo_df['state'].value_counts( normalize=False, sort=True, ascending=False, bins=None, dropna=True).plot(kind='barh')

total_rows = tp_data.demo_df.shape[0]
tp_data.demo_df.groupby('state')['state'].count().apply(lambda x: (x*100)/total_rows).plot(kind='barh')

In [None]:
# sf_data.demo_df['state'].value_counts( normalize=False, sort=True, ascending=False, bins=None, dropna=True).plot(kind='barh')

total_rows = sf_data.demo_df.shape[0]
sf_data.demo_df.groupby('state')['state'].count().apply(lambda x: (x*100)/total_rows).plot(kind='barh')

## Average patient age by facility

In [None]:
# Mean of the 'age' column per facility for the transferred cohort.
tp_data.demo_df.groupby('facilityid')['age'].mean().plot(kind='barh')

In [None]:
# Same chart for the safe cohort.
sf_data.demo_df.groupby('facilityid')['age'].mean().plot(kind='barh',figsize=(10, 8))

## Percentage of transferred patients by state

In [None]:
# Transferred-cohort row count per state, kept as a dict for the lookup below.
transfers_by_state = tp_data.demo_df.groupby('state')['state'].count()
total_count = dict(transfers_by_state)

# Stack both cohorts so each state's group holds all of its rows.
_df = pd.concat([tp_data.demo_df, sf_data.demo_df])


def _transfer_pct(state_rows):
    """Percentage of one state's combined rows that come from the transferred cohort."""
    transferred = total_count.get(state_rows.name, 0)
    return (100 * transferred) / state_rows['state'].count()


_df.groupby('state').apply(_transfer_pct).plot(kind='barh')


## % of alerts and diagnoses for safe & transferred patients

In [None]:
total_rows = sf_data.alt_df.shape[0]
sf_data.alt_df['alertdescription'].value_counts( 
    normalize=False, 
    sort=True, 
    ascending=False, 
    bins=None, 
    dropna=True
   ).nlargest(20).apply(lambda x: (x*100)/total_rows).plot(kind='barh')

In [None]:
total_rows = tp_data.alt_df.shape[0]
tp_data.alt_df['alertdescription'].value_counts( 
    normalize=False, 
    sort=True, 
    ascending=False, 
    bins=None, 
    dropna=True
   ).nlargest(20).apply(lambda x: (x*100)/total_rows).plot(kind='barh')

In [None]:
total_rows = tp_data.dg_df.shape[0]
print(total_rows)
tp_data.dg_df['diagnosisdesc'].value_counts( 
    normalize=False, 
    sort=True, 
    ascending=False, 
    bins=None, 
    dropna=True
   ).nlargest(20).apply(lambda x: (x*100)/total_rows).plot(kind='barh',figsize=(15, 10))


In [None]:
total_rows = sf_data.dg_df.shape[0]
print(total_rows)
sf_data.dg_df['diagnosisdesc'].value_counts( 
    normalize=False, 
    sort=True, 
    ascending=False, 
    bins=None, 
    dropna=True
   ).nlargest(20).apply(lambda x: (x*100)/total_rows).plot(kind='barh',figsize=(15, 10))


## ================= Compare data for a specific date ==================

In [None]:
# Reference day for the comparison, plus look-back cutoffs measured from it.
census_date = '2021-01-14'
sample_size = 25
sample_seed = 42  # fixed so the sampled safe cohort is the same on every run

census_ts = pd.to_datetime(census_date)
month_back_date = census_ts - timedelta(days=30)
fortnight_back_date = census_ts - timedelta(days=15)
week_back_date = census_ts - timedelta(days=7)
day3_back_date = census_ts - timedelta(days=3)

# Match on the calendar day: a plain `== census_date` only hits rows stamped exactly
# at midnight when the column carries a time of day, and never matches date objects.
census_day = census_ts.date()

on_census_day = pd.to_datetime(transfer_df['dateoftransfer']).dt.date == census_day
_df = transfer_df[on_census_day].sort_values(by=['dateoftransfer'], ascending=False)
# transfer_df comes from a left join, so drop transfers with no master patient id.
_transfered_masterpatient_ids = tuple(_df['masterpatientid'].dropna().unique())
print(f'Unique Masterpatients in Transfer patients: {len(_transfered_masterpatient_ids)}')

on_census_day = pd.to_datetime(safe_df['censusdate']).dt.date == census_day
_df = safe_df[on_census_day].sort_values(by=['censusdate'], ascending=False)
_safe_masterpatient_ids = tuple(_df['masterpatientid'].unique())
# random.sample raises ValueError when asked for more items than exist, so cap the
# sample at the population size. A private Random keeps the global RNG untouched.
_safe_masterpatient_ids = random.Random(sample_seed).sample(
    _safe_masterpatient_ids,
    min(sample_size, len(_safe_masterpatient_ids)),
)
# This is the size of the sample, not of the full safe population.
print(f'Unique Masterpatients in Safe patients: {len(_safe_masterpatient_ids)}')


In [None]:
# Reload both cohorts restricted to the patients selected for census_date.
# This replaces the full-period tp_data / sf_data created earlier.
# client_sql_engine is passed explicitly, matching the earlier DataLoader calls.
# NOTE(review): confirm DataLoader accepts client_sql_engine together with census_date.
tp_data = DataLoader(client_sql_engine=client_sql_engine,
                     masterpatientid_list=_transfered_masterpatient_ids,
                     census_date=census_date)
sf_data = DataLoader(client_sql_engine=client_sql_engine,
                     masterpatientid_list=_safe_masterpatient_ids,
                     census_date=census_date)

In [None]:
# Count of diagnosis per patient for last 1 week for a given date

condition = sf_data.dg_df['onsetdate'] > week_back_date

# sf_data.dg_df[condition].groupby(['masterpatientid'])['diagnosisdesc'].count().plot(kind='bar',figsize=(10, 6))
    
sf_data.dg_df[condition].groupby(['masterpatientid','diagnosisdesc'])['diagnosisdesc'].count().plot(kind='barh',figsize=(15,15))    


In [None]:
# Count of diagnosis per patient for last 1 week before the transfered date

condition = tp_data.dg_df['onsetdate'] > week_back_date

# sf_data.dg_df[condition].groupby(['masterpatientid'])['diagnosisdesc'].count().plot(kind='bar',figsize=(10, 6))
    
tp_data.dg_df[condition].groupby(['masterpatientid','diagnosisdesc'])['diagnosisdesc'].count().plot(kind='barh',figsize=(15,15))    


In [None]:
# Count of alerts per patient for last 1 week before transfer

condition = tp_data.alt_df['createddate'] > day3_back_date

tp_data.alt_df[condition].groupby(['masterpatientid'])['alertdescription'].count().plot(kind='bar',figsize=(10, 6))
    

In [None]:
# Count of alerts per patient for last 1 week before transfer

condition = sf_data.alt_df['createddate'] > day3_back_date

sf_data.alt_df[condition].groupby(['masterpatientid'])['alertdescription'].count().plot(kind='bar',figsize=(15, 15))
    

In [None]:
# Last one week alerts for all safe patients for a given day

condition = sf_data.alt_df['createddate'] > week_back_date

sf_data.alt_df[condition]['alertdescription'].value_counts( 
    normalize=False, 
    sort=True, 
    ascending=False, 
    bins=None, 
    dropna=True
   ).nlargest(20).plot(kind='barh',figsize=(15, 15))


In [None]:
# Last one week alerts for all transfered patients for a given day

condition = tp_data.alt_df['createddate'] > week_back_date

tp_data.alt_df[condition]['alertdescription'].value_counts( 
    normalize=False, 
    sort=True, 
    ascending=False, 
    bins=None, 
    dropna=True
   ).nlargest(20).plot(kind='barh',figsize=(15, 15))


In [None]:
# Count of order per patient for last 1 week before transfer

condition = tp_data.ord_df['orderdate'] > day3_back_date

tp_data.ord_df[condition].groupby(['masterpatientid','orderdescription'])['orderdescription'].count().plot(kind='barh',figsize=(15, 10))


In [None]:
# Count of order per patient for last 1 week before transfer

condition = sf_data.ord_df['orderdate'] > day3_back_date

sf_data.ord_df[condition].groupby(['masterpatientid','orderdescription'])['orderdescription'].count().plot(kind='barh',figsize=(15, 10))


In [None]:
# Transferred cohort: mean vital value per (patient, vital type) recorded after
# day3_back_date, i.e. in the 3 days before the transfer date.
condition = tp_data.vital_df['date'] > day3_back_date
recent_vitals = tp_data.vital_df.loc[condition]
mean_vitals = recent_vitals.groupby(['masterpatientid', 'vitalsdescription'])['value'].mean()
mean_vitals.plot(kind='barh', figsize=(15, 15))
    
    

In [None]:
# Transfers that happened on census_date, shown for reference next to the charts above.
# Compare on the calendar day so rows with a time of day (or stored as date objects)
# are not silently dropped by a plain `== census_date`.
census_day = pd.to_datetime(census_date).date()
on_census_day = pd.to_datetime(transfer_df['dateoftransfer']).dt.date == census_day
_df = transfer_df[on_census_day].sort_values(by=['dateoftransfer'], ascending=False)

_df.head(20)