#### Takes 38min 54s to run fully 

In [None]:
import sys
import pandas as pd
from pathlib import Path
sys.path.insert(0, '/src')
from nlp.build_topic_model import TopicModel
from eliot import log_message, to_file
from shared.utils import get_client_class, get_memory_usage
to_file(sys.stdout)

In [None]:
# Show DataFrames in full in notebook output: no row/column truncation,
# no width-based wrapping, and no clipping of long cell text.
# `None` means "unlimited" for each option. The older `-1` spelling for
# max_colwidth was deprecated in pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)


In [None]:
# Each customer-specific topic model is named after its client.
client = 'avante'

# Progress notes are read from the raw-data directory.
raw_path = Path('/data/raw')
notes_df = pd.read_parquet(raw_path / 'patient_progress_notes.parquet')

In [None]:
%%time

nlp = TopicModel(
    progress_notes_df=notes_df, 
    name=client
)
nlp.execute()

### ================== Print Topics =======================

In [None]:
from nlp.load import NlpFeatureModel

# Load the stored 'avante' topic model together with its dictionary.
nlp_model = NlpFeatureModel()
model, dictionary = nlp_model.load_topic_model(name='avante')

# One line per topic.
# NOTE(review): -1 presumably asks for all topics (gensim convention) - confirm.
for idx, topic in model.print_topics(-1):
    print(f'Topic: {idx} Word: {topic}')

In [None]:
import pandas as pd
import gc
import numpy as np
import sys
from pathlib import Path
sys.path.insert(0, '/src')
from shared.generate_base_features import base_feature_processing
from shared.generate_lab_features import get_lab_features
from shared.load_raw_data import fetch_training_cache_data
from shared.utils import get_client_class, get_memory_usage
from eliot import start_action, start_task, to_file, log_message
to_file(sys.stdout)

In [None]:
# Make sure the processed-data output directory exists.
processed_path = Path('/data/processed')
processed_path.mkdir(parents=True, exist_ok=True)

S3_BUCKET = 'saiva-dev-data-bucket'
CLIENT = 'avante'

clientClass = get_client_class(client=CLIENT)

# Fetch the client's training tables (the original note says these come from
# the local directory cache) and print the shape of each one.
result_dict = fetch_training_cache_data(client=CLIENT, generic=True)
for key, value in result_dict.items():
    print(f'{key} : {value.shape}')

In [None]:
# Work with the 'patient_rehosps' table from the loaded results and show its
# (rows, columns) shape.
rehosps_df = result_dict['patient_rehosps']
rehosps_df.shape

In [None]:
# Order the rows by transfer date. The sort is in place, so the same frame
# object stored in result_dict['patient_rehosps'] is reordered as well.
rehosps_df.sort_values(by='dateoftransfer', inplace=True)

In [None]:
# Add string calendar keys derived from the transfer date: 'year' (YYYY),
# 'month' (MM) and the combined 'year-month' (YYYY-MM) used for grouping.
for column, fmt in (('year', '%Y'), ('month', '%m'), ('year-month', '%Y-%m')):
    rehosps_df[column] = rehosps_df['dateoftransfer'].dt.strftime(fmt)

rehosps_df.head()

In [None]:
# Non-null value counts per column for each transfer reason, ordered so the
# reason with the most patientid entries comes first.
reason_counts = rehosps_df.groupby("transferreason").count()
reason_counts.sort_values(by=['patientid'], ascending=False)

In [None]:
# Keep only transfers that happened during calendar year 2019.
# Half-open bounds (>= 1 Jan 2019, < 1 Jan 2020) cover the whole year; the
# previous strict `> '2019-01-01'` / `< '2019-12-31'` comparison dropped
# transfers stamped exactly at midnight on 1 Jan and everything on 31 Dec.
in_2019 = (rehosps_df['dateoftransfer'] >= '2019-01-01') & (rehosps_df['dateoftransfer'] < '2020-01-01')
# .copy() gives an independent frame so later column assignments on it do not
# trigger pandas' chained-assignment warning.
rehosps_df = rehosps_df[in_2019].copy()

In [None]:
# Transfers per facility per month: count non-null patientid values in each
# (facilityid, year-month) group, newest month first, and expose that count
# under the column name 'count'.
monthly_counts = rehosps_df.groupby(['facilityid', 'year-month']).count()
monthly_counts = monthly_counts.sort_values(by=['year-month'], ascending=False).reset_index()
df = monthly_counts[['facilityid', 'year-month', 'patientid']].rename(columns={"patientid": "count"})

In [None]:
# Preview the first rows of the per-facility, per-month count table.
df.head()

In [None]:
! pip install matplotlib

In [None]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
# Create a 15x13 inch, 80 dpi figure with a white face and black edge.
# NOTE(review): with the inline notebook backend a figure created in its own
# cell is typically rendered and closed when that cell finishes, so plots drawn
# in later cells may not pick up these size settings - confirm.
figure(num=None, figsize=(15, 13), dpi=80, facecolor='w', edgecolor='k')


In [None]:

# Monthly transfer counts, one line per facility.

# Create the figure in the same cell as the plotting calls so the size
# settings apply to the figure that is actually drawn.
plt.figure(num=None, figsize=(15, 13), dpi=80, facecolor='w', edgecolor='k')

color = {
    1: 'red',
    3: 'blue',
    4: 'orange',
    5: 'green',
    6: 'yellow',
    7: 'red',
    8: 'blue',
    9: 'orange',
    10: 'green',
    13: 'yellow',
    21: 'black',
}
facility = [1, 3, 4, 5, 6, 7, 8, 9, 10, 13, 21]

# The palette repeats, so a colour that was already used is drawn dashed to
# keep the two facilities sharing it distinguishable.
used_colors = set()
for facility_id in facility:  # not `id`: that would shadow the builtin
    line_color = color[facility_id]
    line_style = '--' if line_color in used_colors else '-'
    used_colors.add(line_color)

    # `df` is ordered newest-first and 'year-month' holds strings. Sort each
    # facility's rows chronologically and plot real dates so the x axis runs
    # forward in time even when a facility has no rows for some months.
    _df = df[df['facilityid'] == facility_id].sort_values(by='year-month')
    plt.plot(
        pd.to_datetime(_df['year-month'], format='%Y-%m'),
        _df['count'],
        marker='o',
        markerfacecolor='blue',
        markersize=7,
        color=line_color,
        linestyle=line_style,
        linewidth=2,
        label=facility_id,
    )

plt.xlabel('year-month')
plt.ylabel('count')

# show legend (built from each line's own label)
plt.legend()

# show graph
plt.show()

In [None]:
from shared.database import DbEngine

# Obtain a SQL engine for the 'avante' client database; `client_engine` is the
# connection handed to pd.read_sql by the query cells.
engine = DbEngine()
client_engine = engine.get_sqldb_engine(clientdb_name='avante')

In [None]:


# Admission-log rows for SNF facilities with DateOfAdmission from 1 June 2018
# through the end of November 2020.
# The upper bound is half-open (< '2020-12-01'): '2020-11-31' is not a real
# date (November has 30 days), and an inclusive BETWEEN on a date-time column
# would also miss admissions timestamped during the final day.
query = """select patientid, facilityid, DateOfAdmission, AdmissionStatus, HospitalDischargeDate,
PayerTypeDescription, AdmissionInEffectiveDate, AdmittedWithinLast30Days, TransferredWithin30DaysOfAdmission
from view_ods_hospital_transfers_admission_log
where DateOfAdmission >= '2018-06-01' and DateOfAdmission < '2020-12-01'
and facilityid in (select facilityid from view_ods_facility where lineofbusiness = 'SNF')"""


admission_df = pd.read_sql(query, con=client_engine)


In [None]:
# Preview the first rows returned by the admission-log query.
admission_df.head()

In [None]:
# Add string calendar keys derived from the admission date: 'year' (YYYY),
# 'month' (MM) and the combined 'year-month' (YYYY-MM) used for grouping.
for column, fmt in (('year', '%Y'), ('month', '%m'), ('year-month', '%Y-%m')):
    admission_df[column] = admission_df['DateOfAdmission'].dt.strftime(fmt)

admission_df.head()

In [None]:
# Admissions per facility per month: count non-null patientid values in each
# (facilityid, year-month) group, newest month first, and expose that count
# under the column name 'count'.
monthly_admissions = admission_df.groupby(['facilityid', 'year-month']).count()
monthly_admissions = monthly_admissions.sort_values(by=['year-month'], ascending=False).reset_index()
adf = monthly_admissions[['facilityid', 'year-month', 'patientid']].rename(columns={"patientid": "count"})
adf.head(10)

In [None]:
# Monthly admission counts, one line per facility.
figure(num=None, figsize=(15, 13), dpi=80, facecolor='w', edgecolor='k')

color = {
    1: 'red',
    3: 'blue',
    4: 'orange',
    5: 'green',
    6: 'yellow',
    7: 'red',
    8: 'blue',
    9: 'orange',
    10: 'green',
    13: 'yellow',
    21: 'black',
}
facility = [1, 3, 4, 5, 6, 7, 8, 9, 10, 13, 21]

# The palette repeats, so a colour that was already used is drawn dashed to
# keep the two facilities sharing it distinguishable.
used_colors = set()
for facility_id in facility:  # not `id`: that would shadow the builtin
    line_color = color[facility_id]
    line_style = '--' if line_color in used_colors else '-'
    used_colors.add(line_color)

    # Plot the admission counts (`adf`). This cell previously re-plotted the
    # transfer counts in `df`, duplicating the earlier chart.
    # `adf` is ordered newest-first and 'year-month' holds strings, so sort
    # chronologically and plot real dates to get a forward-running time axis.
    _adf = adf[adf['facilityid'] == facility_id].sort_values(by='year-month')
    plt.plot(
        pd.to_datetime(_adf['year-month'], format='%Y-%m'),
        _adf['count'],
        marker='o',
        markerfacecolor='blue',
        markersize=7,
        color=line_color,
        linestyle=line_style,
        linewidth=2,
        label=facility_id,
    )

plt.xlabel('year-month')
plt.ylabel('count')

# show legend (built from each line's own label)
plt.legend()

# show graph
plt.show()

In [None]:
# Facility 7: monthly transfer counts (red) against monthly admission counts (blue).
figure(num=None, figsize=(18, 16), dpi=80, facecolor='w', edgecolor='k')
facilityid = 7

# Both count tables are ordered newest-first with string 'year-month' keys and
# may cover different date ranges. Sorting and plotting real dates puts both
# series on one chronological x axis instead of a first-seen category order.
_df = df[df['facilityid'] == facilityid].sort_values(by='year-month')
plt.plot(
    pd.to_datetime(_df['year-month'], format='%Y-%m'),
    _df['count'],
    marker='o',
    markerfacecolor='blue',
    markersize=7,
    color='red',
    linewidth=2,
    label=f'facility {facilityid}: transfers',
)

_adf = adf[adf['facilityid'] == facilityid].sort_values(by='year-month')
plt.plot(
    pd.to_datetime(_adf['year-month'], format='%Y-%m'),
    _adf['count'],
    marker='o',
    markerfacecolor='blue',
    markersize=7,
    color='blue',
    linewidth=2,
    label=f'facility {facilityid}: admissions',
)

plt.xlabel('year-month')
plt.ylabel('count')

# show legend (each line's own label says which series it is)
plt.legend()

# show graph
plt.show()

In [None]:
# Facility 4: monthly transfer counts (red) against monthly admission counts (blue).
figure(num=None, figsize=(13, 11), dpi=80, facecolor='w', edgecolor='k')
facilityid = 4

# Both count tables are ordered newest-first with string 'year-month' keys and
# may cover different date ranges. Sorting and plotting real dates puts both
# series on one chronological x axis instead of a first-seen category order.
_df = df[df['facilityid'] == facilityid].sort_values(by='year-month')
plt.plot(
    pd.to_datetime(_df['year-month'], format='%Y-%m'),
    _df['count'],
    marker='o',
    markerfacecolor='blue',
    markersize=7,
    color='red',
    linewidth=2,
    label=f'facility {facilityid}: transfers',
)

_adf = adf[adf['facilityid'] == facilityid].sort_values(by='year-month')
plt.plot(
    pd.to_datetime(_adf['year-month'], format='%Y-%m'),
    _adf['count'],
    marker='o',
    markerfacecolor='blue',
    markersize=7,
    color='blue',
    linewidth=2,
    label=f'facility {facilityid}: admissions',
)

plt.xlabel('year-month')
plt.ylabel('count')

# show legend (labels name the plotted facility; the old legend was hard-coded to 7)
plt.legend()

# show graph
plt.show()

In [None]:
# Facility 3: monthly transfer counts (red) against monthly admission counts (blue).
figure(num=None, figsize=(13, 11), dpi=80, facecolor='w', edgecolor='k')
facilityid = 3

# Both count tables are ordered newest-first with string 'year-month' keys and
# may cover different date ranges. Sorting and plotting real dates puts both
# series on one chronological x axis instead of a first-seen category order.
_df = df[df['facilityid'] == facilityid].sort_values(by='year-month')
plt.plot(
    pd.to_datetime(_df['year-month'], format='%Y-%m'),
    _df['count'],
    marker='o',
    markerfacecolor='blue',
    markersize=7,
    color='red',
    linewidth=2,
    label=f'facility {facilityid}: transfers',
)

_adf = adf[adf['facilityid'] == facilityid].sort_values(by='year-month')
plt.plot(
    pd.to_datetime(_adf['year-month'], format='%Y-%m'),
    _adf['count'],
    marker='o',
    markerfacecolor='blue',
    markersize=7,
    color='blue',
    linewidth=2,
    label=f'facility {facilityid}: admissions',
)

plt.xlabel('year-month')
plt.ylabel('count')

# show legend (labels name the plotted facility; the old legend was hard-coded to 7)
plt.legend()

# show graph
plt.show()

In [None]:
# Non-null value counts per column for each AdmissionStatus, ordered so the
# status with the most patientid entries comes first.
status_counts = admission_df.groupby("AdmissionStatus").count()
status_counts.sort_values(by=['patientid'], ascending=False)

In [None]:
# List the admission frame's column names.
admission_df.columns

In [None]:
# List the transfer frame's column names.
rehosps_df.columns

In [None]:
# Print the (rows, columns) shape of the admission frame, then the transfer frame.
for frame in (admission_df, rehosps_df):
    print(frame.shape)

In [None]:
# Join 'Post Acute' admissions to the transfer rows that share the same
# patientid and facilityid (pd.merge's default inner join), ordered by
# admission date, then show the size of the result.
post_acute_admissions = admission_df[admission_df['AdmissionStatus'] == 'Post Acute']
m_df = pd.merge(
    post_acute_admissions,
    rehosps_df,
    on=['patientid', 'facilityid'],
).sort_values(by='DateOfAdmission')
m_df.shape

In [None]:
# Join 'Chronic Long-Term' admissions to the transfer rows that share the same
# patientid and facilityid (pd.merge's default inner join), ordered by
# admission date, then show the size of the result.
long_term_admissions = admission_df[admission_df['AdmissionStatus'] == 'Chronic Long-Term']
m_df = pd.merge(
    long_term_admissions,
    rehosps_df,
    on=['patientid', 'facilityid'],
).sort_values(by='DateOfAdmission')
m_df.shape

In [None]:

# Facility-patient rows for SNF facilities, LEFT JOINed to the admission log on
# patientid, facilityid and the calendar date of InitialAdmissionDate matching
# DateOfAdmission. Patients without a matching admission-log row are kept with
# null DateOfAdmission / AdmissionStatus.
query = """select fp.patientid, fp.facilityid, fp.masterpatientid, fp.InitialAdmissionDate, fp.Allergies,fp.Education,
al.DateOfAdmission, al.AdmissionStatus   
from view_ods_facility_patient fp
LEFT JOIN view_ods_hospital_transfers_admission_log al 
ON (fp.patientid = al.patientid AND fp.facilityid = al.facilityid AND CAST(fp.InitialAdmissionDate AS DATE) = CAST(al.DateOfAdmission AS DATE))
where fp.facilityid in (select facilityid from view_ods_facility where lineofbusiness = 'SNF')
"""
# Run the query on the client database engine and preview the first rows.
master_patient_df = pd.read_sql(query, con=client_engine)

master_patient_df.head()

In [None]:
# Show the rows whose DateOfAdmission is null, i.e. the LEFT JOIN found no
# admission-log entry for the patient's initial admission date.
missing_admission = master_patient_df['DateOfAdmission'].isnull()
master_patient_df[missing_admission]