## Set `CLIENT` once in `shared.constants` before executing this notebook

In [None]:
import sys
import pandas as pd
# Put '/src' first on the module search path. The `shared` imports below
# presumably resolve from there, so this line must run before them.
sys.path.insert(0, '/src')
from shared.load_raw_data import fetch_training_data, fetch_training_cache_data
from shared.database import DbEngine
from shared.utils import get_client_class
from shared.constants import CLIENT
from eliot import to_file
# Send eliot log messages to stdout so they show up in the cell output.
to_file(sys.stdout)

In [None]:
import os

# Look up the class registered for the configured CLIENT and ask a fresh
# instance for its training window (start date, end date).
clientClass = get_client_class(client=CLIENT)
TRAIN_START_DATE, TRAIN_END_DATE = clientClass().get_training_dates()

# Echo the run configuration so the notebook output records which client,
# environment (SAIVA_ENV, defaulting to 'dev') and date range were used.
print(CLIENT)
print(os.environ.get('SAIVA_ENV','dev'))
print(TRAIN_START_DATE, TRAIN_END_DATE)

### =============== Delete all files inside /data/processed & /data/raw ==================

In [None]:
import shutil
from pathlib import Path

# DESTRUCTIVE - intentionally left commented out.
# Uncommenting the lines below recursively removes the /data/processed and
# /data/raw directory trees (the directories themselves, not only the files
# inside them). Only enable this when a full local reset is really wanted.
# processed_path = Path('/data/processed')
# raw_path = Path('/data/raw')
# shutil.rmtree(processed_path)
# shutil.rmtree(raw_path)
# print("Success.......")

### ======================== Load Database ========================

In [None]:
# A single DbEngine instance hands out both engines created in this cell.
engine = DbEngine()
# Postgres engine (judging by the method name); no active cell shown in this
# notebook uses it afterwards.
saiva_engine = engine.get_postgresdb_engine()
# SQL engine for the database named after the configured CLIENT; this is the
# engine passed to the connectivity check and to fetch_training_data below.
client_sql_engine = engine.get_sqldb_engine(clientdb_name=CLIENT)

In [None]:
# Verify connectivity to the client database (the engine created for CLIENT)
# before starting the data fetch.
engine.verify_connectivity(client_sql_engine)

### ======================== Fetch Data ============================

In [None]:
# Load the training data for CLIENT from the client SQL database and store it
# in a local directory as a cache.
# Note: the end of the training window is passed as `test_end_date`.

result_dict = fetch_training_data(
    client=CLIENT, 
    client_sql_engine=client_sql_engine, 
    train_start_date=TRAIN_START_DATE, 
    test_end_date=TRAIN_END_DATE
)

In [None]:
# Tables that must be present in result_dict; a missing key raises KeyError,
# exactly as the individual lookups did.
REQUIRED_TABLES = (
    'master_patient_lookup',
    'patient_census',
    'patient_rehosps',
    'patient_demographics',
    'patient_diagnosis',
    'patient_vitals',
    'patient_meds',
    'patient_orders',
    'patient_alerts',
    'patient_progress_notes',
)

# Print "<table name> <shape>" for every required table, in a fixed order.
for table_name in REQUIRED_TABLES:
    print(table_name, result_dict[table_name].shape)

# Lab results are optional: report them only when present and non-empty.
lab_results = result_dict.get('patient_lab_results', pd.DataFrame())
if not lab_results.empty:
    print('patient_lab_results', lab_results.shape)

print(result_dict.keys())

# have a max of 15042 master_patient_lookup rows ie. Infinity-Infinity

### ==================== If Multiple clients data need to be merged ====================

In [None]:
# Loads the data from the SQL db for multiple clients and stores it in a local directory as a cache

# for client in ['avante','gulfshore','palmgarden']:
#     print(f'*********************** Processing for {client} ******************************')
#     clientClass = get_client_class(client)
#     TRAIN_START_DATE, TRAIN_END_DATE = getattr(clientClass(), 'get_training_dates')()
#     print(TRAIN_START_DATE, TRAIN_END_DATE)
    
#     engine = DbEngine()
#     saiva_engine = engine.get_postgresdb_engine()
#     client_sql_engine = engine.get_sqldb_engine(clientdb_name=client)
#     engine.verify_connectivity(client_sql_engine)
#     result_dict = fetch_training_data(client, client_sql_engine, TRAIN_START_DATE, TRAIN_END_DATE)
    
#     print('master_patient_lookup', result_dict['master_patient_lookup'].shape)
#     print('patient_census',result_dict['patient_census'].shape)
#     print('patient_rehosps',result_dict['patient_rehosps'].shape)
#     print('patient_demographics',result_dict['patient_demographics'].shape)
#     print('patient_diagnosis',result_dict['patient_diagnosis'].shape)
#     print('patient_vitals',result_dict['patient_vitals'].shape)
#     print('patient_meds',result_dict['patient_meds'].shape)
#     print('patient_orders',result_dict['patient_orders'].shape)
#     print('patient_alerts',result_dict['patient_alerts'].shape)
#     print('patient_progress_notes',result_dict['patient_progress_notes'].shape)
#     if not result_dict.get('patient_lab_results', pd.DataFrame()).empty:
#         print('patient_lab_results',result_dict['patient_lab_results'].shape)
#     print(result_dict.keys())

### ======================== TESTING ==========================

In [None]:
# Once fetch_training_data loads the data, use the same cache 

# result_dict = fetch_training_cache_data(CLIENT)

# print('master_patient_lookup', result_dict['master_patient_lookup'].shape)
# print('patient_census',result_dict['patient_census'].shape)
# print('patient_rehosps',result_dict['patient_rehosps'].shape)
# print('patient_demographics',result_dict['patient_demographics'].shape)
# print('patient_diagnosis',result_dict['patient_diagnosis'].shape)
# print('patient_vitals',result_dict['patient_vitals'].shape)
# print('patient_meds',result_dict['patient_meds'].shape)
# print('patient_orders',result_dict['patient_orders'].shape)
# print('patient_alerts',result_dict['patient_alerts'].shape)
# print('patient_progress_notes',result_dict['patient_progress_notes'].shape)
# if not result_dict.get('patient_lab_results', pd.DataFrame()).empty:
#     print('patient_lab_results',result_dict['patient_lab_results'].shape)
# print(result_dict.keys())

# have a max of 15042 master_patient_lookup rows ie. Infinity-Infinity

In [None]:
# TESTING specific queries

# query=f"""
#         select distinct patientid, facilityid, orderdate, gpiclass, 
#         gpisubclassdescription, orderdescription, pharmacymedicationname, a.PhysiciansOrderID
#         from view_ods_physician_order_list_v2 a
#         inner join view_ods_physician_order_list_med b
#         on a.PhysiciansOrderID = b.PhysiciansOrderID 
#         where orderdate between '{TRAIN_START_DATE}' and '{TRAIN_END_DATE}';
#         """

# df = pd.read_sql(query, con=client_sql_engine)
# print(df.shape)
# df.head()