## Migrate client files

In [1]:
import inspect
import sys
sys.path.insert(0, '/src')
import json

import os
from importlib import import_module
from shared.utils import pascal_case
import re
path = '/src/clients'

In [2]:
# Module names of every client file found under `path`: the file name with
# its ".py" suffix cut off. The package's __init__.py is not a client.
client_files = [
    file_name[:-len('.py')]
    for file_name in os.listdir(path)
    if file_name != '__init__.py' and file_name.endswith('.py')
]

# Placeholder arguments handed to the client query builders further down.
train_start_date = '2021-01-01'
facilityid = '1'
prediction_date = '2022-01-01'

In [3]:
# Placeholder arguments passed to the client query builders defined below.
train_start_date = '2021-01-01'
prediction_date = '2022-01-01'
facilityid = '1'


def get_training_queries(*, clientClass):
    """Return the training queries built by a freshly created client.

    Two call signatures of ``get_training_queries`` are tried in turn: first
    two positional dates, then the keyword form that additionally takes
    ``excluded_censusactioncodes``. The module-level ``train_start_date``
    placeholder is used for every date argument.

    Args:
        clientClass: Client class; it is instantiated without arguments.

    Returns:
        Whatever the client's ``get_training_queries`` returns.

    Raises:
        Exception: Whatever the keyword-form call raises when both forms fail.
    """
    try:
        queries = clientClass().get_training_queries(train_start_date, train_start_date)
    except Exception:
        # Only ordinary errors trigger the fallback. A bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit and silently retry.
        queries = clientClass().get_training_queries(
            test_end_date=train_start_date,
            train_start_date=train_start_date,
            excluded_censusactioncodes=['D'],
        )
    return queries


def get_prediction_queries(*, clientClass):
    """Return the prediction queries built by a freshly created client.

    Two call signatures of ``get_prediction_queries`` are tried in turn: first
    three positional arguments, then the keyword form that additionally takes
    ``excluded_censusactioncodes``. The module-level ``prediction_date``,
    ``facilityid`` and ``train_start_date`` placeholders supply the values.

    Args:
        clientClass: Client class; it is instantiated without arguments.

    Returns:
        Whatever the client's ``get_prediction_queries`` returns.

    Raises:
        Exception: Whatever the keyword-form call raises when both forms fail.
    """
    try:
        queries = clientClass().get_prediction_queries(prediction_date, facilityid, train_start_date)
    except Exception:
        # Only ordinary errors trigger the fallback. A bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit and silently retry.
        queries = clientClass().get_prediction_queries(
            prediction_date=prediction_date,
            facilityid=facilityid,
            train_start_date=train_start_date,
            excluded_censusactioncodes=['D'],
        )
    return queries


def _strip_census_exclusion(query):
    """Drop the ``andcensusactioncodenotin(...)`` clause from a whitespace-free query.

    The query is returned unchanged when the clause, or its closing
    parenthesis, is missing. The marker is matched case-sensitively.
    """
    head, marker, tail = query.partition("andcensusactioncodenotin(")
    if not marker:
        return query
    _, closing, rest = tail.partition(")")
    if not closing:
        return query
    return head + rest


def check_unique_queries(*, data, keep_client_file):
    """Print every query name whose SQL text differs between clients.

    Queries are compared after removing all whitespace. For ``patient_census``
    the ``censusactioncode not in (...)`` clause is also removed before the
    comparison, so clients differing only in that list count as identical.

    Args:
        data: Mapping of client name -> mapping of query name -> query string.
        keep_client_file: Client names to leave out of the comparison.

    Returns:
        None. The query names seen are printed first; then, for each query
        name with more than one distinct text, the name, the number of
        variants, one list of clients per variant and a separator line.
    """
    unique_queries = {}

    for client, queries in data.items():
        if client in keep_client_file:
            continue
        for query_name, query in queries.items():
            normalized = re.sub(r"\s+", "", query)

            if query_name == 'patient_census':
                # A census query without the exclusion clause used to raise
                # IndexError here and abort the whole comparison.
                normalized = _strip_census_exclusion(normalized)

            variants = unique_queries.setdefault(query_name, {})
            variants.setdefault(normalized, []).append(client)

    print(unique_queries.keys())

    for query_name, variants in unique_queries.items():
        if len(variants) > 1:
            print(query_name, len(variants))
            for clients in variants.values():
                print(clients)
            print('-'*100)

### Setup client names for which we want to keep the files
Update the list of client names in the cell below. The list should contain the names of the clients whose files we want to keep.
Then run the two cells below the list - the output should be empty. If it is not, the queries differ across clients - please check whether you can update them to be the same, or add the client name to the `keep_client_file` list.

In [4]:
# Clients left out of the cross-client query comparison below.
keep_client_file = [
    'trio',
    'communicare',
    'creativesolutions',
    'avante',
    'kopa',
    'caregnc',
    'heritagemanor',
    'pghc',
    'sunmar',
]

In [5]:
# Collect the training queries of every client module, then report the
# query names whose text is not identical across clients.

data = {}

for client_name in client_files:
    if client_name == 'base':
        # 'base' is skipped; only the other client modules are compared.
        continue
    try:
        client_module = import_module(f'clients.{client_name}')
        client_class = getattr(client_module, pascal_case(client_name))
        data[client_name] = get_training_queries(clientClass=client_class)
    except Exception as error:
        # Best effort: report the failing client and carry on with the rest.
        print(client_name, error)

check_unique_queries(data=data, keep_client_file=keep_client_file)

dict_keys(['patient_vitals', 'master_patient_lookup', 'patient_census', 'patient_rehosps', 'patient_admissions', 'patient_demographics', 'patient_diagnosis', 'patient_meds', 'patient_orders', 'patient_alerts', 'patient_progress_notes', 'patient_lab_results'])


In [6]:
# Collect the prediction queries of every client module, then report the
# query names whose text is not identical across clients.

data = {}

for client_name in client_files:
    if client_name == 'base':
        # 'base' is skipped; only the other client modules are compared.
        continue
    try:
        client_module = import_module(f'clients.{client_name}')
        client_class = getattr(client_module, pascal_case(client_name))
        data[client_name] = get_prediction_queries(clientClass=client_class)
    except Exception as error:
        # Best effort: report the failing client and carry on with the rest.
        print(client_name, error)

check_unique_queries(data=data, keep_client_file=keep_client_file)

dict_keys(['patient_census', 'patient_vitals', 'patient_admissions', 'master_patient_lookup', 'patient_rehosps', 'patient_demographics', 'patient_diagnosis', 'patient_meds', 'patient_orders', 'patient_alerts', 'patient_progress_notes', 'patient_lab_results', 'patient_room_details'])
patient_lab_results 2
['momentum', 'brushillcc', 'pearlhc', 'peacecarestann']
['pacificskilledhc']
----------------------------------------------------------------------------------------------------


Dump missing datasets for clients

In [9]:
# For every client, list the expected query names for which the client's
# prediction queries hold no (or an empty) query, and print the result as JSON.
data = {}

query_names = [
    'patient_census', 'patient_vitals', 'patient_admissions',
    'master_patient_lookup', 'patient_rehosps', 'patient_demographics',
    'patient_diagnosis', 'patient_meds', 'patient_orders',
    'patient_alerts', 'patient_progress_notes', 'patient_lab_results',
    'patient_room_details',
]

for client_name in client_files:
    if client_name == 'base':
        continue
    try:
        client_module = import_module(f'clients.{client_name}')
        client_class = getattr(client_module, pascal_case(client_name))
        queries = get_prediction_queries(clientClass=client_class)

        # Register the (initially empty) result before filling it in.
        missing = []
        data[client_name] = missing

        for expected_name in query_names:
            if not queries.get(expected_name):
                missing.append(expected_name)

    except Exception as error:
        # Best effort: report the failing client and carry on with the rest.
        print(client_name, error)

print(json.dumps(data))

{"momentum": [], "brushillcc": [], "pearlhc": [], "peacecarestann": [], "pacificskilledhc": []}


Dump experiment dates

In [10]:
# Gather what each client's get_experiment_dates() returns and print it as JSON.
data = {}

for client_name in client_files:
    if client_name == 'base':
        continue
    try:
        client_module = import_module(f'clients.{client_name}')
        client_class = getattr(client_module, pascal_case(client_name))
        data[client_name] = client_class().get_experiment_dates()
    except Exception as error:
        # Best effort: report the failing client and carry on with the rest.
        print(client_name, error)

print(json.dumps(data))

{"momentum": {"train_start_date": "2021-09-01", "train_end_date": "2023-04-16", "validation_start_date": "2023-04-17", "validation_end_date": "2023-07-10", "test_start_date": "2023-07-11", "test_end_date": "2023-10-07"}, "brushillcc": {"train_start_date": "2021-09-01", "train_end_date": "2023-04-27", "validation_start_date": "2023-04-28", "validation_end_date": "2023-07-12", "test_start_date": "2023-07-13", "test_end_date": "2023-09-28"}, "pearlhc": {"train_start_date": "2021-09-01", "train_end_date": "2023-04-27", "validation_start_date": "2023-04-28", "validation_end_date": "2023-07-12", "test_start_date": "2023-07-13", "test_end_date": "2023-09-28"}, "peacecarestann": {"train_start_date": "2021-09-01", "train_end_date": "2023-04-16", "validation_start_date": "2023-04-17", "validation_end_date": "2023-07-10", "test_start_date": "2023-07-11", "test_end_date": "2023-10-07"}, "pacificskilledhc": {"train_start_date": "2022-01-01", "train_end_date": "2023-03-13", "validation_start_date": 

Dump excluded censusactioncodes

In [11]:
# Gather what each client's get_excluded_censusactioncodes() returns and
# print it as JSON.
data = {}

for client_name in client_files:
    if client_name == 'base':
        continue
    try:
        client_module = import_module(f'clients.{client_name}')
        client_class = getattr(client_module, pascal_case(client_name))
        data[client_name] = client_class().get_excluded_censusactioncodes()
    except Exception as error:
        # Best effort: report the failing client and carry on with the rest.
        print(client_name, error)

print(json.dumps(data))

{"momentum": ["DH", "EXP", "AMA", "DD", "LOA", "TO"], "brushillcc": ["DE", "DD", "HBD", "L", "TO"], "pearlhc": ["DE", "DH", "RDH", "DAMA", "DD", "DHosp", "RDD", "ER", "L", "TO", "TOMBH"], "peacecarestann": ["DE", "DH", "DD", "RDD", "L", "TO"], "pacificskilledhc": ["DE", "DH", "DD", "L", "TO"]}


Dump facilities missing datasets

In [12]:
# Scrape the source text of each client's validate_dataset() method for the
# first `if (dataset_name ...` condition and record, per dataset named in that
# condition, the facility ids listed in it. The result is printed as JSON.
data = {}

for py in client_files:
    if py in ['base']:
        continue
    try:
        module = import_module(f'clients.{py}')
        clientClass = getattr(module, pascal_case(py))

        # Work on the method's source code rather than calling the method.
        validate_dataset = getattr(clientClass(), 'validate_dataset')
        lines = inspect.getsource(validate_dataset)

        # Remove all whitespace so the substring searches below do not depend
        # on how the source is formatted.
        lines = re.sub(r"\s+", "", lines)

        data[py] = {}

        if 'if(dataset_name' in lines:
            # Keep only the text between the first 'if(dataset_name' and the
            # next '):', i.e. the remainder of that condition.
            lines = lines.split('if(dataset_name', 1)
            lines = lines[1].split('):', 1)[0]

            # Comma-separated items between 'facilityidin[' and the next ']'.
            facilities = lines.split('facilityidin[')[1].split(']')[0].split(',')
            
            try:
                # Single-dataset form: the text after the first '==' up to the next ')'.
                datasets = [lines.split('==')[1].split(')',1)[0]]
            except Exception as e:
                # No '==' in the condition: read the items after the first 'in['
                # up to the next '])'.
                # NOTE(review): 'facilityidin[' also contains 'in[', so this
                # presumably relies on the dataset list coming first - confirm.
                datasets = lines.split('in[')[1].split('])',1)[0].split(',')

            # Strip the single quotes of the string literals.
            datasets = [d.replace("'", "") for d in datasets]

            for dataset in datasets:
                data[py][dataset] = facilities        

    except Exception as e:
        # Any failure for a client (import, source lookup, parsing) is ignored
        # without a message; re-enable the print below to see the reason.
        # print(py, e)
        pass

print(json.dumps(data))

{"momentum": {"patient_lab_results": ["8", "9", "11", "14", "7", "5", "12", "6", "13"]}, "brushillcc": {"patient_lab_results": ["195"]}, "pacificskilledhc": {"patient_lab_results": ["124", "125"]}}


In [12]:
# data = {}

# prediction_date = '2022-01-01'
# facilityid = '1'
# train_start_date = '2021-01-01'

# def get_experiment_dates(*, clientClass):
#     dates = getattr(clientClass(), 'get_experiment_dates')()
#     return dates


# def get_missing_datasets(*, clientClass):
#     validate_dataset = getattr(clientClass(), 'validate_dataset')
#     lines = inspect.getsource(validate_dataset)

#     lines = re.sub(r"\s+", "", lines)

#     if 'if(dataset_name' in lines:
#         lines = lines.split('if(dataset_name', 1)
#         lines = lines[1].split('):', 1)[0]

#         facilities = lines.split('facilityidin[')[1].split(']')[0].split(',')
        
#         try:
#             dataset = [lines.split('==')[1].split(')',1)[0]]
#         except Exception as e:
#             dataset = lines.split('in[')[1].split('])',1)[0].split(',')

#         dataset = [d.replace("'", "") for d in dataset]

#         print(clientClass, dataset, facilities)


# for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']:
#     if py in ['base']:
#         continue
#     try:
#         module = import_module(f'clients.{py}')
#         clientClass = getattr(module, pascal_case(py))
#         # data[py] = get_experiment_dates(clientClass=clientClass)
#         # data[py] = get_prediction_queries(clientClass=clientClass)
#         # data[py] = get_training_queries(clientClass=clientClass)
#         data[py] = get_missing_datasets(clientClass=clientClass)

#     except Exception as e:
#         print(py, e)