## Migrate client files

In [1]:
import inspect
import sys
sys.path.insert(0, '/src')
import json

import os
from importlib import import_module
from shared.utils import pascal_case
import re
path = '/src/clients'

In [2]:
client_files = [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']

prediction_date = '2022-01-01'
facilityid = '1'
train_start_date = '2021-01-01'

In [3]:
prediction_date = '2022-01-01'
facilityid = '1'
train_start_date = '2021-01-01'


def get_training_queries(*, clientClass):
    try:
        queries = getattr(clientClass(), 'get_training_queries')(train_start_date, train_start_date)
    except:
        queries = getattr(clientClass(), 'get_training_queries')(
            test_end_date=train_start_date, 
            train_start_date=train_start_date,
        )
    return queries


def get_prediction_queries(*, clientClass):
    try:
        queries = getattr(clientClass(), 'get_prediction_queries')(prediction_date, facilityid, train_start_date)
    except:
        queries = getattr(clientClass(), 'get_prediction_queries')(
            prediction_date=prediction_date, 
            facilityid=facilityid, 
            train_start_date=train_start_date,
        )
    return queries


def check_unique_queries(*, data, keep_client_file):
    unique_queries = {}

    for client, queries in data.items():
        if client in keep_client_file:
            continue
        for query_name, query in queries.items():
            query = re.sub(r"\s+", "", query)

#             if query_name == 'patient_census':
#                 query = query.split("andcensusactioncodenotin(", 1)
#                 query = query[0] + query[1].split(")", 1)[1]

            unique_query = unique_queries.get(query_name, {})
            clients = unique_query.get(query, [])
            clients.append(client)
            unique_query[query] = clients
            unique_queries[query_name] = unique_query

    print(unique_queries.keys())

    for query_name, query in unique_queries.items():
        if len(query) > 1:
            print(query_name, len(query))
            for q, clients in query.items():
                print(clients)
            print('-'*100)

### Setup client names for which we want to keep the files
Update the list of client names in the cell below. The list should contain the names of the clients for which we want to keep the files.
Then run the two cells bellow the list - the otput should be empty. If not, there are some differences between the queries accross clients - please check, if you can update them to be the same, or add the client name to keep_client_file list.

In [4]:
keep_client_file = []

In [5]:
# Check training queries

data = {}

for py in client_files:
    if py in ['base']:
        continue
    try:
        module = import_module(f'clients.{py}')
        clientClass = getattr(module, pascal_case(py))
        data[py] = get_training_queries(clientClass=clientClass)

    except Exception as e:
        print(py, e)

check_unique_queries(data=data, keep_client_file=keep_client_file)

dict_keys(['patient_vitals', 'patient_admissions', 'master_patient_lookup', 'patient_census', 'patient_rehosps', 'patient_orders', 'patient_demographics', 'patient_diagnosis', 'patient_progress_notes'])
patient_vitals 2
['loh', 'nycrn', 'summitltc', 'optima', 'colonial', 'crn', 'pji', 'excels_cgcnr', 'hcomgmt', 'ahc', 'excels_rmc', 'excels', 'seniorlp', 'witherell', 'trinity', 'chhc', 'excels_sgnh', 'excels_tnec', 'pointegroupcarem', 'epic_hcm', 'mozaicsl', 'woodruff', 'whcg', 'friendshiprc', 'symp_wonh', 'pleasantview', 'cardinalritter']
['arkpostmc']
----------------------------------------------------------------------------------------------------
patient_admissions 2
['loh', 'nycrn', 'summitltc', 'optima', 'colonial', 'crn', 'pji', 'excels_cgcnr', 'hcomgmt', 'ahc', 'excels_rmc', 'excels', 'seniorlp', 'witherell', 'trinity', 'chhc', 'excels_sgnh', 'excels_tnec', 'pointegroupcarem', 'epic_hcm', 'mozaicsl', 'woodruff', 'whcg', 'friendshiprc', 'symp_wonh', 'pleasantview', 'cardinalrit

In [6]:
# Check prediction queries

data = {}

for py in client_files:
    if py in ['base']:
        continue
    try:
        module = import_module(f'clients.{py}')
        clientClass = getattr(module, pascal_case(py))
        data[py] = get_prediction_queries(clientClass=clientClass)

    except Exception as e:
        print(py, e)

check_unique_queries(data=data, keep_client_file=keep_client_file)

dict_keys(['patient_census', 'patient_vitals', 'patient_admissions', 'master_patient_lookup', 'patient_rehosps', 'patient_demographics', 'patient_diagnosis', 'patient_progress_notes', 'patient_room_details', 'patient_orders', 'patient_meds'])
patient_census 5
['loh', 'summitltc', 'colonial', 'hcomgmt', 'ahc', 'witherell', 'trinity', 'chhc', 'pointegroupcarem', 'mozaicsl', 'woodruff', 'friendshiprc', 'pleasantview', 'cardinalritter']
['nycrn', 'optima', 'crn', 'epic_hcm', 'whcg', 'symp_wonh']
['pji', 'excels_cgcnr', 'excels_rmc', 'excels', 'excels_sgnh', 'excels_tnec']
['arkpostmc']
['seniorlp']
----------------------------------------------------------------------------------------------------
patient_vitals 2
['loh', 'nycrn', 'summitltc', 'optima', 'colonial', 'crn', 'pji', 'excels_cgcnr', 'hcomgmt', 'ahc', 'excels_rmc', 'excels', 'seniorlp', 'witherell', 'trinity', 'chhc', 'excels_sgnh', 'excels_tnec', 'pointegroupcarem', 'epic_hcm', 'mozaicsl', 'woodruff', 'whcg', 'friendshiprc', 's

Dump missing datasets for clients

In [6]:
data = {}

query_names = [
    'patient_census', 
    'patient_vitals', 
    'patient_admissions', 
    'master_patient_lookup', 
    'patient_rehosps', 
    'patient_demographics', 
    'patient_diagnosis', 
    'patient_meds', 
    'patient_orders', 
    'patient_alerts', 
    'patient_progress_notes', 
    'patient_lab_results', 
    'patient_room_details'
]

_queries = set()

for py in client_files:
    if py in ['base']:
        continue
    try:
        module = import_module(f'clients.{py}')
        clientClass = getattr(module, pascal_case(py))
        queries = get_prediction_queries(clientClass=clientClass)

        data[py] = []

        for query_name in query_names:
            query = queries.get(query_name, None)
            if not query:
                data[py].append(query_name)
                _queries.add(query_name)

    except Exception as e:
        print(py, e)

print(json.dumps(data))

print(_queries)

{"loh": ["patient_alerts", "patient_lab_results"], "nycrn": ["patient_alerts", "patient_lab_results"], "summitltc": ["patient_alerts", "patient_lab_results"], "optima": ["patient_alerts", "patient_lab_results"], "colonial": ["patient_alerts", "patient_lab_results"], "crn": ["patient_alerts", "patient_lab_results"], "pji": ["patient_alerts", "patient_lab_results"], "excels_cgcnr": ["patient_alerts", "patient_lab_results"], "hcomgmt": ["patient_meds", "patient_orders", "patient_alerts", "patient_lab_results"], "ahc": ["patient_alerts", "patient_lab_results"], "arkpostmc": ["patient_meds", "patient_orders", "patient_alerts", "patient_lab_results"], "excels_rmc": ["patient_alerts", "patient_lab_results"], "excels": ["patient_alerts", "patient_lab_results"], "seniorlp": ["patient_alerts", "patient_lab_results"], "witherell": ["patient_alerts", "patient_lab_results"], "trinity": ["patient_alerts", "patient_lab_results"], "chhc": ["patient_alerts", "patient_lab_results"], "excels_sgnh": ["pat

Dump experiment dates

In [7]:
data = {}

for py in client_files:
    if py in ['base']:
        continue
    try:
        module = import_module(f'clients.{py}')
        clientClass = getattr(module, pascal_case(py))
        data[py] = getattr(clientClass(), 'get_experiment_dates')()

    except Exception as e:
        print(py, e)

print(json.dumps(data))

loh 'Loh' object has no attribute 'get_experiment_dates'
nycrn 'Nycrn' object has no attribute 'get_experiment_dates'
summitltc 'Summitltc' object has no attribute 'get_experiment_dates'
optima 'Optima' object has no attribute 'get_experiment_dates'
colonial 'Colonial' object has no attribute 'get_experiment_dates'
crn 'Crn' object has no attribute 'get_experiment_dates'
pji 'Pji' object has no attribute 'get_experiment_dates'
excels_cgcnr 'ExcelsCgcnr' object has no attribute 'get_experiment_dates'
hcomgmt 'Hcomgmt' object has no attribute 'get_experiment_dates'
ahc 'Ahc' object has no attribute 'get_experiment_dates'
arkpostmc 'Arkpostmc' object has no attribute 'get_experiment_dates'
excels_rmc 'ExcelsRmc' object has no attribute 'get_experiment_dates'
excels 'Excels' object has no attribute 'get_experiment_dates'
seniorlp 'Seniorlp' object has no attribute 'get_experiment_dates'
witherell 'Witherell' object has no attribute 'get_experiment_dates'
trinity 'Trinity' object has no att