
# Space

In [None]:
import os
import sys 
import logging
import pandas as pd 
from pprint import pprint 

# WorkSpace
# Locate the project root: everything in the current working directory up to
# and including the first occurrence of KEY.
KEY = 'WorkSpace'
_cwd = os.getcwd()
if KEY not in _cwd:
    # Without this guard, split() returns the whole cwd and KEY is glued onto
    # its end, so the failure only shows up later in os.chdir with a path
    # that was never intended.
    raise FileNotFoundError(f'{KEY!r} not found in current working directory: {_cwd}')
WORKSPACE_PATH = _cwd.split(KEY)[0] + KEY
print(WORKSPACE_PATH)
os.chdir(WORKSPACE_PATH)
# Re-running this cell must not keep growing sys.path with duplicates.
if WORKSPACE_PATH not in sys.path:
    sys.path.append(WORKSPACE_PATH)

# Pipeline Space
# SPACE is provided by the project-local `proj_space` module and is used here
# as a mapping (indexed by 'WORKSPACE_PATH' and 'CODE_FN').
# NOTE(review): the import presumably resolves through the workspace root that
# was appended to sys.path above — confirm.
from proj_space import SPACE
# Record the resolved workspace root inside SPACE.
SPACE['WORKSPACE_PATH'] = WORKSPACE_PATH
# Make the directory named by SPACE['CODE_FN'] importable.
sys.path.append(SPACE['CODE_FN'])
# pprint(SPACE)

# Available Packages
import pandas as pd
from datetime import datetime 

# Logger named after this module/notebook; used for the logger.info calls below.
logger = logging.getLogger(__name__)
# Path of the 'config_recfldtkn/' directory under SPACE['CODE_FN']; passed to
# load_cohort_args below.
recfldtkn_config_path = os.path.join(SPACE['CODE_FN'], 'config_recfldtkn/')

import datasets
from recfldtkn.loadtools import load_ds_rec_and_info
from recfldtkn.configfn import load_cohort_args, load_record_args
from config_observer.CKPD import ckpd_to_CkpdObsConfig


# Load the cohort configuration, then extend it with notebook-level entries.
cohort_config = load_cohort_args(recfldtkn_config_path, SPACE)
# Store the ckpd_to_CkpdObsConfig object imported from config_observer.CKPD.
cohort_config['ckpd_to_CkpdObsConfig'] = ckpd_to_CkpdObsConfig
# NOTE(review): presumably the name of the observation-datetime column;
# df_case['ObsDT'] is read later in this notebook — confirm.
cohort_config['ObsDTName'] = 'ObsDT'
# Two-element list: [value of cohort_config['RootID'], value of cohort_config['ObsDTName']].
cohort_config['PID_ObsDT_columns'] = [cohort_config['RootID'], cohort_config['ObsDTName']]
print(cohort_config)

# 1. DsCase

In [None]:
# ---------- 0. RFT config ----------
# Two independent, initially empty dicts that are passed to the pipeline
# helpers in later cells.
RecName_to_dsRec = {}
RecName_to_dsRecInfo = {}
cohort_label_list = [1]

# ---------- 1. Case trigger config ----------
TriggerCaseMethod = 'TrulicityRx'

# ---------- 2. InputCaseSetName ----------
# Alternative values kept for reference (option 1):
#   InputCaseSetName = 'C1.2.3-CGM5MinEntry'
#   InputCaseSetName = 'sftcgmbf24haf2h-rs42-ds0.1-out0.1tstail0.1vd0.1'
InputCaseSetName = None

In [None]:
from recfldtkn.loadtools import fetch_trigger_tools

# Fetch the tools associated with the selected trigger method.
Trigger_Tools = fetch_trigger_tools(TriggerCaseMethod, SPACE)
# NOTE(review): presumably the columns that uniquely identify a case — confirm.
case_id_columns = Trigger_Tools['case_id_columns']
# Also keep them in cohort_config, which is passed to the pipeline helpers below.
cohort_config['case_id_columns'] = case_id_columns
case_id_columns

In [None]:
from recfldtkn.pipeline_case import get_ds_case_to_process


# Obtain the case set to work on. The helper returns a pair: a case-set name
# (rebinding InputCaseSetName) and the case DataFrame.
# NOTE(review): InputCaseSetName is None at this point; presumably the helper
# then derives the cases from TriggerCaseMethod — confirm.
InputCaseSetName, df_case = get_ds_case_to_process(InputCaseSetName, 
                                                   cohort_label_list, 
                                                   TriggerCaseMethod, 
                                                   cohort_config, 
                                                   SPACE, 
                                                   RecName_to_dsRec, 
                                                   RecName_to_dsRecInfo)

# NOTE(review): no logging handler/level is configured in the visible cells, so
# these INFO records may not be displayed unless logging is set up elsewhere.
logger.info(f'InputCaseSetName: {InputCaseSetName}')
logger.info(f'df_case shape: {df_case.shape}')

In [None]:
# Down-sample the cases so the tagging steps below run on a small subset.
# - n is capped at the frame length: DataFrame.sample (replace=False) raises
#   ValueError when asked for more rows than exist.
# - random_state pins the draw so a fresh "Restart & Run All" selects the
#   same rows every time.
SAMPLE_SIZE = 1000
SAMPLE_SEED = 42
df_case = df_case.sample(n=min(SAMPLE_SIZE, len(df_case)), random_state=SAMPLE_SEED)
df_case.shape

# Step 1. PttBasicDF

In [None]:
# Record name handed to load_ds_rec_and_info.
# NOTE(review): 'P' presumably denotes the patient-level record — confirm.
InfoRecName = 'P'
# The loader returns a pair; only the first element (the dataset) is kept.
ds_info, _ = load_ds_rec_and_info(InfoRecName, cohort_config, cohort_label_list)
# Display the dataset's column names.
ds_info.column_names

In [None]:
# ---- Tag method configured in this section ----
# TagMethod is also used as the file name of the module written further below.
TagMethod = 'PttBasicDF'

import inspect

# Record name passed to load_ds_rec_and_info, and the columns that
# fn_case_tagging copies onto every case.
InfoRecName = 'P'
subgroup_columns = [
    'patient_gender',
    'patient_age_bucket',
    'patient_zipcode_3',
]

def fn_case_tagging(df_case, ds_info, subgroup_columns, base_config):
    """Attach subgroup columns and a cohort label to every case.

    Args:
        df_case: pandas DataFrame of cases to be tagged.
        ds_info: object exposing ``select_columns(list)`` and ``to_pandas()``;
            only those two methods are used here.
        subgroup_columns: list of column names to take from ``ds_info``.
        base_config: mapping with ``'RootID'`` (name of the id column) and
            ``'RootIDLength'`` (number of trailing characters removed from the
            stringified id when building the cohort label).

    Returns:
        The left merge of ``df_case`` with the id column, the subgroup columns
        and a new ``'cohort'`` column (``'C'`` + the id string without its
        last ``RootIDLength`` characters).
    """
    id_column = base_config['RootID']

    def _cohort_label(root_id):
        # 'C' followed by the id string minus its trailing RootIDLength characters.
        return 'C' + str(root_id)[:-base_config['RootIDLength']]

    # Pull only the needed columns into pandas before deriving the label.
    info_frame = ds_info.select_columns([id_column] + subgroup_columns).to_pandas()
    info_frame['cohort'] = info_frame[id_column].apply(_cohort_label)

    tag_columns = [id_column] + subgroup_columns + ['cohort']
    # No explicit `on=`: pandas joins on every column name the two frames share.
    # NOTE(review): if df_case already carries any of the tag columns they
    # become join keys too — confirm this is intended.
    return pd.merge(df_case, info_frame[tag_columns], how = 'left')

# Store the function's own source text on the function object as `fn_string`.
# NOTE(review): presumably read by convert_variables_to_pystirng when the
# function is serialized to a .py file — confirm.
fn_case_tagging.fn_string = inspect.getsource(fn_case_tagging)

In [None]:
from recfldtkn.loadtools import convert_variables_to_pystirng, load_module_variables

# Build the text of a standalone Python module for this tag method.
# `prefix` holds import statements as strings.
# NOTE(review): presumably emitted at the top of the generated file — confirm.
prefix = ['import pandas as pd', 'import numpy as np']
# NOTE(review): these lists contain the variables' values, not their names; how
# the helper recovers the names is not visible in this notebook.
string_variables = [InfoRecName]
iterative_variables = [subgroup_columns]
fn_variables = [fn_case_tagging]
pycode = convert_variables_to_pystirng(string_variables = string_variables, 
                                       iterative_variables = iterative_variables, 
                                       fn_variables = fn_variables, prefix = prefix)
# Write the generated module text to <CODE_FN>/fn_learning/<TagMethod>.py.
pypath = os.path.join(SPACE['CODE_FN'], 'fn_learning', f'{TagMethod}.py')
print(pypath)
# exist_ok=True creates the directory when it is missing and does nothing when
# it is already there, replacing the separate check-then-create step.
os.makedirs(os.path.dirname(pypath), exist_ok=True)
with open(pypath, 'w') as file:
    file.write(pycode)

In [None]:
# Trigger2LearningCaseMethod
# Load the module file at <CODE_FN>/fn_learning/<TagMethod>.py and take
# fn_case_tagging from it. This rebinds the notebook-level name, so later cells
# call the version stored on disk.
pypath = os.path.join(SPACE['CODE_FN'], 'fn_learning', f'{TagMethod}.py')
module = load_module_variables(pypath)
fn_case_tagging = module.fn_case_tagging

In [None]:
# Load the info dataset for InfoRecName (only the first element of the returned
# pair is kept) and tag the cases with it.
ds_info, _ = load_ds_rec_and_info(InfoRecName, cohort_config, cohort_label_list)
# df_case is rebound to the tagged result returned by fn_case_tagging.
df_case = fn_case_tagging(df_case, ds_info, subgroup_columns, cohort_config)
df_case

In [None]:
# Report the earliest and latest observation time among the cases.
# The column name is read from cohort_config['ObsDTName'] (assigned 'ObsDT' in
# the setup cell) instead of being repeated as a literal, so the two cannot
# drift apart.
series_time = df_case[cohort_config['ObsDTName']]
print(series_time.min(), series_time.max())

# Step 2. CFQ_Config

In [None]:
# cf_to_QueryCaseFeatConfig = {
#     'EgmBf1Y':  {
#         'case_observations': [
#             'BfPInvRN:ro.PInv-Bf1Y_ct.RecNum', # CO
#             'BfRxRN:ro.Rx-Bf1Y_ct.RecNum',
#             'BfEgmClickRN:ro.EgmClick-Bf1Y_ct.RecNum',
#         ],
#         'name_CaseGamma': 'CatUnseqTknsOneTS', # CF
        
        
#         'tkn_name_list': [
#             'BfPInvRN:recnum',
#             'BfRxRN:recnum',
#             'BfEgmClickRN:recnum',
#             'BfPInvRN:recspan',
#             'BfRxRN:recspan',
#             'BfEgmClickRN:recspan',
#         ],
#     }
# }


from config_observer.QCF import cf_to_QueryCaseFeatConfig

In [None]:
# Show the entries obtained by iterating over cf_to_QueryCaseFeatConfig.
list(cf_to_QueryCaseFeatConfig)

In [None]:
# Tag-method names that are passed (as TagMethod_List) to
# process_df_tagging_tasks_in_chunks in the cell below.
CFQ_TaggingList = ['PttBasicDF', 'EgmBf1Y', 'RxEgmAf1W', 'InvEgmAf1W']

In [None]:
from recfldtkn.pipeline_case import process_df_tagging_tasks_in_chunks

# Keyword options that are unpacked with ** into
# process_df_tagging_tasks_in_chunks.
# NOTE(review): the meaning of each option is defined by that helper and is
# not visible in this notebook — confirm before changing values.
CASE_TAGGING_PROC_CONFIG = {
    'use_CF_from_disk': False, 'use_CO_from_disk': False,
    'start_chunk_id': 0, 'end_chunk_id': None,
    'chunk_size': 500_000,
    'save_to_pickle': False,
    'num_processors': 1,
}

# Run every tag method in CFQ_TaggingList over the cases.
# The helper returns a pair: an output case-set name and a DataFrame that
# replaces df_case. The options in CASE_TAGGING_PROC_CONFIG are passed as
# keyword arguments.
TagMethod_List = CFQ_TaggingList
OutputCaseSetName, df_case = process_df_tagging_tasks_in_chunks(df_case, cohort_label_list, case_id_columns, 
                                                                InputCaseSetName, 
                                                                TagMethod_List, cf_to_QueryCaseFeatConfig, 
                                                                cohort_config, SPACE, 
                                                                RecName_to_dsRec, RecName_to_dsRecInfo,
                                                                **CASE_TAGGING_PROC_CONFIG)



# Display the tagged cases.
df_case