# Space: workspace paths and logging setup

In [None]:
import sys
import os 
import logging
import pandas as pd
from pprint import pprint 
from IPython.display import display, HTML

# Locate the workspace root: keep the part of the current working directory
# that precedes the KEY marker and switch to it, so the relative paths in
# SPACE below resolve from there.
# NOTE: if KEY does not occur in the cwd, str.split returns the cwd unchanged
# and the chdir is a no-op; the assert further down then reports a wrong root.
KEY = '1-WORKSPACE'
WORKSPACE_PATH = os.getcwd().split(KEY)[0]
print(WORKSPACE_PATH)
os.chdir(WORKSPACE_PATH)

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format='[%(levelname)s:%(asctime)s:(%(filename)s@%(lineno)d %(name)s)]: %(message)s')

# Directory layout, relative to WORKSPACE_PATH. This dict is handed to the
# pipeline objects built later in the notebook.
SPACE = {
    'DATA_RAW': './_Data/0-Data_Raw',
    'DATA_RFT': './_Data/1-Data_RFT',
    'DATA_CASE': './_Data/2-Data_CASE',
    'DATA_AIDATA': './_Data/3-Data_AIDATA',
    'DATA_EXTERNAL': './code/external',
    'CODE_FN': './code/pipeline',
}
assert os.path.exists(SPACE['CODE_FN']), f'{SPACE["CODE_FN"]} not found'

print(SPACE['CODE_FN'])
# Make the pipeline code importable. Guarded so that re-running this cell
# does not keep appending the same entry to sys.path.
if SPACE['CODE_FN'] not in sys.path:
    sys.path.append(SPACE['CODE_FN'])

# Step 5: Test

In [None]:
from config.config_record.Cohort import CohortName_to_OneCohortArgs
from recfldtkn.record_base import OneCohort_Record_Base
import copy

#############################
# Cohort and trigger exercised by this notebook.
CohortName = 'WellDoc2023CVSDeRx'
TriggerName = 'CGM5MinEntry'

# Narrow the imported cohort config down to the selected cohort only
# (the result is an empty dict when the cohort is not in the config).
CohortName_to_OneCohortArgs = (
    {CohortName: CohortName_to_OneCohortArgs[CohortName]}
    if CohortName in CohortName_to_OneCohortArgs
    else {}
)
#############################


# Record names requested under the 'P' key; each one starts with its own
# empty list.
HumanRecordRecfeat_Args = {
    'P': {
        RecName: []
        for RecName in ('P', 'CGM5Min', 'Med5Min', 'Exercise5Min', 'Diet5Min')
    }
}


Record_Proc_Config = dict(
    via_method='ds',
    save_data=True,
    load_data=True,
    shadow_df=True,
)


# Work on a deep copy so HumanRecordRecfeat_Args itself stays untouched,
# then add an empty 'Inv' entry under 'P'.
Final_HumanRecordRecfeat_Args = copy.deepcopy(HumanRecordRecfeat_Args)
Final_HumanRecordRecfeat_Args['P'].update({'Inv': []})
print(Final_HumanRecordRecfeat_Args)


# Trigger configuration; consumed later when the case base is built.
TriggerCaseBase_Args = {'Trigger': {'TriggerName': TriggerName}}

onecohort_record_base = OneCohort_Record_Base(
    CohortName,
    Final_HumanRecordRecfeat_Args,
    CohortName_to_OneCohortArgs,
    Record_Proc_Config=Record_Proc_Config,
    SPACE=SPACE,
)
onecohort_record_base.setup_NameToFn()
onecohort_record_base.initialize_NameToObject()
onecohort_record_base

In [None]:
# Bare expression as the cell's only statement: the notebook renders the
# repr of `onecohort_record_base` so the object can be inspected again.
onecohort_record_base

In [None]:
from recfldtkn.case_base.caseset import get_CaseFnTaskArgs_from_CaseFnNameList
# Case functions to resolve; 'PDemoBase' is currently switched off.
CF_list = [
    # 'PDemoBase',
    'CGMInfoBf24h', 'CGMInfoAf2h', 'CGMInfoAf2to8h',
    'DMEInfoBf24h', 'DMEInfoAf2h',
]

CaseFnTaskArgs = get_CaseFnTaskArgs_from_CaseFnNameList(CF_list, onecohort_record_base, SPACE)

# Pull out the two entries used further down.
# NOTE: this rebinds the notebook-level name HumanRecordRecfeat_Args to the
# value returned for the case functions listed above.
HumanRecordRecfeat_Args, CaseFnName_to_CaseFnInfo = (
    CaseFnTaskArgs[key]
    for key in ('HumanRecordRecfeat_Args', 'CaseFnName_to_CaseFnInfo')
)
pprint(CaseFnName_to_CaseFnInfo)

In [None]:
# Show which entries the task-args mapping provides. A bare expression in the
# middle of a cell is evaluated and thrown away (only the last expression of
# a cell is rendered), so the keys are printed explicitly.
print(list(CaseFnTaskArgs))

ROName_list = CaseFnTaskArgs['ROName_list']
Ckpd_to_CkpdObsConfig = CaseFnTaskArgs['Ckpd_to_CkpdObsConfig']

from recfldtkn.case_base.casefnutils.ro import get_RONameToROInfo, get_RONameToROData_for_OneCaseExample

# Resolve the info for every RO name against the record base and the
# checkpoint-observation config, then print it in insertion order.
ROName_to_ROInfo = get_RONameToROInfo(ROName_list, onecohort_record_base, Ckpd_to_CkpdObsConfig)
pprint(ROName_to_ROInfo, sort_dicts=False)

In [None]:
# very high level config
# not related to any specific CF

from config.config_case.Flt import FltName_to_FltArgs
from config.config_case.TagRec import TagRec_to_TagRecArgs
from recfldtkn.case_base.case_base import OneCohortTrigger_CaseBase

# Filter and tag-record argument tables, bundled for the case base.
Case_Args_Settings = dict(
    FltName_to_FltArgs=FltName_to_FltArgs,
    TagRec_to_TagRecArgs=TagRec_to_TagRecArgs,
)

# Processing options handed to the case base as-is.
Case_Proc_Config = dict(
    max_trigger_case_num=10000,
    use_task_cache=False,
    caseset_chunk_size=50000,
    load_casecollection=False,
    save_data=False,
    load_data=False,
    via_method='df',
    n_cpus=1,
    batch_size=1000,
)


onecohort_trigger_casebase = OneCohortTrigger_CaseBase(
    onecohort_record_base=onecohort_record_base,
    TriggerCaseBase_Args=TriggerCaseBase_Args,
    Case_Proc_Config=Case_Proc_Config,
    Case_Args_Settings=Case_Args_Settings,
    SPACE=SPACE,
)
onecohort_trigger_casebase.init_OneCohortTrigger()

CaseSetName_to_caseset = onecohort_trigger_casebase.CaseSetName_to_caseset
# The loop variable names are kept on purpose: `caseset` stays bound to the
# last case set after the loop and is read by a later cell.
for Name, caseset in CaseSetName_to_caseset.items():
    print(caseset)

# Test Speed

In [None]:
from recfldtkn.case_base.casefnutils.casefn import Case_Fn, get_CaseFnNameToCaseFnData_for_OneCaseExample
from recfldtkn.case_base.casefnutils.hrf import get_HumanDirectoryArgs_ForBatch, get_HRFDirectory_from_HumanDirectory
from datetime import datetime 


# Speed test: build the HRF directory once for a batch of cases, then time the
# per-case RO-data and case-function steps and collect the outputs.

# Bind the case set explicitly instead of relying on a loop variable left over
# from an earlier cell. The last entry of the mapping is the same object that
# loop leaves bound.
caseset = list(CaseSetName_to_caseset.values())[-1]

results = []   # one case-function output per case
log = []       # one dict of per-step durations (seconds) per case
s_total = datetime.now()

TEST_NUM = min(10000, len(caseset.df_case))
df_case_batch = caseset.df_case.iloc[:TEST_NUM]

# --- batch-level preparation -------------------------------------------------
s = datetime.now()
s1 = datetime.now()
HumanDirectory_Args = get_HumanDirectoryArgs_ForBatch(df_case_batch, HumanRecordRecfeat_Args)
e1 = datetime.now()
print(f'HumanDirectory Time: {e1 - s1}')

s2 = datetime.now()
HRFDirectory = get_HRFDirectory_from_HumanDirectory(onecohort_record_base, HumanDirectory_Args, HumanRecordRecfeat_Args)
e2 = datetime.now()
print(f'HRFDirectory Time: {e2 - s2}')

e = datetime.now()
du = e - s
print('HRF Time', du)

# Caches passed to every per-case call below.
RO_to_Cache = {}
RCKPD_to_Cache = {}

# --- per-case processing -----------------------------------------------------
s_process = datetime.now()
for i in range(TEST_NUM):
    log_i = {}
    # Row i of the batch (the batch is the first TEST_NUM rows of df_case).
    case_example = df_case_batch.iloc[i]

    s = datetime.now()
    ROName_to_ROData = get_RONameToROData_for_OneCaseExample(
        case_example,
        ROName_to_ROInfo,
        HRFDirectory,
        RO_to_Cache,
        RCKPD_to_Cache,
        caseset,
    )
    e = datetime.now()
    du = e - s
    log_i['ROData'] = du.total_seconds()

    ##########################################
    s = datetime.now()
    CaseFnName_to_CaseFnData = get_CaseFnNameToCaseFnData_for_OneCaseExample(
        case_example,
        CaseFnName_to_CaseFnInfo,
        ROName_to_ROInfo,
        ROName_to_ROData,
        caseset,  # TODO: clarify why the whole caseset is needed here; maybe remove it in the future.
    )
    results.append(CaseFnName_to_CaseFnData)
    e = datetime.now()
    ##########################################

    du = e - s
    log_i['CaseData'] = du.total_seconds()
    log.append(log_i)

e_process = datetime.now()
print(f'Process Time: {e_process - s_process}')
e_total = datetime.now()
print('Total Time:   ', e_total - s_total)

# --- timing summary ----------------------------------------------------------
df = pd.DataFrame(log)
print(df.sum())
# Plotting an empty frame raises, so skip the plot when no case was processed.
if not df.empty:
    df.plot()

# --- results -----------------------------------------------------------------
df_results = pd.DataFrame(results)
# print(df_results['NumRxBf1M-num'].value_counts())
# A mid-cell bare expression is not rendered; display the preview explicitly.
display(df_results.head())

# pd.concat(axis=1) aligns rows on index labels. df_results has a fresh
# 0..n-1 index while df_case_batch keeps df_case's labels, so drop those
# labels first to pair the rows by position.
df_final = pd.concat([df_case_batch.reset_index(drop=True), df_results], axis=1)
df_final
## when using the 'df' for the record. 
# HumanDirectory Time: 0:00:00.001231
# HRFDirectory Time: 0:00:17.551468
# HRF Time 0:00:17.553064
# Process Time: 0:00:03.336460
# Total Time:    0:00:20.889790
# ROData      1.071531
# CaseData    2.046148
# dtype: float64



#### when using the 'ds' for the record. 
# HumanDirectory Time: 0:00:00.000843
# HRFDirectory Time: 0:00:31.597045
# HRF Time 0:00:31.598115


In [None]:
# 10000 / 60