# Space


In [None]:
import sys
import os 
import logging
import pandas as pd
from pprint import pprint 
from IPython.display import display, HTML

# Marker used to locate the workspace root: everything in the current working
# directory path that precedes this marker is taken as the workspace path.
KEY = '1-WORKSPACE'
WORKSPACE_PATH = os.getcwd().split(KEY)[0]
print(WORKSPACE_PATH)
os.chdir(WORKSPACE_PATH)

# Root logging at INFO level; each record carries level, time, file, line and logger name.
logging.basicConfig(
    level=logging.INFO,
    format='[%(levelname)s:%(asctime)s:(%(filename)s@%(lineno)d %(name)s)]: %(message)s',
)
logger = logging.getLogger(__name__)

# Named locations used by the pipeline. All paths are relative, so they are
# resolved against the current working directory at the time they are used.
SPACE = {
    'DATA_RAW': './_Data/0-Data_Raw',
    'DATA_RFT': './_Data/1-Data_RFT',
    'DATA_CASE': './_Data/2-Data_CASE',
    'DATA_AIDATA': './_Data/3-Data_AIDATA',
    'DATA_EXTERNAL': './code/external',
    'CODE_FN': './code/pipeline',
}
# Fail early if the pipeline code directory is missing from the working directory.
assert os.path.exists(SPACE['CODE_FN']), f'{SPACE["CODE_FN"]} not found'

print(SPACE['CODE_FN'])
# Make the pipeline code importable. The guard keeps the cell idempotent:
# re-running it no longer appends a duplicate sys.path entry each time.
if SPACE['CODE_FN'] not in sys.path:
    sys.path.append(SPACE['CODE_FN'])

# Step 1: OneCohort Record Base

In [None]:
from config.config_record.Cohort import CohortName_to_OneCohortArgs
# Every cohort name that has an entry in the cohort configuration mapping.
CohortNames = list(CohortName_to_OneCohortArgs.keys())
CohortNames

In [None]:
# Cohort to process in this notebook.
CohortName = 'WellDoc2023CVSDeRx'

# Arguments for the HumanRecordRecfeat step, as a two-level mapping whose leaves are lists.
# NOTE(review): presumably Human name -> Record name -> record-feature names;
# confirm against OneCohort_Record_Base.
HumanRecordRecfeat_Args = {
    'P': {record_name: [] for record_name in ('P', 'CGM5Min')},
}
HumanRecordRecfeat_Args

In [None]:
# Record-base class for a single cohort.
from recfldtkn.record_base import OneCohort_Record_Base

# Build the record base for the selected cohort from the record/feature
# arguments, the cohort configuration mapping and the SPACE path dictionary.
onecohort_record_base = OneCohort_Record_Base(
    CohortName,
    HumanRecordRecfeat_Args,
    CohortName_to_OneCohortArgs,
    SPACE,
)

# Two preparation calls, in this order: register the name -> function mapping,
# then initialize the name -> object mapping.
onecohort_record_base.setup_NameToFn()
onecohort_record_base.initialize_NameToObject()

In [None]:
# Print, in order: the Name_to_HRF mapping, the Name_to_Fn mapping, and the record base itself.
for summary in (
    onecohort_record_base.Name_to_HRF,
    onecohort_record_base.Name_to_Fn,
    onecohort_record_base,
):
    print(summary)

# Step 2: ROName Parser

In [None]:
# Fetch the entry stored under the ('P', 'P') key of Name_to_HRF
# (a HumanRecordFeat object, per the original author's note).
record = onecohort_record_base.Name_to_HRF['P', 'P']
# Display its ds_RecAttr attribute; the number of rows depends on the loaded cohort.
record.ds_RecAttr

# ds_RecAttr appears to be an actual dataset, not just a list of column names

In [None]:
# Display ds_RecAttr for the entry stored under the ('P', 'CGM5Min') key.
onecohort_record_base.Name_to_HRF['P', 'CGM5Min'].ds_RecAttr

# Step 3: TriggerName

In [None]:
# from recfldtkn.case_base.cohort_case_base import CaseSet_Trigger_Fn

###################################### # user 
# Name of the trigger; also reused as the stem of the exported module file name.
Trigger = 'CGM5MinEntry'

# Settings read by get_CaseTrigger_from_RecordBase:
#   'Trigger'              - key looked up in the record base's trigger cache
#   'case_id_columns'      - columns that identify one case (moved to the front, grouped on)
#   'ROName_to_RONameArgs' - per-ROName settings; 'attribute_columns' are the columns selected
# 'HumanID_list' and 'ObsDT' are not read by that function in this notebook.
Trigger_Args = {
    'Trigger': Trigger,
    'case_id_columns': ["PID", "ObsDT"],
    'HumanID_list': ['PID'],
    'ObsDT': 'ObsDT',
    'ROName_to_RONameArgs': {
        'hP.rCGM5Min': {'attribute_columns': ['PID', 'DT_s']},
    },
}
######################################


In [None]:
# Import 'inspect' module, which provides several useful functions to help get information about live objects
import inspect
import random
import numpy as np


###################################### user
def get_CaseTrigger_from_RecordBase(onecohort_record_base, Trigger_Args):
    """Build the case-trigger table for the trigger described by ``Trigger_Args``.

    Source frame:
      * if ``Trigger_Args['Trigger']`` is a key of
        ``onecohort_record_base.TriggerName_to_dfCaseTrigger``, that cached
        frame is used;
      * otherwise every ROName in ``Trigger_Args['ROName_to_RONameArgs']`` is
        loaded from ``onecohort_record_base.Name_to_HRF`` and the frame for
        ``'hP.rCGM5Min'`` is used.

    The source frame is never modified: the original implementation wrote the
    ``ObsDT`` column into the cached / record-owned frame in place, so a second
    call on the same cache returned a different schema (``DT_s`` kept).

    Processing steps:
      1. ``ObsDT`` is taken from ``DT_s`` (which is then dropped) when absent,
         and converted with ``pd.to_datetime`` in either case.
      2. ``Trigger_Args['case_id_columns']`` are moved to the front.
      3. Rows are collapsed with ``groupby(case_id_columns).last()``.
      4. A ``_keep_ratio`` column is filled from ``np.random.rand`` after
         seeding with 42.

    Side effect: reseeds the global ``random`` and ``numpy.random`` generators
    with 42 on every call.

    NOTE(review): this function is handed to ``Base.convert_variables_to_pystirng``
    together with the import lines for pandas, random and numpy, presumably to be
    exported as a standalone module; keep the body limited to ``pd``, ``random``
    and ``np`` and do not split it into helpers.

    Args:
        onecohort_record_base: object exposing ``TriggerName_to_dfCaseTrigger``,
            ``parse_ROName`` and ``Name_to_HRF``.
        Trigger_Args: dict with keys ``'Trigger'``, ``'case_id_columns'`` and
            ``'ROName_to_RONameArgs'``.

    Returns:
        pandas.DataFrame with ``case_id_columns`` first, the remaining columns,
        and ``_keep_ratio`` last.

    Raises:
        KeyError: if ``'hP.rCGM5Min'`` was not loaded on the uncached path, or
            if the source frame has neither ``ObsDT`` nor ``DT_s``.
    """
    TriggerName = Trigger_Args['Trigger']
    if TriggerName in onecohort_record_base.TriggerName_to_dfCaseTrigger:
        df_case_raw = onecohort_record_base.TriggerName_to_dfCaseTrigger[TriggerName]
    else:
        ROName_to_RONameArgs = Trigger_Args['ROName_to_RONameArgs']
        ROName_to_RODS = {}
        for ROName, ROName_Args in ROName_to_RONameArgs.items():
            RONameInfo = onecohort_record_base.parse_ROName(ROName)
            HumanName, RecordName = RONameInfo['HumanName'], RONameInfo['RecordName']
            record = onecohort_record_base.Name_to_HRF[(HumanName, RecordName)]

            attribute_columns = ROName_Args['attribute_columns']
            if hasattr(record, 'ds_RecAttr'):
                df_ro = record.ds_RecAttr.select_columns(attribute_columns).to_pandas()
            else:
                # NOTE(review): unlike the ds_RecAttr branch, attribute_columns is not
                # applied here, so every column of df_RecAttr is carried along.
                # Kept as-is to preserve existing output; confirm whether intended.
                df_ro = record.df_RecAttr
            ROName_to_RODS[ROName] = df_ro
        # This trigger is defined on the CGM5Min record only.
        ROName = 'hP.rCGM5Min'
        df_case_raw = ROName_to_RODS[ROName]

    # ------------------------------ Determine the ObsDT ------------------------------
    # Always derive a new frame so the cached / record-owned source stays untouched.
    if 'ObsDT' not in df_case_raw.columns:
        # drop() returns a new frame; ObsDT is appended last, as before.
        df_case = df_case_raw.drop(columns='DT_s')
        df_case['ObsDT'] = pd.to_datetime(df_case_raw['DT_s'])
    else:
        df_case = df_case_raw.copy()
        df_case['ObsDT'] = pd.to_datetime(df_case['ObsDT'])

    # ------------------------------- Update Column Sequence ------------------------
    case_id_columns = Trigger_Args['case_id_columns']
    columns = case_id_columns + [col for col in df_case.columns if col not in case_id_columns]
    df_case = df_case[columns].reset_index(drop=True)

    # One row per case id.
    df_case = df_case.groupby(case_id_columns).last().reset_index()

    # Fixed seeds make _keep_ratio reproducible across calls (this resets global RNG state).
    random.seed(42)
    np.random.seed(42)
    df_case['_keep_ratio'] = np.random.rand(len(df_case))
    return df_case

# Store the function's own source text on the function object as `fn_string`.
# NOTE(review): presumably read by Base.convert_variables_to_pystirng when the
# trigger module is written out; verify against that helper.
get_CaseTrigger_from_RecordBase.fn_string = inspect.getsource(get_CaseTrigger_from_RecordBase)
######################################

In [None]:
from recfldtkn.case_base.case_base import CASESET_TRIGGER_PATH
from recfldtkn.base import Base

# Destination file for this trigger: <CODE_FN>/<CASESET_TRIGGER_PATH>/<Trigger>.py
pypath = os.path.join(SPACE['CODE_FN'], CASESET_TRIGGER_PATH, f'{Trigger}.py')
print(pypath)

# Pieces handed to the code generator: the import lines to place first, plus the
# string variable, the dict variable and the function to include.
prefix = ['import pandas as pd', 'import random', 'import numpy as np']
string_variables = [Trigger]
iterative_variables = [Trigger_Args]
fn_variables = [get_CaseTrigger_from_RecordBase]
pycode = Base.convert_variables_to_pystirng(
    string_variables = string_variables,
    iterative_variables = iterative_variables,
    fn_variables = fn_variables,
    prefix = prefix,
)

# exist_ok=True replaces the separate exists() check, which could race with
# another process creating the directory between the check and makedirs().
os.makedirs(os.path.dirname(pypath), exist_ok=True)
with open(pypath, 'w') as file:
    file.write(pycode)

In [None]:
# Run the trigger function against the record base and display the resulting case table.
df_case = get_CaseTrigger_from_RecordBase(
    onecohort_record_base=onecohort_record_base,
    Trigger_Args=Trigger_Args,
)
df_case

In [None]:

# df_case['feature1'] = 122
# df_case['feature2'] = 1232
# df_case['feature_list'] = len(df_case) * 'a b c d'
# df_case