# Space

In [None]:
import sys
import os 
import logging
import pandas as pd
from pprint import pprint 
from IPython.display import display, HTML

# Workspace root = the part of the current directory path that precedes KEY.
# When KEY does not occur in the path, str.split returns the path unchanged,
# so the current directory itself is used.
KEY = '1-WORKSPACE'
WORKSPACE_PATH = os.getcwd().split(KEY)[0]
print(WORKSPACE_PATH)
os.chdir(WORKSPACE_PATH)

logging.basicConfig(
    level=logging.INFO,
    format='[%(levelname)s:%(asctime)s:(%(filename)s@%(lineno)d %(name)s)]: %(message)s',
)
logger = logging.getLogger(__name__)

# Named locations used by the pipeline; every path is relative to the
# workspace root that was just made the current directory.
SPACE = {
    'DATA_RAW': './_Data/0-Data_Raw',
    'DATA_RFT': './_Data/1-Data_RFT',
    'DATA_CASE': './_Data/2-Data_CASE',
    'DATA_AIDATA': './_Data/3-Data_AIDATA',
    'DATA_EXTERNAL': './code/external',
    'CODE_FN': './code/pipeline',
}

# Fail early if the pipeline code folder is missing (e.g. wrong working directory).
assert os.path.exists(SPACE['CODE_FN']), '{} not found'.format(SPACE['CODE_FN'])

# Make the pipeline packages importable by the cells below.
print(SPACE['CODE_FN'])
sys.path.append(SPACE['CODE_FN'])

# Step 1: Prepare Cohort

In [None]:
from recfldtkn.base import Base
from recfldtkn.record_base.cohort import CohortFn, Cohort
from config.config_record.Cohort import CohortName_to_OneCohortArgs

# %%%%%%%%%%%%%%%%%%%%% user
# Select the cohort to process: exactly one `CohortName = ...` line should be
# active. The commented-out lines are alternative cohort names (presumably also
# keys of CohortName_to_OneCohortArgs — verify before switching).
# CohortName = 'WellDoc2022CGM'
# CohortName = 'WellDoc2023CVSDeRx'
# CohortName = 'WellDoc2023CVSTDC'



# CohortName = 'WellDoc2025CVS'
# CohortName = 'WellDoc2025ALS'


CohortName = 'WellDoc2025LLY'
# CohortName = 'WellDoc2025LLY_1n3'
# CohortName = 'WellDoc2025LLY_2n3'
# CohortName = 'WellDoc2025LLY_3n3'

# CohortName = 'aireadi-noimage-v2'
# Look up the argument dict for the chosen cohort; raises KeyError for an unknown name.
OneCohort_Args = CohortName_to_OneCohortArgs[CohortName]
# %%%%%%%%%%%%%%%%%%%%%

# Build the cohort: a CohortFn is created from the source-to-cohort name and
# SPACE, then handed both to the Cohort constructor and to setup_fn before the
# cohort is initialized.
Source2CohortName = OneCohort_Args['Source2CohortName']
cohort_fn = CohortFn(Source2CohortName, SPACE)
cohort = Cohort(OneCohort_Args, SPACE, cohort_fn)
cohort.setup_fn(cohort_fn)
cohort.initialize_cohort()

In [None]:
# Mapping of raw table names taken from the initialized cohort.
# NOTE(review): the values look like file paths to the raw tables (the
# commented-out code in Step 8 reads them with pd.read_csv) — confirm.
RawName_to_dfRaw  = cohort.RawName_to_dfRaw
RawName_to_dfRaw_Type = cohort.get_RawName_to_dfRaw_Type(RawName_to_dfRaw)
print(RawName_to_dfRaw_Type)

# Print the mapping in its original key order (sort_dicts=False) rather than alphabetically.
pprint(RawName_to_dfRaw, sort_dicts=False)

In [None]:
# RawName_to_dfRaw  = cohort.RawName_to_dfRaw
# RawName_to_dfRaw_Type = cohort.get_RawName_to_dfRaw_Type(RawName_to_dfRaw)
# print(RawName_to_dfRaw_Type)

# pprint(RawName_to_dfRaw, sort_dicts=False)

# Step 2: OneHuman_Args

In [None]:
from recfldtkn.record_base.human import Human   

# Create the Human object without a HumanFn: None is passed here, whereas
# Step 7 builds a HumanFn after the human module has been written in Step 6.
HumanName = 'P'
human_fn = None
human = Human(HumanName, cohort, human_fn)

In [None]:
# %%%%%%%%%%%%%%%%%%%%% user
# Settings for the human entity. The dict is passed to
# human.get_df_Human2RawNum_on_RawNameTodfRaw in Step 5 and serialised into
# the generated module in Step 6.
# NOTE(review): 'RawHumanID' matches the column name returned by
# get_RawHumanID_from_dfRawColumns; the exact meaning of 'HumanIDLength'
# is defined by the recfldtkn package — confirm there.
OneHuman_Args = dict(
    HumanName='P',
    HumanID='PID',
    RawHumanID='PatientID',
    HumanIDLength=10,
)
# %%%%%%%%%%%%%%%%%%%%% user

# Step 3: Get RawHumanID from dfRawColumns

In [None]:
import inspect 

# %%%%%%%%%%%%%%%%%%%%% user
def get_RawHumanID_from_dfRawColumns(dfRawColumns):
    """Pick the column that identifies a human in one raw table.

    Args:
        dfRawColumns: column names of a raw table; any container that
            supports the `in` operator.

    Returns:
        'PatientID' when that exact name is among the columns, otherwise None.
    """
    if 'PatientID' not in dfRawColumns:
        return None
    return 'PatientID'

# Attach the function's own source text to it as the attribute `fn_string`.
# NOTE(review): presumably read by Base.convert_variables_to_pystirng when the
# function is written to the generated module in Step 6 — confirm.
get_RawHumanID_from_dfRawColumns.fn_string = inspect.getsource(get_RawHumanID_from_dfRawColumns)
# %%%%%%%%%%%%%%%%%%%%% user


In [None]:
# Show the raw tables together with their columns, passing the ID-column
# selector defined above so its choice can be checked per table.
human.display_dfRaw_with_Columns(RawName_to_dfRaw, get_RawHumanID_from_dfRawColumns)

In [None]:
# CurriculumLessonProgressDetails

# Step 4: Excluded RawName List


We only keep patients who have at least one record.

However, some tables should not be counted towards that total, for example the Patient table.

eg. 

patient A: PatientTable 1, ATable 0, BTable 0, CTable 0. 

patient B: PatientTable 1, ATable 1, BTable 0, CTable 0


We do not want A, because A's total number of records outside PatientTable is 0.
We want B, because B's total number of records outside PatientTable is greater than 0.


So we put `PatientTable` into `Excluded_RawNameList`.


In [None]:
# How this list is built:
#   1. Start with an empty list and run the notebook through Step 5.
#   2. Inspect the resulting table of record counts (df_Human2RawNum).
#   3. Add the name of every raw table that should not be counted, then
#      re-run from this cell.

# %%%%%%%%%%%%%%%%%%%%% user
# Excluded_RawNameList = []
Excluded_RawNameList = [
    'Patient',
    'QuestionResponse',
    'PatientBloodGlucoseTargets',
    'Rx',
    'PatientObservationSummary',
    'PatientTargetSegment',
    'TDC',
]
# %%%%%%%%%%%%%%%%%%%%% user

# Step 5: Human2RawNum

In [None]:
# Inputs defined in earlier cells: cohort, human, OneHuman_Args,
# get_RawHumanID_from_dfRawColumns and Excluded_RawNameList.
# The raw-table mapping is re-read from the cohort's CohortInfo so this cell
# does not depend on the copy bound in Step 1.
RawName_to_dfRaw = cohort.CohortInfo['RawName_to_dfRaw']

# Build the table of raw-record counts, one column per raw table
# (see the Step 4 notes for how Excluded_RawNameList is meant to be used).
df_Human2RawNum = human.get_df_Human2RawNum_on_RawNameTodfRaw(
    cohort,
    RawName_to_dfRaw,
    OneHuman_Args,
    get_RawHumanID_from_dfRawColumns,
    Excluded_RawNameList,
)

In [None]:
# List the columns of the Step 5 result to see which raw tables are present.
df_Human2RawNum.columns

In [None]:
# Display the Step 5 result; use it to decide which tables belong in Excluded_RawNameList.
df_Human2RawNum

In [None]:
# (df_Human2RawNum['ELogFoodItem'] > 0).sum()

In [None]:
# Total of the 'ElogBGEntry' column for the currently selected cohort.
print(CohortName, df_Human2RawNum['ElogBGEntry'].sum())

# Totals noted from runs on the three WellDoc2025LLY splits:
# WellDoc2025LLY_1n3 20395361.0
# WellDoc2025LLY_2n3 20648519.0
# WellDoc2025LLY_3n3 13665602.0

In [None]:
# Sum of the three per-split 'ElogBGEntry' totals noted in the previous cell
# (= 54709482); compare with the total printed for the unsplit WellDoc2025LLY cohort.
20395361 + 20648519 + 13665602

In [None]:
# df_Human2RawNum['ElogBGEntry'].describe()

In [None]:
# (df_Human2RawNum['ElogBGEntry'] > 0).sum()

In [None]:
# df_Human2RawNum.columns

# Step 6: Save Human Fn

In [None]:
# Destination of the generated module, as provided by the Human object.
pypath = human.pypath

# Import statements handed to the code generator as `prefix`
# (presumably emitted at the top of the generated module — confirm).
prefix = [
    'import os',
    'import pandas as pd',
    'import numpy as np',
]

# Plain variables and functions to serialise into the generated module.
iterative_variables = [OneHuman_Args, Excluded_RawNameList]
fn_variables = [get_RawHumanID_from_dfRawColumns]

pycode = Base.convert_variables_to_pystirng(iterative_variables=iterative_variables,
                                            fn_variables=fn_variables,
                                            prefix=prefix)

# Create the target folder if needed. exist_ok=True replaces the separate
# exists() check (no check-then-create race), and the guard skips the call when
# pypath has no directory component, because os.makedirs('') raises.
pydir = os.path.dirname(pypath)
if pydir:
    os.makedirs(pydir, exist_ok=True)
# print(pypath)

with open(pypath, 'w') as file:
    file.write(pycode)

# Show a clickable link to the file that was just written.
full_path = os.path.join(WORKSPACE_PATH, pypath)

display(HTML(f'{pypath} <a href="{full_path}" target="_blank">Open File</a>'))

# Step 7: Test

In [None]:
from recfldtkn.record_base.cohort import CohortFn, Cohort
from config.config_record.Cohort import CohortName_to_OneCohortArgs

# %%%%%%%%%%%%%%%%%%%%% user
# CohortName is not assigned in this cell: the value chosen in Step 1 is
# reused unless one of the lines below is uncommented.
# CohortName = 'WellDoc2023CVSDeRx'
# CohortName = 'WellDoc2022CGM'
OneCohort_Args = CohortName_to_OneCohortArgs[CohortName]
# %%%%%%%%%%%%%%%%%%%%%

# Rebuild the cohort from scratch with the same sequence of calls as Step 1,
# so the test does not rely on the objects created earlier in the notebook.
Source2CohortName = OneCohort_Args['Source2CohortName']
cohort_fn = CohortFn(Source2CohortName, SPACE) # object of this class
cohort = Cohort(OneCohort_Args, SPACE, cohort_fn)
cohort.setup_fn(cohort_fn)
cohort.initialize_cohort()

In [None]:
from recfldtkn.record_base.human import HumanFn, Human   

# %%%%%%%%%%%%%%%%%%%%% user
HumanName = 'P'
# %%%%%%%%%%%%%%%%%%%%% user

# Unlike Step 2 (which passed human_fn=None), a HumanFn is constructed here and
# given to Human both at construction and through setup_fn.
# NOTE(review): HumanFn presumably loads the module written in Step 6 — confirm.
human_fn = HumanFn(HumanName, SPACE)
human = Human(HumanName, cohort, human_fn)
human.setup_fn(human_fn)
# load_data=False is passed explicitly; see Human.initialize_human for its effect.
human.initialize_human(load_data=False)

In [None]:
# Display the df_Human table held by the re-initialized Human object.
human.df_Human

In [None]:
# List the columns of df_Human (the commented-out filters in Step 8 refer to them).
human.df_Human.columns

# Step 8: Select Human

In [None]:
# human.df_Human.columns

# df = human.df_Human.copy()

# df = df[df['ElogBGEntry'] > 10000]
# df = df[df['ElogBGEntry'] < 40000]
# df = df[df['ElogWeightEntry'] > 10]
# df = df[df['PatientHeight'] >= 1]
# df = df[df['ELogFoodItem'] > 10]
# df = df[df['ELogExerciseEntry'] > 10]
# df = df[df['MedAdministration'] > 10]
# df = df[df['SleepEntry'] > 10]


# df_human_selected = df
# df_human_selected


# PID_to_RawName_to_dfRaw = {}
# template_RawName_to_dfRaw = {}
# for RawName in cohort.RawName_to_dfRaw:
#     dfRaw_Path = cohort.RawName_to_dfRaw[RawName]
#     print(f'\n\n{RawName}', dfRaw_Path)

#     try:
#         dfRaw = pd.read_csv(dfRaw_Path, low_memory= False)
#     except:
#         print(f'Error: {dfRaw_Path}')
#         continue 

#     for idx, onehuman in df_human_selected.iterrows():
#         # print(onehuman)
#         PatientID = onehuman['PatientID']

#         if PatientID not in PID_to_RawName_to_dfRaw:
#             PID_to_RawName_to_dfRaw[PatientID] = {}

#         RawName_to_dfRaw = PID_to_RawName_to_dfRaw[PatientID]

        
#         template_RawName_to_dfRaw[RawName] = dfRaw.iloc[0:0].to_dict('list')
#         if 'PatientID' in dfRaw.columns:
#             PatientID_column = 'PatientID'
#         elif 'PatientId' in dfRaw.columns:
#             PatientID_column = 'PatientId'
#             print('***PatientID is not the column name, PatientId is the column name**')
#         elif 'UserID' in dfRaw.columns:
#             PatientID_column = 'UserID'
#             print('***PatientID is not the column name, UserID is the column name**')
#         else:
#             # raise ValueError(f'Error: {RawName}')
#             print(f'Error: {RawName}, now PatientID columns: {dfRaw.columns}')
#             continue 

#         dfRaw_selected = dfRaw[dfRaw[PatientID_column] == PatientID]
#         print(RawName, len(dfRaw_selected))
#         RawName_to_dfRaw[RawName] = dfRaw_selected


# # case
# # PID + ObsDT
# RawName_list

# RawName_to_RawNameInfo = {
#     'DT_column": []
# }

# # RawName_list

# [i for i in cohort.RawName_to_dfRaw]

# # ObsPeriod

# # from ObsDT - 2m, ObsDT

# # PID + ObsDT
# select * from TableName where PID = PID and Observatiions >= ObsDT - 2m and Observatiions <= ObsDT


# df_table 

# df_table --> json. 


In [None]:
# import json 

# for PID, RawName_to_dfRaw in PID_to_RawName_to_dfRaw.items():

#     folder = os.path.join(SPACE['DATA_RAW'], 'Inference', 'patient_sample')
#     PID = str(int(PID))
#     path = os.path.join(folder, f'inference_form_sample_{PID}.json')
#     d = {}
#     d['PatientID'] = PID
#     for RawName, dfRaw in RawName_to_dfRaw.items():
#         d[RawName] = dfRaw.to_dict(orient='list')

#     with open(path, 'w') as file:
#         json.dump(d, file, indent=4)


# pprint(d)


# folder = os.path.join(SPACE['DATA_RAW'], 'Inference', 'patient_sample')
# path = os.path.join(folder, f'inference_form_template.json')

# d = template_RawName_to_dfRaw
# with open(path, 'w') as file:
#     json.dump(d, file, indent=4)