# Space

In [None]:
import sys
import os 
import logging
import pandas as pd
from pprint import pprint 
from IPython.display import display, HTML

# Locate the workspace root: the part of the current working directory that
# precedes the first occurrence of KEY. If KEY does not occur in the path,
# str.split returns the path unchanged and the chdir below is a no-op.
KEY = '1-WORKSPACE'
WORKSPACE_PATH = os.getcwd().split(KEY)[0]
print(WORKSPACE_PATH)
os.chdir(WORKSPACE_PATH)

logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO,
    format='[%(levelname)s:%(asctime)s:(%(filename)s@%(lineno)d %(name)s)]: %(message)s',
)

# Project directories, written relative to the working directory set above.
SPACE = {
    'DATA_RAW': './_Data/0-Data_Raw',
    'DATA_RFT': './_Data/1-Data_RFT',
    'DATA_CASE': './_Data/2-Data_CASE',
    'DATA_AIDATA': './_Data/3-Data_AIDATA',
    'DATA_EXTERNAL': './code/external',
    'CODE_FN': './code/pipeline',
}
assert os.path.exists(SPACE['CODE_FN']), f'{SPACE["CODE_FN"]} not found'

print(SPACE['CODE_FN'])
# Make the pipeline code importable. The membership test keeps sys.path free
# of duplicate entries when this cell is re-run in the same kernel.
if SPACE['CODE_FN'] not in sys.path:
    sys.path.append(SPACE['CODE_FN'])

# Step 1: Cohort and Human

In [None]:
from config.config_record.Cohort import CohortName_to_OneCohortArgs

# %%%%%%%%%%%%%%%%%%%%% user
# Choose the cohort to work on; its arguments are looked up by name in
# CohortName_to_OneCohortArgs. The commented line is an alternative cohort.
# CohortName = 'WellDoc2023CVSDeRx'
CohortName = 'WellDoc2025ALS'
OneCohort_Args = CohortName_to_OneCohortArgs[CohortName]
# %%%%%%%%%%%%%%%%%%%%%

In [None]:
# %%%%%%%%%%%%%%%%%%%%% user
# Name of the human entity; passed to HumanFn and Human further down.
HumanName = 'P'
# %%%%%%%%%%%%%%%%%%%%% user

In [None]:
from recfldtkn.record_base.cohort import CohortFn, Cohort
# Build the cohort object: a CohortFn is created from the source-to-cohort
# name and the SPACE paths, handed to Cohort at construction and again via
# setup_fn, and the cohort is then initialized.
Source2CohortName = OneCohort_Args['Source2CohortName']
cohort_fn = CohortFn(Source2CohortName, SPACE)
cohort = Cohort(OneCohort_Args, SPACE, cohort_fn)
cohort.setup_fn(cohort_fn)
cohort.initialize_cohort()

In [None]:
# NOTE(review): the attribute is referenced, not called, so this cell only
# displays the object's repr. Confirm that inspecting it (rather than running
# it) is the intent.
cohort.process_Source_to_Raw

In [None]:
# Inspect cohort.RawName_to_dfRaw; later cells pass it to the record helpers
# as RawName_to_dfRaw.
cohort.RawName_to_dfRaw

In [None]:
from recfldtkn.record_base.human import HumanFn, Human   

# Build the human object on top of the cohort, following the same
# construct / setup_fn / initialize sequence used for the cohort.
human_fn = HumanFn(HumanName, SPACE)
human = Human(HumanName, cohort, human_fn)
human.setup_fn(human_fn)
human.initialize_human()

In [None]:
# Inspect human.pypath (presumably the location of the human's function
# file; confirm against the Human class).
human.pypath

In [None]:
# Inspect the human table; later steps read it as df_Human.
human.df_Human

# Step 2: OneRecord_Args

In [None]:
# %%%%%%%%%%%%%%%%%%%%% user
# Name of the record type defined by this notebook (the patient record).
RecordName = 'P'
# %%%%%%%%%%%%%%%%%%%%% user

## %%%%%%%%%%%%%%%%%%%%% user
# All settings for this record type, kept together in one dictionary. It is
# passed to the record helpers in the following steps and written to the
# generated file in Step 14.
OneRecord_Args = {
    # Reuse RecordName so the name is defined in exactly one place.
    'RecordName': RecordName,
    'RecID': 'PID',
    'RecIDChain': ['PID'],

    # None means no parent record is loaded in Step 3.
    'ParentRecName': None,
    'RawHumanID': 'PatientID',

    'RecDT': None,
    # Raw tables this record is built from; each is configured in Step 5.
    'RawNameList': ['Ptt'],
    'human_group_size': 100000,
    'rec_chunk_size': 100000,
    'UseTzColName': 'UserTimeZoneOffset',
}
## %%%%%%%%%%%%%%%%%%%%% user

In [None]:
# Show the raw-table names available in this cohort.
RawName_to_dfRaw = cohort.RawName_to_dfRaw
list(RawName_to_dfRaw)

In [None]:
from recfldtkn.record_base.record import RecordFn, Record
# The Record is created with record_fn=None here (presumably because the
# record function file is only written in Step 14; confirm).
record_fn = None 

# Record instance for RecordName, bound to the human created above. The
# following steps call its helper methods one at a time.
record = Record(RecordName, human, record_fn)
# Display the instance.
record

In [None]:
# Re-bind the working variables from the objects that own them, so the
# following steps read the values currently held by cohort and human.
OneCohort_Args = cohort.OneCohort_Args
RawName_to_dfRaw = cohort.RawName_to_dfRaw
df_Human = human.df_Human

# Step 3: RecordPrtInfo

In [None]:
# Human-level arguments; passed to the record helpers in Steps 6, 8 and 13.
OneHuman_Args = human.OneHuman_Args

In [None]:
# Display the cohort arguments currently in use.
OneCohort_Args

In [None]:
# Load the parent record only when this record type declares one.
if OneRecord_Args['ParentRecName'] is not None:

    # full version
    # record_prt_fn = RecordFn(OneRecord_Args['ParentRecName'], SPACE)
    # record_prt = Record(OneRecord_Args['ParentRecName'], human)
    # record_prt.setup_fn(record_prt_fn)
    # record_prt.initialize_record()

    # neat version
    # NOTE(review): Record gets no record_fn and setup_fn gets no argument
    # here, unlike the calls elsewhere in this notebook. Presumably both
    # have defaults; confirm.
    record_prt = Record(OneRecord_Args['ParentRecName'], human)
    record_prt.setup_fn()
    record_prt.initialize_record()
else:
    # ParentRecName is None: this record type has no parent record.
    record_prt = None 
    
print('record_prt:', record_prt)

In [None]:
# Parent-record information for this record type. record_prt is None when
# OneRecord_Args['ParentRecName'] is None (see the previous cell).
RecordPrtInfo = record.get_RecordPrtInfo(OneRecord_Args, human, record_prt)
# Re-read the human table after the call.
# NOTE(review): presumably get_RecordPrtInfo may update it; confirm.
df_Human = human.df_Human
RecordPrtInfo

# Step 4: Display Record's RawColumns

In [None]:
# Ask the record helper for one sample per raw table named in RawNameList;
# the result maps raw-table name -> sample and is rendered in the next cell.
RawNameList = OneRecord_Args['RawNameList']
RawName_to_dfRaw = cohort.RawName_to_dfRaw
RawName_to_Sample = record.display_Record_RawNameCols(RawNameList, RawName_to_dfRaw)

In [None]:
# Render each raw table's sample under a header line carrying its name.
for RawName, df_sample in RawName_to_Sample.items():
    print(f'\n===== {RawName} =====')
    display(df_sample)

# Step 5: Select Useful Raw Columns


In [None]:
## %%%%%%%%%%%%%%%%%%%%% user
# Per-raw-table configuration, keyed by raw-table name. One RawConfig entry
# is added below for the 'Ptt' table.
RawName_to_RawConfig = {}

RawName = 'Ptt'
# Columns selected from the 'Ptt' table. The commented-out names are not
# selected (they appear to be the table's remaining columns; uncomment a
# name to include it).
raw_columns = [
 'PatientID', # 'PatientRegistrationPageID', 'AlertSystemEnabled',
 # 'PatientLastMobileActivationID', 'MaritalStatusID', 'RaceID', 'EthinicityID',
 # 'IsEligible', 'MedicationReminderEnabled', 'AppointmentReminderEnabled',
 # 'PatientCreatedDate', 'PatientModifiedDate', 'RowVersionID_x',
 # 'MedicationViewMode', 'InPersonTrainingStatus',
#  'InPersonTrainingStatusDatetime', 'InPersonTrainingContactNumber',
#  'InPersonTrainingScheduledSlot', 'IsRefillRequired',
#  'IsRefillRequiredAnsweredDate', 'PAPEligibility', 'PAPStatus',
#  'PAPStatusReason', 
 'MRSegmentID', 
 # 'RefillReason', 'LastPushNotificationID',
#  'LastPushNotificationDate', 'IsTermsAgreed',
#  'NextWeeklyReportGeneratedDatetime', 'AllowMarketingMessages',
#  'IsWeeklyChallengeStartShown', 'WeeklyChallengeCount',
#  'IsWeeklyChallengeTwelveWeekShown', 
'MRSegmentModifiedDateTime',
#  'IsWeeklyChallengeEnabled', 'NextWeeklyReportGeneratedTimeZoneOffset',
#  'NextEmailReminderCheckDateTime', 'RxRefillNotificationCheckDateTime',
#  'TrendingMessageLatestTriggerDate', 'FastingTrendingMessageLatestTriggerDate',
#  'IsEmailUnSubscribed', 'BGUOMID', 'HeightUOMID', 'WeightUOMID',
#  'IsBPIntroShown', 'DistanceUOMID', 'ExternalPatientCode', 'OnboardingStepId',
#  'IsWelcomeEmailSend', 'ChallengeNoEngagementEmailId',
#  'NonChallengeNoEngagementEmailId', 'LastNoEngagementEmailSentDateTime',
#  'IsSVReportReminderEmailSend', 'IsNoEngagementEmailStopped', 
 'DiseaseType',
#  'IsIOBEnabled', 'IOBChangedDate', 'IsInsulinAdded', 'IsCGMConnected',
#  'NextDailyReportGeneratedDatetime', 'ExternalPatientCodeHashed',
#  'PrescribedDIA', 'LevelOfEducationID', 'IDCQuizAnswers',
#  'IDCQuizRespondedDateTime', 'TitrationMode', 'IsTermsAgreedDateTime',
#  'IsSharePersonalInfoAgreed', 'IsSharePersonalInfoAgreedDateTime',
#  'TitrationType', 'ESIInvitationCode', 'EnabledNotificationCategories',
#  'IsPregnant', 'AdaptHealthUserConfiguration', 'DiseaseCombinationID', 'RoleID',
#  'UserFirstName', 'UserMiddleName', 'UserLastName', 'DateOfBirth', 
'Gender',
#  'TitleID', 'SuffixID', 'StatusID', 'StatusReason', 'EntrySourceID',
#  'UserEmail', 'UserEmailHashed', 'ExternalSystemUserID',
#  'RegistrationCompletedDate', 'RegistrationCompletedTimeZoneOffset',
#  'RegistrationCompletedTimeZone', 
 'ActivationDate',
#  'ActivationDateTimeZoneOffset', 
#  'ActivationDateTimeZone', 
 # 'CreatedDate',
#  'ModifiedDate', 'UserSkey', 'RowVersionID_y', 'CellPhoneNumber',
#  'FeatureProfileID', 'SystemUserID', 'SystemAccessCode', 'PatientAuthorization',
#  'TrainingCompletedDate', 'IsTrainingSkipped', 'UserNickName', 'UpdateSourceID',
#  'FirstNameHashed', 'LastNameHashed', 'PatientDrivenRegistrationDate',
#  'PatientDrivenRegistrationDateTimeZoneOffset',
#  'PatientDrivenRegistrationDateTimeZone', 'RegisteredSourceID', 'AccountID',
 'UserTimeZoneOffset', 'UserTimeZone', 
 'Description', 'YearOfBirth',
#  'RxRefillCheckDateTime', 'Language', 'Country', 'ExternalUserID',
#  'AHPatientKey', 'CommunityConsentDateTime', 'RecipeCommentConsentDateTime',
#  'ProfileLookupID', 'FullNameHashed'
 ]

# No datetime column is designated for this table; OneRecord_Args['RecDT']
# is None as well.
raw_datetime_column = None 

# Identifier and time-zone columns; all three also appear in raw_columns.
raw_base_columns = ['PatientID', 'UserTimeZoneOffset', 'UserTimeZone', ]

# rec_chunk_size repeats the value of OneRecord_Args['rec_chunk_size'].
RawConfig = {
    'raw_columns': raw_columns, 
    'rec_chunk_size': 100000,
    'raw_datetime_column': raw_datetime_column,
    'raw_base_columns': raw_base_columns,
}
RawName_to_RawConfig[RawName] = RawConfig
## %%%%%%%%%%%%%%%%%%%%% user

# Display the finished configuration.
RawName_to_RawConfig

In [None]:
# Display the raw-table names of this record, to cross-check against the
# keys configured in RawName_to_RawConfig above.
OneRecord_Args['RawNameList']

In [None]:
# Display the 'CohortLabel' entry of the active cohort's arguments.
OneCohort_Args['CohortLabel']

# Step 6: Get df_HumanSelected

In [None]:
# Raw-table names this record is built from, as a fresh list.
list(OneRecord_Args['RawNameList'])

In [None]:
# Display the raw-table mapping that is passed to the record helper in Step 8.
RawName_to_dfRaw

In [None]:
# Pull the current argument dictionaries and the human table from the objects
# that own them. OneRecord_Args is the dictionary defined in Step 2.
OneCohort_Args = cohort.OneCohort_Args
OneHuman_Args = human.OneHuman_Args
df_Human = human.df_Human

# The result carries a 'human_group' column, which Step 7 groups by.
df_HumanSelected = record.get_dfHumanSelected(OneCohort_Args, OneHuman_Args, OneRecord_Args, df_Human)
df_HumanSelected

# Step 7: Get df_HumanGroup

In [None]:
# Iterate through each group in the DataFrame df_HumanSelected
# NOTE(review): after the loop, df_HumanGroup stays bound to the LAST group,
# and Step 8 relies on that leaked value. If df_HumanSelected is empty the
# loop body never runs and df_HumanGroup is never bound by this cell.
for human_group, df_HumanGroup in df_HumanSelected.groupby('human_group'):
    logger.info(f'\n====={human_group}=====')
    display(df_HumanGroup)

# Step 8: Get df_RawRec_for_HumanGroup

In [None]:
# Build the raw-record table for df_HumanGroup, i.e. the group left bound by
# the loop in Step 7, using the per-table configuration from Step 5.
df_RawRec_for_HumanGroup = record.get_dfRawRec_from_dfHumanGroup(
    OneHuman_Args,
    df_HumanGroup,
    RawName_to_RawConfig,
    RawName_to_dfRaw,
)
df_RawRec_for_HumanGroup

# Step 9: Get df_RawRecProc_for_HumanGroup

In [None]:
import inspect 


## %%%%%%%%%%%%%%%%%%%%% user
def get_RawRecProc_for_HumanGroup(df_RawRec_for_HumanGroup, OneRecord_Args, df_Human):
    """Clean the raw patient records of one human group.

    The work is done on a copy, so the DataFrame passed in is left untouched.

    Args:
        df_RawRec_for_HumanGroup: raw records; must contain the columns
            'ActivationDate', 'MRSegmentModifiedDateTime', 'DiseaseType'
            and 'UserTimeZoneOffset'.
        OneRecord_Args: record configuration (not used by this function).
        df_Human: human table (not used by this function).

    Returns:
        A new DataFrame with the same rows and columns in which the two date
        columns are parsed to datetimes, 'DiseaseType' is the value rounded
        to one decimal and rendered as a string, and 'UserTimeZoneOffset' is
        an int with missing values replaced by 0.
    """
    # Copy first: assigning columns on the caller's frame would silently
    # change the raw data that the caller still holds and has displayed.
    df = df_RawRec_for_HumanGroup.copy()

    # 1. filter out the records we don't need (optional)
    # 2. create a new column for raw record id (optional)
    # 3. update datetime columns
    for column in ['ActivationDate', 'MRSegmentModifiedDateTime']:
        # format='mixed' lets every value be parsed with its own format.
        df[column] = pd.to_datetime(df[column], format='mixed')

    # Render DiseaseType as text such as '1.0'.
    # NOTE(review): a missing value becomes the literal string 'nan' here;
    # confirm that downstream code expects that.
    column = 'DiseaseType'
    df[column] = df[column].astype(float).round(1).astype(str)

    # A missing offset is treated as 0.
    df['UserTimeZoneOffset'] = df['UserTimeZoneOffset'].fillna(0).astype(int)
    df_RawRecProc = df
    return df_RawRecProc
## %%%%%%%%%%%%%%%%%%%%% user

# Attach the function's own source text to it as the attribute fn_string
# (presumably read when the function is written to file in Step 14; confirm).
get_RawRecProc_for_HumanGroup.fn_string = inspect.getsource(get_RawRecProc_for_HumanGroup)


In [None]:
# Apply the processing function defined above to the raw records of the
# current human group, then display the result.
df_RawRecProc_for_HumanGroup = get_RawRecProc_for_HumanGroup(df_RawRec_for_HumanGroup, OneRecord_Args, df_Human)
df_RawRecProc_for_HumanGroup 

# Step 10: Get dfRecAttr from dfRawRec

In [None]:
# Derive the record-attribute table from the processed raw records, using the
# record settings and the parent-record information from Step 3.
df_RecAttr_for_HumanGroup = record.get_dfRecAttr_from_dfRawRec(df_RawRecProc_for_HumanGroup, OneRecord_Args, RecordPrtInfo)
df_RecAttr_for_HumanGroup

# Step 11: Attr_Cols

In [None]:
## %%%%%%%%%%%%%%%%%%%%% user

# Identifier, activation-date and time-zone columns. 'PID' is the record ID
# named in OneRecord_Args['RecID'].
base_cols = [
    'PID', 'PatientID', 
    'ActivationDate', 
    'UserTimeZone', 'UserTimeZoneOffset', 
]

# Final column list of the record: the base columns plus the attributes kept.
# It is also passed to get_dsRecAttr (Step 12) and saved in Step 14.
attr_cols = base_cols + [
    'YearOfBirth', 
    'MRSegmentModifiedDateTime', 
    'Gender', 'MRSegmentID', 'DiseaseType'
]
## %%%%%%%%%%%%%%%%%%%%% user

# Keep only attr_cols, in that order, and renumber the index from 0.
df_RecAttr_for_HumanGroup = df_RecAttr_for_HumanGroup[attr_cols].reset_index(drop=True)
df_RecAttr_for_HumanGroup

# Step 12: ds_RecAttr

In [None]:
# Build the record-attribute dataset from the settings, column list and
# processing function defined in the previous steps.
ds_RecAttr = record.get_dsRecAttr(
    OneRecord_Args=OneRecord_Args,
    human=human,
    RawName_to_RawConfig=RawName_to_RawConfig,
    attr_cols=attr_cols,
    get_RawRecProc_for_HumanGroup=get_RawRecProc_for_HumanGroup,
    record_prt=record_prt,
    RecordPrtInfo=RecordPrtInfo,
)
ds_RecAttr

# Step 13: ds_RecIndex

In [None]:
# Build the record index dataset from the attribute dataset of Step 12.
ds_RecIndex = record.get_dsRecIndex(OneHuman_Args, OneRecord_Args, ds_RecAttr)
ds_RecIndex

In [None]:
# View the index dataset as a pandas object for easier inspection.
ds_RecIndex.to_pandas()

# Step 14: Save Fn

In [None]:
from recfldtkn.base import Base 

# Import lines handed to the code generator (presumably emitted at the top of
# the generated file so the saved function can resolve pd and np; confirm).
prefix = ['import pandas as pd', 'import numpy as np']

# Plain variables and functions to serialize into the record's function file.
iterative_variables = [OneRecord_Args, RawName_to_RawConfig, attr_cols]
fn_variables = [get_RawRecProc_for_HumanGroup]
pypath = record.pypath
print(pypath)

# The method name is spelled 'pystirng' exactly as this notebook calls it;
# keep it in sync with the library.
pycode = Base.convert_variables_to_pystirng(
    fn_variables=fn_variables,
    iterative_variables=iterative_variables,
    prefix=prefix,
)

# exist_ok=True replaces the check-then-create pair. The guard skips
# makedirs when pypath has no directory part, since os.makedirs('') raises.
out_dir = os.path.dirname(pypath)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
# Python source files are read as UTF-8 by default, so write the file as
# UTF-8 explicitly instead of using the platform's default encoding.
with open(pypath, 'w', encoding='utf-8') as file:
    file.write(pycode)

# Step 15: Test 

In [None]:
from recfldtkn.record_base.cohort import CohortFn, Cohort
from config.config_record.Cohort import CohortName_to_OneCohortArgs

# %%%%%%%%%%%%%%%%%%%%% user
# Test against the same cohort the record function was built from in Step 1.
# The commented line is the alternative cohort; switch both places together.
# CohortName = 'WellDoc2023CVSDeRx'
CohortName = 'WellDoc2025ALS'
OneCohort_Args = CohortName_to_OneCohortArgs[CohortName]
# %%%%%%%%%%%%%%%%%%%%%

# Rebuild the cohort from scratch. This re-binds cohort_fn and cohort, so
# cells above this point see the new objects if they are re-run afterwards.
Source2CohortName = OneCohort_Args['Source2CohortName']
cohort_fn = CohortFn(Source2CohortName, SPACE)
cohort = Cohort(OneCohort_Args, SPACE, cohort_fn)
cohort.setup_fn(cohort_fn)
cohort.initialize_cohort()

In [None]:
from recfldtkn.record_base.human import HumanFn, Human   

# %%%%%%%%%%%%%%%%%%%%% user
# Same human name as in Step 1.
HumanName = 'P'
# %%%%%%%%%%%%%%%%%%%%% user

# Rebuild the human object on top of the test cohort.
human_fn = HumanFn(HumanName, SPACE)
human = Human(HumanName, cohort, human_fn)
human.setup_fn(human_fn)
human.initialize_human()

In [None]:
from recfldtkn.record_base.record import RecordFn, Record
# %%%%%%%%%%%%%%%%%%%%%
RecordName = 'P' # Pat
# %%%%%%%%%%%%%%%%%%%%%



# Unlike Step 2, the Record is created here with a real RecordFn
# (presumably loaded from the file written in Step 14; confirm) and then
# fully initialized.
record_fn = RecordFn(RecordName, SPACE) 
record = Record(RecordName, human, record_fn)
record.setup_fn(record_fn)
record.initialize_record()    

In [None]:
# Inspect the attribute dataset held by the freshly initialized record.
record.ds_RecAttr

In [None]:
# Inspect the index dataset held by the freshly initialized record.
record.ds_RecIndex