# Space

In [None]:
import os
import logging
import pandas as pd 
from pprint import pprint 
from IPython.display import display, HTML
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)
# Workspace root = the current working directory up to and including the
# first occurrence of KEY.
# NOTE: if KEY does not occur in the cwd, split() returns the whole path and
# KEY is appended directly to it, so the os.chdir below targets cwd + KEY.
KEY = 'WorkSpace'
WORKSPACE_PATH = os.getcwd().split(KEY)[0] + KEY
# print(WORKSPACE_PATH)
# Switch to the workspace root before importing project modules.
# NOTE(review): presumably `proj_space` is resolved relative to this
# directory, which would make the statement order significant; confirm.
os.chdir(WORKSPACE_PATH)
import sys
from proj_space import SPACE
# Make the directory named by SPACE['CODE_FN'] importable.
sys.path.append(SPACE['CODE_FN'])
# Record the resolved workspace root in the shared SPACE settings.
SPACE['WORKSPACE_PATH'] = WORKSPACE_PATH
# Module-level logger; INFO records are formatted with level, timestamp,
# file@line and logger name.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format='[%(levelname)s:%(asctime)s:(%(filename)s@%(lineno)d %(name)s)]: %(message)s')

# Turn off caching in the `datasets` library for this session.
from datasets import disable_caching
disable_caching()

# Tag stored in the shared SPACE settings.
# NOTE(review): presumably selects the model version/endpoint used by
# downstream code; confirm against the consumers of SPACE.
SPACE['MODEL_ENDPOINT'] = 'vTest'

In [None]:
# Reload imported modules automatically before each cell runs, so edits to
# project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


# Part 1: AIData

In [None]:


# Author's sampling note: one day = 288 points; "24pd" = 24 per day (24 / 288 = 1/12).
from datasets import load_from_disk

# Alternative AIData sets used previously:
# AIDataName = 'CGM_32h_24pd_WellDoc_v2_v0323'   # CGM, 32h, 24 data per day.
# AIDataName = 'CGM_32h_24pd_WellDoc_v2_sample'  # CGM, 32h, 24 data per day.
AIDataName = 'CGM2EventFood_bf6h_WellDoc_v2_v0323'

# Load the saved dataset from <SPACE['DATA_AIDATA']>/<AIDataName>.
path = os.path.join(SPACE['DATA_AIDATA'], AIDataName)
print(path)
dataset = load_from_disk(path)

# The AIData settings are read from the `config_name` field of the dataset info.
config = dataset.info.config_name
print(list(config))

# Vocabulary per case feature (CF).
CF_to_CFvocab = config['CF_to_CFvocab']
print(list(CF_to_CFvocab))

# Arguments per case feature (CF).
CF_to_CFArgs = config['CaseSettingInfo']['Case_Args_Settings']['CF_to_CFArgs']
print(list(CF_to_CFArgs))

# Resolve the trigger name through the trigger-case-base settings.
TriggerCaseBaseName = config['TriggerCaseBaseName']
TriggerCaseBaseArgs = config['TriggerCaseBaseName_to_TriggerCaseBaseArgs'][TriggerCaseBaseName]
TriggerName = TriggerCaseBaseArgs['Trigger']['TriggerName']
TriggerName
# print(TriggerCaseBaseArgs)


In [None]:
# df_tag.columns

import numpy as np
from recfldtkn.base import apply_multiple_conditions, assign_caseSplitTag_to_dsCase

# Only columns without '--' in their name go into the tagging table;
# those columns are materialised as a pandas DataFrame.
columns = dataset.column_names
columns_tag = [name for name in columns if '--' not in name]
df_tag = dataset.select_columns(columns_tag).to_pandas()

def map_age_to_agegroup(age):
    """Map an age in years to its age-group label.

    Parameters
    ----------
    age : int or float
        Age in years. May be missing (``None``, ``NaN`` or ``pd.NA``).

    Returns
    -------
    str or None
        ``'0-17'``, ``'18-39'``, ``'40-64'`` or ``'65+'``; ``None`` when the
        age is missing, so the row is not assigned to a real group.
    """
    # Every ordering comparison against NaN is False, so without this guard a
    # missing age would fall through all branches and be labelled '65+'
    # (and None / pd.NA would raise inside the comparisons).
    if pd.isna(age):
        return None
    if age < 18:
        return '0-17'
    if age < 40:
        return '18-39'
    if age < 65:
        return '40-64'
    return '65+'
    
# ---- derived tagging columns ----
# Built in one chained assign; each entry may use the columns created before it.
df_tag = df_tag.assign(
    # Calendar year of the observation timestamp.
    Year=lambda frame: frame['ObsDT'].dt.year,
    # First character of the patient id rendered as a string.
    Cohort=lambda frame: frame['PID'].astype(str).str[0],
    # Age at observation time: observation year minus year of birth.
    Age=lambda frame: frame['Year'] - frame['YearOfBirth'],
    # Age bucket label derived from Age.
    AgeGroup=lambda frame: frame['Age'].apply(map_age_to_agegroup),
)
# ---------------------------------


# dataset = dataset.add_column('Age', df_tag['Age'].values)
# dataset = dataset.add_column('Cohort', df_tag['Cohort'].values)
# dataset = dataset.add_column('Year', df_tag['Year'].values)
# dataset = dataset.add_column('AgeGroup', df_tag['AgeGroup'].values)


In [None]:
def _base_rules():
    """Return fresh copies of the rules shared by every split.

    Age, Cohort and Year are the derived columns added to df_tag.
    """
    return [
        ['Age', '>=', 40],
        ['Cohort', 'in', ['1', '2', '3']],
        ['Year', 'in', [2020, 2021, 2022, 2023]],
    ]


def _gender_rule():
    """Return a fresh copy of the gender-group rule shared by every split."""
    return ['GenderGroup', 'in', ['Gender.1', 'Gender.2']]


# Each split combines the shared rules with its own ObsDT window; all rules
# in a split are joined with 'and'.
Split_to_Selection = {
    # Train window: 2021-01-01 <= ObsDT < 2022-07-01
    'Train': {
        'Rules': _base_rules() + [
            _gender_rule(),
            ['ObsDT', '<', '2022-07-01'],
            ['ObsDT', '>=', '2021-01-01'],
        ],
        'Op': 'and',
    },
    # Val window: 2022-07-01 <= ObsDT < 2023-01-01
    'Val': {
        'Rules': _base_rules() + [
            ['ObsDT', '<', '2023-01-01'],
            ['ObsDT', '>=', '2022-07-01'],
            _gender_rule(),
        ],
        'Op': 'and',
    },
    # Test window: 2023-01-01 <= ObsDT < 2024-01-01
    'Test': {
        'Rules': _base_rules() + [
            ['ObsDT', '>=', '2023-01-01'],
            ['ObsDT', '<', '2024-01-01'],
            _gender_rule(),
        ],
        'Op': 'and',
    },
}

In [None]:
split_to_dataset = {}
for split_name, Selection in Split_to_Selection.items():
    # Evaluate this split's rules on the tag table; rows whose result equals 1 are kept.
    index = apply_multiple_conditions(df_tag, Selection['Rules'], Selection['Op'])
    indices = np.where(index == 1)[0]
    # df_tag was built from `dataset`, so these positions are used directly as
    # row positions in `dataset`.
    split_to_dataset[split_name] = dataset.select(indices)

split_to_dataset

In [None]:
# Wrap each split's dataset under the 'ds_case' key.
# A comprehension is used on purpose: its variables stay local, so the full
# `dataset` loaded earlier is not rebound to a single split. Rebinding it would
# make a re-run of the tag-table or split-selection cells silently operate on
# one split instead of the whole dataset.
Name_to_Data = {
    split_name: {'ds_case': ds_split}
    for split_name, ds_split in split_to_dataset.items()
}
Name_to_Data

In [None]:
OneEntryArgs = {
    # ----------------- Input Part -----------------
    'Input_Part': dict(
        EntryInputMethod='Mto1Period_MultiTknInStepNoWgt',
        # Active case features. Disabled alternatives kept for reference:
        #   'cf.TargetCGM_Af2H', 'cf.TimeSparse_Af2H',
        #   'cf.Diet5MinBaseLMH_Bf24H', 'cf.Diet5MinBaseLMH_Af2H'
        CF_list=[
            'cf.TargetCGM_Bf24H',
            'cf.TimeSparse_Bf24H',
        ],
        TargetField='TargetCGM',
        TimeField='Time',
        # Disabled: 'EventFields': ['Diet5MinBaseLMH']  (optionally 'Activity')
        BeforePeriods=['Bf24H'],
        # Disabled: 'AfterPeriods': ['Af2H']
        InferenceMode=False,  # other values noted by the author: 'NoFutureEvent', 'WithFutureEvent'
    ),

    # ----------------- Output Part -----------------
    'Output_Part': dict(
        EntryOutputMethod='EventPred',

        # ------------ head 1: time from the event to now ------------
        EventTimeToNow='co.Bf24H_Diet5MinInfo:MinToNow',
        label_to_id_head1={
            label: idx
            for idx, label in enumerate(['0h', '1h', '2h', '3h', '4h', '5h'])
        },
        dimensions_head1=['food_event_time'],

        # ------------ head 2: food content ------------
        EventCF_Name='cf.Diet5MinBaseLMH_Bf24H',
        label_to_id_head2={
            label: idx
            for idx, label in enumerate(['low', 'medium', 'high'])
        },
        dimensions_head2=['carbs', 'fiber', 'fat', 'protein', 'sugar'],

        set_transform=True,
        num_proc=4,
    ),
}


from recfldtkn.aidata_base.entry import EntryAIData_Builder

# Build the entry helper from the trigger name read out of the dataset
# config, the entry arguments and the shared workspace settings.
entry = EntryAIData_Builder(
    TriggerName=TriggerName,
    OneEntryArgs=OneEntryArgs,
    SPACE=SPACE,
)

In [None]:
# Let the entry builder process every split; the returned mapping replaces
# Name_to_Data. The next cell reads a 'ds_tfm' entry from it, so presumably
# this call adds the transformed dataset per split; confirm.
# NOTE(review): the input name is overwritten, so re-running this cell feeds
# the already-processed mapping back in; confirm that is safe, or re-run the
# cell that builds Name_to_Data first.
Name_to_Data = entry.setup_EntryFn_to_NameToData(Name_to_Data, CF_to_CFvocab, OneEntryArgs)
# Name_to_Data

In [None]:
# Take the transformed dataset of the training split.
Data = Name_to_Data['Train']
ds_tfm = Data['ds_tfm']

# Slice the first few rows as a small sample batch and list its fields.
batch_size = 4
batch = ds_tfm[:batch_size]
batch.keys()


# Part 2: Model Init

In [None]:
# Names of the case features that have a vocabulary.
list(CF_to_CFvocab)

In [None]:
# Display the entry configuration for reference.
OneEntryArgs

In [None]:
# InputPart = OneEntryArgs['Input_Part']
# TargetField = InputPart['TargetField']
# TimeField = InputPart['TimeField']
# EventFields = InputPart['EventFields']


# CF_list = InputPart['CF_list']  
# FieldList = [TimeField] + EventFields
# # FieldList

# Field_to_CFs = {Field: [CF for CF in CF_list if Field in CF] for Field in FieldList}
# # Field_to_CFs


# CF_to_CFvocab = CF_to_CFvocab
# Field_to_CFvocab = {Field: CF_to_CFvocab[CFs[0]] for Field, CFs in Field_to_CFs.items()}
# # Field_to_CFvocab


# field_to_vocabsize = {Field: len(Field_to_CFvocab[Field]['input_ids']['tkn2tid']) for Field in FieldList}
# field_to_vocabsize


# Event Prediction

## Step 1: model config

In [None]:
# from nn.cgmlhm.configuration_cgmlhm import CgmLhmConfig 
from nn.cgmevent.configuration_fieldencoder import FieldEncoderConfig

# Settings for the event-prediction encoder.
ModelArgs = dict(
    model_type='cgm_encoder',
    # NOTE(review): presumably one class per label in
    # OneEntryArgs['Output_Part']['label_to_id_head1'] ('0h'..'5h'); confirm.
    num_classes=6,
    num_hidden_layers=6,
)

# Note: this rebinds `config`, which previously held the dataset settings.
config = FieldEncoderConfig(**ModelArgs)
config

## Step 2: model structure

In [None]:
from nn.cgmevent.modeling_fieldencoder import FieldEncoderForClassification

# Instantiate the classifier from the current `config`; the bare name on the
# last line displays the model as the cell output.
eventmodel = FieldEncoderForClassification(config)
eventmodel

## Step 3: forward

In [None]:




# Forward check on the sample batch: token ids plus the event-time labels.
# Inputs currently left out: 'timestep_ids' (batch['Time--timestep_orig_ids'])
# and 'attention_mask' (batch['attention_mask']).
eventmodel_input = dict(
    input_ids=batch['input_ids'],
    labels=batch['food_event_time_labels'],
)

event_outputs = eventmodel(**eventmodel_input)
event_outputs



# Food and Carb Model

## Step 1: model config

In [None]:
# from nn.cgmlhm.configuration_cgmlhm import CgmLhmConfig 
from nn.cgmevent.configuration_fieldencoder import FieldEncoderConfig

# Settings for the combined event-time and quantity model.
ModelArgs = dict(
    model_type='cgm_encoder',
    num_classes=6,
    num_hidden_layers=6,
    # NOTE(review): presumably False means the quantity output is treated as
    # classification rather than regression; confirm in the model code.
    quantity_regression=False,
    # NOTE(review): presumably matches the three levels in
    # OneEntryArgs['Output_Part']['label_to_id_head2']; confirm.
    num_quantity_classes=3,
)

# Rebinds both `ModelArgs` and `config` from the event-prediction section.
config = FieldEncoderConfig(**ModelArgs)
config

## Step 2: model structure

In [None]:
from nn.cgmevent.modeling_fieldencoder import FieldEncoderForClassificationAndRegression

# Instantiate the model from the current `config`. This rebinds `eventmodel`,
# replacing the classification-only model created in the previous section.
eventmodel = FieldEncoderForClassificationAndRegression(config)
eventmodel

## Step 3: forward

In [None]:


# Forward check on the sample batch: token ids, the event-time labels and
# the carbs labels as the quantity target.
# Input currently left out: 'timestep_ids' (batch['Time--timestep_orig_ids']).
eventmodel_input = dict(
    input_ids=batch['input_ids'],
    labels=batch['food_event_time_labels'],
    labels_quantity=batch['carbs_labels'],
)

event_outputs = eventmodel(**eventmodel_input)
event_outputs