# Space

In [None]:
import os
import sys
import logging
import pandas as pd 
from pprint import pprint 

from datasets import disable_caching
from IPython.display import display, HTML

# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Workspace root = everything in the current path up to and including the
# first 'WorkSpace' component; make it the working directory.
KEY = 'WorkSpace'
WORKSPACE_PATH = os.getcwd().split(KEY)[0] + KEY
os.chdir(WORKSPACE_PATH)

# Imported only after the chdir above (presumably proj_space lives at the
# workspace root -- confirm).
from proj_space import SPACE
sys.path.append(SPACE['CODE_FN'])
SPACE['WORKSPACE_PATH'] = WORKSPACE_PATH

disable_caching()
logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO,
    format='[%(levelname)s:%(asctime)s:(%(filename)s@%(lineno)d %(name)s)]: %(message)s',
)

# Inference Entry

In [None]:
from recfldtkn.record_base.cohort import CohortFn, Cohort
from config.config_record.Cohort import CohortName_to_OneCohortArgs


# Cohort used for inference; the *_for_inference list is the same list object.
CohortName = '20241013_InferencePttSampleV0'
CohortName_list = [CohortName]
CohortName_list_for_inference = CohortName_list
OneEntryArgs_items_for_inference = ['Input_Part']

# Record-level processing options: nothing is saved to or loaded from disk.
Record_Proc_Config = {
    'save_data': False,
    'load_data': False,
    'via_method': 'df',
}

# Case-level processing options: single CPU, no persistence.
Case_Proc_Config = {
    'max_trigger_case_num': None,
    'use_task_cache': True,
    'caseset_chunk_size': 50000,
    'save_data': False,
    'load_data': False,
    'load_casecollection': False,
    'via_method': 'df',
    'n_cpus': 1,
    'batch_size': None,
}


In [None]:
# get an Inference_Entry
################
# replace this part with the API input. 
# Look up the configured arguments of the cohort chosen for inference.
OneCohort_Args = CohortName_to_OneCohortArgs[CohortName]


# Build the cohort function and the cohort object, then initialize the cohort.
Source2CohortName = OneCohort_Args['Source2CohortName']
cohort_fn = CohortFn(Source2CohortName, SPACE)
cohort = Cohort(OneCohort_Args, SPACE, cohort_fn)
cohort.setup_fn(cohort_fn)
cohort.initialize_cohort()

# Get Inference_Entry
# Everything passed to get_InferenceEntry is read back from the initialized
# cohort; note that OneCohort_Args is rebound to the cohort's own copy here.
SourceFile_List = cohort.SourceFile_List
OneCohort_Args = cohort.OneCohort_Args
get_RawName_from_SourceFile = cohort.get_RawName_from_SourceFile
get_InferenceEntry = cohort.cohort_fn.get_InferenceEntry
Inference_Entry = get_InferenceEntry(OneCohort_Args, 
                                     SourceFile_List, 
                                     get_RawName_from_SourceFile)

# Show the 'template_form' entry of the result, keeping its key order.
pprint(Inference_Entry['template_form'], sort_dicts=False, compact=True)
########################

In [None]:
# List the top-level keys of the inference entry.
print(list(Inference_Entry))

In [None]:
def TriggerFn_WeightEntry_v1211(inference_form):
    """Build the 'WeightEntry' trigger case from the last weight record.

    Args:
        inference_form: Mapping that holds the 'ElogWeightEntry' table in any
            form accepted by ``pd.DataFrame`` (e.g. a list of row dicts).
            Rows must provide 'PatientID' and 'ObservationEntryDateTime'.

    Returns:
        dict: ``{'WeightEntry': [{'PatientID': ..., 'ObsDT': ...}]}`` built
        from the last row of the table (assumed to be the most recent
        entry -- TODO confirm the table is ordered by time).

    Raises:
        KeyError: If the table or one of the required columns is missing.
        IndexError: If the table has no rows.
    """
    ########### ------ build the df_case and save to the Inference_Entry ------ ###########
    TriggerName = 'WeightEntry'
    DBTableName = 'ElogWeightEntry'

    df_weight = pd.DataFrame(inference_form[DBTableName])
    if len(df_weight) == 0:
        # Same exception type as the bare ``iloc[-1]`` would raise, but with a
        # message that names the offending table.
        raise IndexError(f"inference_form['{DBTableName}'] has no rows; cannot build a {TriggerName} trigger case")

    latest_entry = df_weight.iloc[-1]
    # Only the fields that end up in the trigger case are read; the weight
    # value itself is not part of the case.
    trigger_case = {
        'PatientID': latest_entry['PatientID'],
        'ObsDT': latest_entry['ObservationEntryDateTime'],
        # 'Weight': latest_entry['Weight']
    }

    return {TriggerName: [trigger_case]}


# Pick the first Inference_Entry key that mentions 'inference_form'.
inferece_form_name = [key for key in Inference_Entry if 'inference_form' in key][0]
inference_form = Inference_Entry[inferece_form_name]

# Run the trigger function, turn each case list into a DataFrame and attach
# the result to the inference entry.
TriggerName_to_CaseTriggerList = TriggerFn_WeightEntry_v1211(inference_form)
TriggerName_to_dfCaseTrigger = {
    trigger_name: pd.DataFrame(case_triggers)
    for trigger_name, case_triggers in TriggerName_to_CaseTriggerList.items()
}
Inference_Entry['TriggerName_to_dfCaseTrigger'] = TriggerName_to_dfCaseTrigger
TriggerName_to_dfCaseTrigger

# Config

In [None]:
# Name of the model endpoint; stored in SPACE, from which the endpoint path
# is built in the next step.
MODEL_ENDPOINT = 'vTestWeight'
SPACE['MODEL_ENDPOINT'] = MODEL_ENDPOINT
pprint(SPACE, sort_dicts=False, compact=True)

# Step 1: Model_Base from a Model Version

In [None]:
from recfldtkn.model_base.model_base import Model_Base
from nn import load_model_instance_from_nn

# Endpoint directory: SPACE['MODEL_ROOT'] joined with SPACE['MODEL_ENDPOINT'].
ModelEndpoint_Path = os.path.join(SPACE['MODEL_ROOT'], SPACE['MODEL_ENDPOINT'])
# Construct the model base for that endpoint, handing it the model loader.
model_base = Model_Base(
    ModelEndpoint_Path = ModelEndpoint_Path,
    load_model_instance_from_nn = load_model_instance_from_nn,
    SPACE = SPACE,
)

In [None]:
pprint(model_base.ModelArtifactName_to_ModelInfo, sort_dicts=False, compact=True)

# For every model artifact, print its name and, per CF, the number of entries
# in that CF's input_ids tid2tkn table.
for model_artifact_name, ModelInfo in model_base.ModelArtifactName_to_ModelInfo.items():
    # model_instance
    print(model_artifact_name)
    model_artifact = ModelInfo['model_artifact']
    print({
        cf_name: len(cf_vocab['input_ids']['tid2tkn'])
        for cf_name, cf_vocab in model_artifact.aidata.CF_to_CFvocab.items()
    })
    # pprint(model_instance.aidata.EntryArgs['Input_FullArgs'])
    print()
    

# Step 2: InfoSettings

In [None]:
from inference.utils_inference import get_complete_InfoSettings

###################
# CFArgs_ForInference = None
InputCFArgs_ForInference = None # ['cf.TargetCGM_Bf24H'] # to remove in the future
###################

# Derive the complete settings from the model base and the inference cohorts.
InfoSettings = get_complete_InfoSettings(model_base, CohortName_list, InputCFArgs_ForInference)

# Available settings, followed by the trigger-case-base arguments.
print(list(InfoSettings))
pprint(InfoSettings['TriggerCaseBaseName_to_TriggerCaseBaseArgs'], sort_dicts=False, compact=True)

# Step 3: AIData

In [None]:
from recfldtkn.aidata_base.aidata_base import AIData_Base   

OneAIDataName_to_OneAIDataArgs = InfoSettings['OneAIDataName_to_OneAIDataArgs']
# Show only the 'OneEntryArgs' part of each AIData configuration.
pprint(
    {
        aidata_name: aidata_args['OneEntryArgs']
        for aidata_name, aidata_args in OneAIDataName_to_OneAIDataArgs.items()
    },
    sort_dicts=False,
    compact=True,
)

In [None]:
# Take the first configured AIData and list the keys of its OneEntryArgs.
OneAIDataName = list(OneAIDataName_to_OneAIDataArgs)[0]
OneAIDataArgs = OneAIDataName_to_OneAIDataArgs[OneAIDataName]
list(OneAIDataArgs['OneEntryArgs'])

In [None]:
# Names of all configured AIData entries.
list(OneAIDataName_to_OneAIDataArgs)

In [None]:
# Build the AIData base from the AIData configurations, restricted to the
# entry-arg items and cohorts selected for inference.
aidata_base = AIData_Base(
    OneAIDataName_to_OneAIDataArgs = OneAIDataName_to_OneAIDataArgs,
    OneEntryArgs_items_for_inference = OneEntryArgs_items_for_inference,
    CohortName_list_for_inference = CohortName_list_for_inference, 
    SPACE = SPACE, 
)

In [None]:
# pprint(aidata_base.AIDataHashName_to_AIDataArgs, sort_dicts=False, compact=True)

In [None]:
# Look up the first AIData known to aidata_base, by name.
OneAIDataName = aidata_base.get_AIDataName_list()[0]
OneAIDataArgs = aidata_base.get_OneAIDataArgs_from_OneAIDataName(OneAIDataName)

# Print its entry arguments, then fetch and display the AIData object itself.
OneEntryArgs = OneAIDataArgs['OneEntryArgs']
pprint(OneEntryArgs, sort_dicts=False, compact=True)
aidata = aidata_base.get_aidata_from_OneAIDataName(OneAIDataName)
aidata

In [None]:
# aidata_base.get_AIDataHashName_list()
# Names of the datasets held by the selected AIData.
Name_to_Data = aidata.Name_to_Data
list(Name_to_Data)

# Step 4: CF, HRF, TriggerCaseBaseName_to_CohortNameList

In [None]:
# Display the entry arguments of the selected AIData.
OneAIDataArgs['OneEntryArgs']

In [None]:

from recfldtkn.case_base.caseutils import get_ROCOGammePhiInfo_from_CFList


# Case-level settings and their CF lookups; TagCF settings are optional.
Case_Args_Settings = InfoSettings['Case_Args_Settings']
CF_to_CFArgs = Case_Args_Settings['CF_to_CFArgs']
TagCF_to_TagCFArgs = Case_Args_Settings.get('TagCF_to_TagCFArgs', {})

CF_list_ForInference = InfoSettings['CF_list_ForInference'] # (aidata_base, CF_to_CFArgs, TagCF_to_TagCFArgs)
pprint(CF_list_ForInference, sort_dicts=False)

# Record-feature arguments needed by the CFs used for inference.
ROCOGammaPhiInfo = get_ROCOGammePhiInfo_from_CFList(CF_list_ForInference, CF_to_CFArgs)
HumanRecordRecfeat_Args = ROCOGammaPhiInfo['HumanRecordRecfeat_Args']
pprint(HumanRecordRecfeat_Args, sort_dicts=False)

# Distinct trigger case bases across all AIData configs; each one is mapped
# to the same inference cohort list.
TriggerCaseBaseName_List = list(set(
    aidata_args['TriggerCaseBaseName']
    for aidata_args in OneAIDataName_to_OneAIDataArgs.values()
))
TriggerCaseBaseName_to_CohortNameList = {
    case_base_name: CohortName_list
    for case_base_name in TriggerCaseBaseName_List
}

pprint(TriggerCaseBaseName_to_CohortNameList, sort_dicts=False)

# Step 5: load AIData Model InfoSettings

In [None]:
# Input 1 of load_AIData_Model_InfoSettings: the endpoint path built in Step 1.
print(ModelEndpoint_Path)

In [None]:
# Input 2 of load_AIData_Model_InfoSettings: the CF override set in Step 2.
print(InputCFArgs_ForInference)

In [None]:
# No explicit inference arguments are supplied to the loader below.
InferenceArgs = None
print(InferenceArgs)

In [None]:
# Cohort used for inference and its configured arguments.
INF_CohortName = CohortName 
print(INF_CohortName)
INF_OneCohortArgs = CohortName_to_OneCohortArgs[INF_CohortName]
print(INF_OneCohortArgs)

# Everything the loader needs besides the endpoint path and inference args:
# cohort info, processing configs, and the project callables/classes
# (passed as objects, not instances).
Package_Settings = {
    'INF_CohortName': INF_CohortName,
    'INF_OneCohortArgs': INF_OneCohortArgs,
    'Record_Proc_Config': Record_Proc_Config,
    'Case_Proc_Config': Case_Proc_Config,
    'OneEntryArgs_items_for_inference': OneEntryArgs_items_for_inference,
    'get_ROCOGammePhiInfo_from_CFList': get_ROCOGammePhiInfo_from_CFList,
    'load_model_instance_from_nn': load_model_instance_from_nn,
    'Model_Base': Model_Base,
    'AIData_Base': AIData_Base,
}

In [None]:
from inference.utils_inference import load_AIData_Model_InfoSettings


# One-call loader; Package_Settings is expanded into keyword arguments.
results = load_AIData_Model_InfoSettings(
    ModelEndpoint_Path = ModelEndpoint_Path, 
    InputCFArgs_ForInference = InputCFArgs_ForInference,
    InferenceArgs = InferenceArgs,
    SPACE = SPACE, 
    **Package_Settings
)


In [None]:
# Rebind the notebook-level objects to the ones returned by the loader;
# this replaces the instances built step by step above.
model_base = results['model_base']
aidata_base = results['aidata_base']
InfoSettings = results['InfoSettings']

# Step 6: record_base

In [None]:
# Keys of the inference entry that is passed to Record_Base below.
print(list(Inference_Entry))

In [None]:
from datetime import datetime 
from recfldtkn.record_base import Record_Base
from config.config_record.Cohort import CohortName_to_OneCohortArgs


# Trigger-case-base arguments, used when the case base is built in Step 7.
TriggerCaseBaseName_to_TriggerCaseBaseArgs = InfoSettings['TriggerCaseBaseName_to_TriggerCaseBaseArgs']

# Build the record base from the in-memory Inference_Entry and time it
# (du1 is the elapsed wall-clock time as a timedelta).
s = datetime.now()
record_base = Record_Base(CohortName_list, 
                            HumanRecordRecfeat_Args,
                            CohortName_to_OneCohortArgs,
                            SPACE = SPACE, 
                            Inference_Entry = Inference_Entry,
                            Record_Proc_Config = Record_Proc_Config,
                            )
e = datetime.now()
du1 = e-s

In [None]:
# Display the per-cohort record bases held by record_base.
record_base.CohortName_to_OneCohortRecordBase

In [None]:
# Select and display the record base of the inference cohort.
one_cohort_recordbase = record_base.CohortName_to_OneCohortRecordBase[CohortName]
one_cohort_recordbase

In [None]:
# Display the trigger-case DataFrames attached to this cohort's record base.
one_cohort_recordbase.TriggerName_to_dfCaseTrigger

# Step 7: case_base

In [None]:
from recfldtkn.case_base import Case_Base


# Build the case base on top of the record base and time it
# (du2 is the elapsed wall-clock time as a timedelta).
s = datetime.now()
case_base = Case_Base(
    record_base = record_base, 
    TriggerCaseBaseName_to_CohortNameList = TriggerCaseBaseName_to_CohortNameList, 
    TriggerCaseBaseName_to_TriggerCaseBaseArgs = TriggerCaseBaseName_to_TriggerCaseBaseArgs,
    Case_Proc_Config = Case_Proc_Config,
    Case_Args_Settings = Case_Args_Settings, 
)
e = datetime.now()
du2 = e-s

In [None]:
# Per trigger case base, print the number of entries in each CF's
# input_ids tid2tkn table.
for TriggerCaseBaseName, CF_to_CFVocab in case_base.TriggerCaseBaseName_to_CFtoCFvocab.items():
    print(TriggerCaseBaseName)
    print({
        cf_name: len(cf_vocab['input_ids']['tid2tkn'])
        for cf_name, cf_vocab in CF_to_CFVocab.items()
    })
    print()

In [None]:
# Display the case sets of every trigger case base.
case_base.TriggerCaseBaseName_to_CaseSetNameToCaseset

In [None]:
from IPython.display import display

# Show the case table (df_case) and the case dataset (ds_case) of every case
# set, grouped by trigger case base.
# (A bare `case_base.TriggerCaseBaseName_to_CFtoCFvocab` expression used to
# sit here; it was never displayed or used, so it has been dropped.)
for TriggerCaseBaseName, CaseSetName_to_CaseSet in case_base.TriggerCaseBaseName_to_CaseSetNameToCaseset.items():
    print(TriggerCaseBaseName)
    for CaseSetName, caseset in CaseSetName_to_CaseSet.items():
        print(CaseSetName)
        display(caseset.df_case)
        display(caseset.ds_case)

# Step 8: aidata_base

In [None]:
# Feed the case base into aidata_base and time it
# (du3 is the elapsed wall-clock time as a timedelta).
s = datetime.now()
aidata_base.update_CaseBase(case_base)
e = datetime.now()
du3 = e-s

In [None]:
# Names of the AIData objects available after the update.
AIDataName_list = aidata_base.get_AIDataName_list()
AIDataName_list

In [None]:
# Work with the first AIData in the list.
OneAIDataName = AIDataName_list[0]
OneAIDataName

In [None]:
# Fetch that AIData object by name.
aidata = aidata_base.get_aidata_from_OneAIDataName(OneAIDataName)

In [None]:
# Display the INPUT_CFs attribute of the selected AIData.
aidata.INPUT_CFs

In [None]:
# Display the AIData names again (same call as when AIDataName_list was built).
aidata_base.get_AIDataName_list()

In [None]:
# Print the transformed dataset ('ds_tfm') of every named dataset of every
# AIData held by aidata_base.
# The loop target is deliberately not called `aidata`, so the notebook-level
# `aidata` fetched by name in an earlier cell is not silently rebound.
# `ds_tfm` is reset first so the final print cannot pick up a stale value
# from a previous run of this cell.
ds_tfm = None
for OneAIDataHash, one_aidata in aidata_base.OneAIDataHash_to_AIData.items():
    print(OneAIDataHash)
    for Name, Data in one_aidata.Name_to_Data.items():
        ds_tfm = Data['ds_tfm']
        print(Name)
        print(ds_tfm)
        print()


# Reference listing of (row, col) -> value pairs, presumably captured from an
# earlier run of the print at the end of this cell -- TODO confirm.
# (0, 0)	1.0305
# (0, 1)	1.8262
# (0, 2)	11.2388
# (0, 3)	1.878
# (0, 4)	5.3921
# (0, 5)	116.3415
# (0, 6)	5.7057
# (0, 7)	194.8415
# (0, 8)	1.5461
# (0, 9)	0.4845
# (0, 10)	0.9927
# (0, 11)	0.0609
# (0, 12)	15.4268
# (0, 13)	119.5366
# (0, 14)	3.0037
# (0, 33)	230.7333
# (0, 36)	1.0
# (0, 37)	0.7
# (0, 38)	1.0
# (0, 39)	0.5615
# (0, 40)	1.0
# (0, 41)	0.6357
# (0, 46)	1.0
# (0, 47)	0.25
# (0, 58)	1.0
# :	:
# (0, 1023)	0.4787
# (0, 1024)	0.9807
# (0, 1025)	0.0602
# (0, 1026)	15.3012
# (0, 1027)	118.0964
# (0, 1028)	3.1
# (0, 1047)	230.7333
# (0, 1050)	1.0
# (0, 1051)	0.7
# (0, 1052)	1.0
# (0, 1053)	0.5615
# (0, 1054)	1.0
# (0, 1055)	0.5625
# (0, 1060)	1.0
# (0, 1061)	0.25
# (0, 1072)	1.0
# (0, 1076)	2.0515
# (0, 1078)	1053.4574
# (0, 1080)	1.0
# (0, 1081)	1.0
# (0, 1083)	0.5866
# (0, 1086)	2625.72
# (0, 1101)	1.0
# (0, 1107)	1.0
# (0, 1114)	1.0

# Show the 'X' entry of the last dataset visited by the loop above.
if ds_tfm is None:
    print('No ds_tfm found: aidata_base holds no datasets.')
else:
    print(ds_tfm['X'])

# Step 9: model_base

In [None]:
# Use the inference arguments carried by InfoSettings (this replaces the
# None assigned before the loader call).
InferenceArgs = InfoSettings['InferenceArgs']

pprint(InferenceArgs, sort_dicts=False)

In [None]:
s = datetime.now()
# model_base.update_AIDataBase(aidata_base, update_df_modelinstance = False)
######## update the aidata_base to model_base ########
model_base.aidata_base = aidata_base
ModelArtifactName_to_ModelInfo = model_base.ModelArtifactName_to_ModelInfo

# Run every model artifact on the first dataset of its own AIData and
# collect the outputs keyed by the artifact's model_artifact_name attribute.
ModelArtifactName_to_Inference = {}
for model_artifact_name, ModelInfo in ModelArtifactName_to_ModelInfo.items():
    model_artifact = ModelInfo['model_artifact']
    OneAIDataName = model_artifact.aidata.OneAIDataName
    aidata = aidata_base.get_aidata_from_OneAIDataName(OneAIDataName)
    Name = list(aidata.Name_to_Data)[0]
    Data = aidata.Name_to_Data[Name]
    inference = model_artifact.inference(Data, InferenceArgs)
    # model_instance.model_checkpoint_name
    ModelArtifactName_to_Inference[model_artifact.model_artifact_name] = inference
e = datetime.now()
du4 = e-s

# Timing summary for the four pipeline stages measured in this notebook.
total_time = du1 + du2 + du3 + du4
for label, duration in (
    ('record_base:', du1),
    ('case_base:', du2),
    ('aidata_base and model_base update:', du3),
    ('model_infernece:', du4),
    ('total_time:', total_time),
):
    print(label, duration)

In [None]:
# aidata.EntryArgs

In [None]:
# Print the per-artifact inference outputs; the line below is an example
# output kept for reference.
pprint(ModelArtifactName_to_Inference)
# {'weightpred/UniLabelPred-weightpred.Af1w-WeightPred.Af1M.WeightLossPctLarge2-XGBClassifierV0.6-2024.10.15-556770446746a3d1': {'y_pred_score': array([0.00042053], dtype=float32)}}

# Step 10: pipeline_inference_for_modelbase

In [None]:
from inference.utils_inference import pipeline_inference_for_modelbase

# Run the pipeline in a single call. Note that Record_Base and Case_Base are
# passed as classes, while aidata_base and model_base are the instances
# built above.
inference_results = pipeline_inference_for_modelbase(Inference_Entry,
                                                    Record_Base,
                                                    Case_Base,
                                                    aidata_base, 
                                                    model_base,
                                                    InfoSettings, 
                                                    SPACE)

In [None]:
# Print the result of the one-call pipeline, keeping key order.
pprint(inference_results, sort_dicts=False)

# Step 11: PostFn

In [None]:
from datetime import timezone

# def PostFn_WithActionDict_v1121(ModelArtifactName_to_Inference, SPACE):
#     output = {"models": [], "status": {"code": 200, "message": "Success"}}

#     for model_artifact_name, data in ModelArtifactName_to_Inference.items():
#         parts = model_artifact_name.split('__')

#         OneAIDataName, OneModelJobName, FingerPrint = parts[0], parts[1], parts[2]


#         OneDataName, OneDataVarientName = OneAIDataName.split('/')
#         # Extract model details
#         # model_name = '-'.join(parts[:2])  # e.g., Jardiance/UniLabelPred-Jardiance.RxAf1w
#         # model_name = model_name.split('/')[0].lower()
#         name = OneDataVarientName
#         # date = parts[4]  # e.g., 2024.11.03
#         # date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
#         date = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
#         # version = parts[3]  # e.g., XGBClassifierV0.6
        
#         version = SPACE['MODEL_ENDPOINT']
#         prediction_name = parts[2].replace("Rx.", "")  # e.g., Rx.Learn
#         # score = data['y_pred_score_percentile'][0]
#         score = data['y_pred_score'][0]

#         # Find or create model entry
#         model_entry = next((model for model in output["models"] if model["name"] == name), None)
#         if not model_entry:
#             model_entry = {"name": name, "date": date, "version": version, "predictions": [], "action": ""}
#             output["models"].append(model_entry)

#         # Append prediction details
#         model_entry["predictions"].append({"name": prediction_name, "score": score})

#     # Set the label as the name of the prediction with the highest score for each model
#     for model in output["models"]:
#         if model["predictions"]:
#             max_prediction = max(model["predictions"], key=lambda x: x["score"])
#             model["action"] = {'name': max_prediction["name"], 'score': max_prediction["score"]}

#     return output


# PostFn = PostFn_WithActionDict_v1121

In [None]:
from datetime import timezone

def PostFn_None(ModelArtifactName_to_Inference, SPACE):
    """Pass-through post-processing: return the inference mapping unchanged.

    Args:
        ModelArtifactName_to_Inference: Inference outputs keyed by model
            artifact name.
        SPACE: Unused; accepted so the signature matches the commented-out
            PostFn_WithActionDict_v1121 alternative.

    Returns:
        The very same ``ModelArtifactName_to_Inference`` object.
    """
    return ModelArtifactName_to_Inference


# Post-processing function applied to the inference results.
PostFn = PostFn_None

In [None]:
# Apply the selected post-processing function to the inference results from
# the step-by-step run and print the final output.
output = PostFn(ModelArtifactName_to_Inference, SPACE)

pprint(output, sort_dicts=False)