# Space Settings

In [None]:
# This file was written as a Flask inference server; the Flask imports are currently commented out, so it
# runs the inference steps directly. It is the file you modify to implement the scoring for your own algorithm.

from __future__ import print_function
import io
import os
import sys
import json
# import flask
import logging
import datasets
import traceback
import pandas as pd
from pprint import pprint
from datetime import datetime 
# from flask import Flask, request, jsonify, Response

# Root logging configuration: INFO level, with file name, line number and logger name in every record.
logging.basicConfig(
    format='[%(levelname)s:%(asctime)s:(%(filename)s@%(lineno)d %(name)s)]: %(message)s',
    level=logging.INFO,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

In [None]:
# parent_directory = '/dbfs/FileStore/Sid_Files/Deployment-v1119'
# Working directory for the run. The relative paths used later in this file
# (e.g. MODEL_ROOT and the JSON payload path) are resolved against it.
parent_directory = '.'

# Switch the process working directory; with '.' this leaves it unchanged.
os.chdir(parent_directory)

In [None]:

def process_inference_SPACE(SPACE, MODEL_ENDPOINT):
    """Fill SPACE with the folder layout of one model endpoint and validate it.

    Args:
        SPACE: dict that must already contain 'MODEL_ROOT'. It is updated in
            place and also returned.
        MODEL_ENDPOINT: name of the endpoint folder under SPACE['MODEL_ROOT'].

    Returns:
        The same SPACE dict with 'CODE_FN', 'DATA_EXTERNAL', 'DATA_RAW',
        'DATA_INFERENCE' and 'MODEL_ENDPOINT' set.

    Raises:
        AssertionError: if 'MODEL_ROOT' is missing from SPACE, or if any of the
            derived paths does not exist. The message names the key whose path
            is missing. (Assertions are skipped when Python runs with -O.)
    """
    assert 'MODEL_ROOT' in SPACE, "Invalid SPACE: missing MODEL_ROOT"

    model_root = SPACE['MODEL_ROOT']
    # (SPACE key, path) pairs, in the order they are set and validated:
    #   CODE_FN        -> <root>/<endpoint>/pipeline
    #   DATA_EXTERNAL  -> <root>/<endpoint>/external
    #   DATA_RAW       -> <root>/<endpoint>
    #   DATA_INFERENCE -> <root>/<endpoint>/inference
    key_to_path = (
        ('CODE_FN', os.path.join(model_root, MODEL_ENDPOINT, 'pipeline')),
        ('DATA_EXTERNAL', os.path.join(model_root, MODEL_ENDPOINT, 'external')),
        ('DATA_RAW', os.path.join(model_root, MODEL_ENDPOINT)),
        ('DATA_INFERENCE', os.path.join(model_root, MODEL_ENDPOINT, 'inference')),
    )
    for key, path in key_to_path:
        # Store first, then validate, so a failure leaves the offending entry in SPACE.
        SPACE[key] = path
        # The message uses the real key; previously DATA_RAW and DATA_INFERENCE
        # failures were both reported as "Invalid DATA_EXTERNAL".
        assert os.path.exists(path), f"Invalid {key}: {path}"

    SPACE['MODEL_ENDPOINT'] = MODEL_ENDPOINT
    return SPACE

In [None]:
# ############################
# # ----------- environment for Estimator.deploy() -----------
# MODEL_ROOT          = '../../../_Model'           # '/opt/ml/model' in sagemaker
# MODEL_ENDPOINT      = 'vTestWeight' # 'vTestCGMFull'
# INF_CohortName      = '20241013_InferencePttSampleV0'
# INF_OneCohortArgs   = {'CohortLabel': 9,
#                        'CohortName': '20241013_InferencePttSampleV0',
#                        'FolderPath': '$DATA_RAW$/inference/',
#                        'SourcePath': 'patient_sample',
#                        'Source2CohortName': 'InferencePttSampleV0'}
# INF_CFArgs          = None 
# INF_Args            = None 

# PostFnName = "PostFn_NaiveForUniLabelPred" # "EngagementPredToLabel"
# TrigFnName = 'TriggerFn_WeightEntry_v1211' 
# MetaFnName = 'MetaFn_None'

# POST_PROCESS_SCRIPT = None # 'pipeline/inference/post_process.py' # by default, use this script
# LoggerLevel         = "INFO"
# ############################


In [None]:
###########################
# Active local defaults for the inference run. A later cell lets environment
# variables override most of these values (POST_PROCESS_SCRIPT has no override there).
MODEL_ROOT          = '../../../_Model'           # '/opt/ml/model' in sagemaker
MODEL_ENDPOINT      = 'vTestCGMFull' # 'vTestWeight' # 
INF_CohortName      = '20241013_InferencePttSampleV0'
# Cohort arguments; later keyed by INF_CohortName in CohortName_to_OneCohortArgs.
INF_OneCohortArgs   = {'CohortLabel': 9,
                       'CohortName': '20241013_InferencePttSampleV0',
                       'FolderPath': '$DATA_RAW$/inference/',
                       'SourcePath': 'patient_sample',
                       'Source2CohortName': 'InferencePttSampleV0'}
# Passed to load_AIData_Model_InfoSettings as InputCFArgs_ForInference.
INF_CFArgs          = ['cf.TargetCGM_Bf24H'] 
# Passed to load_AIData_Model_InfoSettings as InferenceArgs.
INF_Args            = {'GEN_Args': {
                            'num_first_tokens_for_gen': 289,
                            'max_new_tokens': 24,
                            'do_sample': False,
                            'items_list': ['hist', 'pred', 'logit_scores']}
                      } 
# Names looked up in NAME_TO_FUNCTION (imported from inference.post_process) further down.
MetaFnName = 'MetaFn_None'
TrigFnName = 'TriggerFn_CGM5MinEntry_v1211' 
PostFnName = "PostFn_WithCGMPred_v1210" # "EngagementPredToLabel"
# NOTE(review): POST_PROCESS_SCRIPT is not read anywhere else in the visible code.
POST_PROCESS_SCRIPT = None # 'pipeline/inference/post_process.py' # by default, use this script
# NOTE(review): LoggerLevel is only re-read from the environment below; the visible
# logging.basicConfig call uses logging.INFO directly — confirm whether it should be applied.
LoggerLevel         = "INFO"
###########################


In [None]:
############################# # imagine you are in the SageMaker container
# Environment variables, when set, override the local defaults defined earlier in this file.


def _env_json(var_name, default):
    """Return the JSON-decoded value of environment variable `var_name`.

    os.environ values are always strings, while the defaults for the structured
    settings below are dict/list objects. Decoding the string as JSON keeps the
    overridden value the same kind of object as its default.

    Args:
        var_name: name of the environment variable to read.
        default: value returned unchanged when the variable is not set.

    Returns:
        `default` if the variable is unset; the decoded JSON value if it is set
        and valid JSON; otherwise the raw string (which is what a plain
        os.environ.get() lookup would have returned).
    """
    raw_value = os.environ.get(var_name)
    if raw_value is None:
        return default
    try:
        return json.loads(raw_value)
    except json.JSONDecodeError:
        # Best effort: keep the raw string rather than failing at configuration time.
        return raw_value


# Plain string settings: the environment value is used as-is.
MODEL_ROOT        = os.environ.get('MODEL_ROOT', MODEL_ROOT)
MODEL_ENDPOINT    = os.environ.get('MODEL_ENDPOINT', MODEL_ENDPOINT)
INF_CohortName    = os.environ.get('INF_COHORT_NAME', INF_CohortName)

# Structured settings (dict / list defaults): the environment value is JSON-decoded.
# NOTE(review): later cells read INF_OneCohortArgs, not INF_CohortArgs, so the
# INF_COHORT_ARGS override is not consumed by the visible code — confirm intent.
INF_CohortArgs    = _env_json('INF_COHORT_ARGS', INF_OneCohortArgs)
InputCFArgs_ForInference = _env_json('INF_CFArgs', INF_CFArgs)
InferenceArgs     = _env_json('INF_Args', INF_Args)

# Names of the post-process / trigger / meta functions.
PostFnName = os.environ.get('PostFnName', PostFnName)
TrigFnName = os.environ.get('TrigFnName', TrigFnName)
MetaFnName = os.environ.get('MetaFnName', MetaFnName)

LoggerLevel       = os.environ.get('LOGGER_LEVEL', LoggerLevel)
#############################


In [None]:
# Build the SPACE dict for the selected endpoint (paths are validated inside the helper).
SPACE = {'MODEL_ROOT': MODEL_ROOT}
SPACE = process_inference_SPACE(SPACE, MODEL_ENDPOINT)

# Make the endpoint's pipeline code importable.
if SPACE['CODE_FN'] not in sys.path:
    sys.path.append(SPACE['CODE_FN'])
    # Drop duplicate entries while keeping their order. Import resolution walks
    # sys.path front to back, so round-tripping through a set (unordered) would
    # change which copy of a module is found first.
    sys.path = list(dict.fromkeys(sys.path))

# MLflow Databricks

In [None]:
from recfldtkn.record_base.cohort import CohortFn, Cohort
from recfldtkn.case_base.caseutils import get_ROCOGammePhiInfo_from_CFList
from recfldtkn.aidata_base.aidata_base import AIData_Base 
from recfldtkn.record_base.record_base import Record_Base
from recfldtkn.case_base.case_base import Case_Base
from recfldtkn.model_base.model_base import Model_Base
from recfldtkn.base import fill_missing_keys


from nn import load_model_instance_from_nn

from inference.utils_inference import (
    load_AIData_Model_InfoSettings,
    load_Inference_Entry_Example,
    pipeline_inference_for_modelbase,
    Record_Proc_Config,
    Case_Proc_Config,
    OneEntryArgs_items_for_inference,
)

from inference.post_process import NAME_TO_FUNCTION


In [None]:
import mlflow.pyfunc
import os
import sys
import logging
import shutil
from datetime import datetime

In [None]:
# Look up the configured meta / trigger / post-process entries by name in
# NAME_TO_FUNCTION (imported from inference.post_process).
# NOTE(review): presumably an unknown name raises KeyError here, assuming
# NAME_TO_FUNCTION is a plain dict — confirm. In the visible cells these three
# names are only referenced from commented-out code.
MetaFn = NAME_TO_FUNCTION[MetaFnName]
TrigFn = NAME_TO_FUNCTION[TrigFnName]
PostFn = NAME_TO_FUNCTION[PostFnName]


# # --------- meta_results ---------
# meta_results = MetaFn(SPACE)
# if meta_results is None:
#     print('No meta_results')
# else:
#     metadata_response = meta_results.get('metadata_response', None)
#     pprint('metadata_response:', metadata_response)


In [None]:
# --------- load context ---------

# Endpoint folder: <MODEL_ROOT>/<MODEL_ENDPOINT>; it must exist before anything is loaded.
ModelEndpoint_Path = os.path.join(SPACE['MODEL_ROOT'], SPACE['MODEL_ENDPOINT'])
assert os.path.exists(ModelEndpoint_Path), f"Invalid ModelEndpoint_Path: {ModelEndpoint_Path}"

# Single-cohort lookup table used by the example-entry loader further down.
CohortName_to_OneCohortArgs = {INF_CohortName: INF_OneCohortArgs}

# Cohort settings plus the project classes / helpers handed to the loader as keyword arguments.
Package_Settings = dict(
    INF_CohortName=INF_CohortName,
    INF_OneCohortArgs=INF_OneCohortArgs,
    Record_Proc_Config=Record_Proc_Config,
    Case_Proc_Config=Case_Proc_Config,
    OneEntryArgs_items_for_inference=OneEntryArgs_items_for_inference,
    get_ROCOGammePhiInfo_from_CFList=get_ROCOGammePhiInfo_from_CFList,
    load_model_instance_from_nn=load_model_instance_from_nn,
    Model_Base=Model_Base,
    AIData_Base=AIData_Base,
)

Context = load_AIData_Model_InfoSettings(
    ModelEndpoint_Path=ModelEndpoint_Path,
    InputCFArgs_ForInference=InputCFArgs_ForInference,
    InferenceArgs=InferenceArgs,
    SPACE=SPACE,
    **Package_Settings,
)

# Unpack the pieces of the returned context that the later cells use.
model_base = Context['model_base']
aidata_base = Context['aidata_base']
InfoSettings = Context['InfoSettings']

In [None]:
# Load the example inference entry for the configured cohort; its 'template_form'
# entry is reused when the final inference entry is assembled below.
Inference_Entry_Example = load_Inference_Entry_Example(
    INF_CohortName,
    CohortName_to_OneCohortArgs,
    Cohort,
    CohortFn,
    SPACE,
)

# Run Local

In [None]:
#####################
# json_payload_path = 'data_weight.json'
json_payload_path = 'data_cgm.json'
#####################

# Read the request payload. The encoding is pinned to UTF-8 so that the result
# does not depend on the platform's locale default encoding.
with open(json_payload_path, 'r', encoding='utf-8') as f:
    json_payload = json.load(f)


# One row per entry of the payload's 'dataframe_records' list.
df_model_input = pd.DataFrame(json_payload['dataframe_records'])
df_model_input

In [None]:
# Use the first row of the payload frame as the single model input (a dict keyed by column name).
model_input = df_model_input.iloc[0].to_dict()

In [None]:

# Turn each trigger's list of case-trigger records into a DataFrame.
TriggerName_to_CaseTriggerList = model_input['TriggerName_to_CaseTriggerList']
TriggerName_to_dfCaseTrigger = {
    trigger_name: pd.DataFrame(trigger_rows)
    for trigger_name, trigger_rows in TriggerName_to_CaseTriggerList.items()
}

# Make sure every trigger frame carries an 'ObsDT' column. The frames are
# modified in place, so the dict already refers to the updated objects.
# (The TrigFn call stays disabled, as before:
#  CaseTriggerList = TrigFn(dfCaseTrigger, model_input, Context))
for TriggerName, df in TriggerName_to_dfCaseTrigger.items():
    if 'ObsDT' in df.columns:
        continue
    # ObsDT = UTC observation time shifted by TimezoneOffset, read as minutes (unit 'm').
    df['ObsDT'] = (
        pd.to_datetime(df['ObsDT_UTC'])
        + pd.to_timedelta(df['TimezoneOffset'], 'm')
    )

TriggerName_to_dfCaseTrigger

In [None]:

# Assemble the entry handed to the pipeline: trigger frames and inference form
# from the payload, template form from the example entry.
Inference_Entry_Final = {
    'TriggerName_to_dfCaseTrigger': TriggerName_to_dfCaseTrigger,
    'inference_form': model_input['inference_form'],
    'template_form': Inference_Entry_Example['template_form'],
}

# Run the inference pipeline with the loaded model / AI-data context.
inference_results = pipeline_inference_for_modelbase(
    Inference_Entry=Inference_Entry_Final,
    Record_Base=Record_Base,
    Case_Base=Case_Base,
    aidata_base=aidata_base,
    model_base=model_base,
    InfoSettings=InfoSettings,
    SPACE=SPACE,
)

# Show which entries the pipeline returned.
print(list(inference_results))

In [None]:
# Pretty-print the 'ModelArtifactName_to_Inference' entry of the pipeline results.
ModelArtifactName_to_Inference = inference_results['ModelArtifactName_to_Inference']
pprint(ModelArtifactName_to_Inference)

In [None]:
# Inspect the record base built during inference for the configured cohort.
record_base = inference_results['record_base']
CohortName = INF_CohortName
onecohort_recordbase = record_base.CohortName_to_OneCohortRecordBase[CohortName]
Name_to_HRF = onecohort_recordbase.Name_to_HRF


def _preview_frame(frame):
    """Print the frame's shape, display its first rows, then print a separator."""
    print(frame.shape)
    display(frame.head())
    print('===========\n')


# Names of length 2 are shown as record names (df_RecAttr), names of length 3
# as record-feature names (df_RecFeat); any other length is skipped.
for Name, HRF in Name_to_HRF.items():
    if len(Name) == 2:
        print(f"RecordName: {Name}")
        df = HRF.df_RecAttr
        _preview_frame(df)
    elif len(Name) == 3:
        print(f"RecFeatName: {Name}")
        df = HRF.df_RecFeat
        _preview_frame(df)


In [None]:
# Inspect the case base built during inference.
case_base = inference_results['case_base']

# First trigger case base name (raises IndexError when there is none).
TriggerCaseBaseName = list(case_base.TriggerCaseBaseName_to_CaseSetNameToCaseset)[0]
print(TriggerCaseBaseName)

CaseSetNameToCaseSet = case_base.TriggerCaseBaseName_to_CaseSetNameToCaseset[TriggerCaseBaseName]
CaseSetNameToCaseSet

# First case set under that trigger case base.
CaseSetName = list(CaseSetNameToCaseSet)[0]
caseset = CaseSetNameToCaseSet[CaseSetName]

# Preview the case table, then show the case dataset object.
df_case = caseset.df_case
display(df_case.head())

ds_case = caseset.ds_case
display(ds_case)

In [None]:


# # ----------------------------------------------------
# du1 = inference_results['du1']
# du2 = inference_results['du2']
# du3 = inference_results['du3']
# du4 = inference_results['du4']
# total_time = inference_results['total_time']

# logger.info(f"record_base: {du1}")
# logger.info(f"case_base: {du2}")
# logger.info(f"aidata_base and model_base update: {du3}")
# logger.info(f"model_infernece: {du4}")
# logger.info(f"total_time: {total_time}")

# print(inference_results)

# ModelCheckpointName_to_InferenceInfo = inference_results['ModelCheckpointName_to_InferenceInfo']
        
# # for k, v in ModelCheckpointName_to_InferenceInfo.items():
# #     # print(k)
# #     v = {k1: list(v1) for k1, v1 in v.items()}
    
# ModelCheckpointName_to_InferenceInfo = {
#     k: {k1: [round(float(i), 4) for i in list(v1)] for k1, v1 in v.items()} for k, v in ModelCheckpointName_to_InferenceInfo.items()
# }

# self.logger.info("Successfully ran prediction")
# return ModelCheckpointName_to_InferenceInfo


# except Exception as e:
# self.logger.error(f"Prediction failed: {str(e)}")
# raise
