# <B> Model Deployment </B>
* Container: conda_pytorch_py39

## AutoReload

In [None]:
%load_ext autoreload
%autoreload 2

## 1. parameter store 설정

In [None]:
import boto3
from utils.ssm import parameter_store

In [None]:
# Build the project's parameter-store helper for the region of the default
# boto3 session.
strRegionName=boto3.Session().region_name
pm = parameter_store(strRegionName)
# Shared name prefix; later cells prepend it to the keys they look up.
prefix = pm.get_params(key="PREFIX")

## 2. Package imports for model deployment

In [None]:
import os
import sagemaker
from sagemaker.pytorch.model import PyTorchModel

In [None]:
from time import strftime
from smexperiments.trial import Trial
from smexperiments.experiment import Experiment

In [None]:
def create_experiment(experiment_name):
    """Load the experiment named ``experiment_name``, creating it if needed.

    Args:
        experiment_name: Name of the experiment to load or create.

    Returns:
        The ``Experiment`` object that was loaded or newly created.
    """
    try:
        return Experiment.load(experiment_name)
    except Exception:
        # Best-effort fallback kept from the original: any failure to load is
        # treated as "does not exist yet". Unlike a bare ``except:`` this no
        # longer swallows KeyboardInterrupt / SystemExit.
        return Experiment.create(experiment_name=experiment_name)

In [None]:
def create_trial(experiment_name):
    """Create a timestamp-named trial under ``experiment_name``.

    The trial name has the form
    ``<experiment_name>-<MMDD>-<HHMM><epoch seconds>``.

    Args:
        experiment_name: Name of the experiment the trial is attached to.

    Returns:
        The name of the created trial, as a string.
    """
    import time  # function-scope import keeps this cell self-contained

    # The original format string ended in "%s", which is not a documented
    # Python strftime directive: glibc expands it to epoch seconds, while
    # other platforms may reject it or emit something else. Build the same
    # suffix explicitly, from a single clock reading, so it is portable.
    now = time.time()
    minute_stamp = time.strftime("%m%d-%H%M", time.localtime(now))
    create_date = f"{minute_stamp}{int(now)}"

    sm_trial = Trial.create(
        trial_name=f'{experiment_name}-{create_date}',
        experiment_name=experiment_name,
    )
    return f'{sm_trial.trial_name}'

In [None]:
# Switch between SageMaker local mode and a hosted endpoint. The flag is read
# again by the data-capture cell further down.
local_mode = True

if local_mode: 
    # Instance type string used with the LocalSession below.
    inference_instance_type = 'local_gpu'

    import os
    from sagemaker.local import LocalSession

    sagemaker_session = LocalSession()
    # NOTE(review): presumably tells local mode to use the source code from
    # the local filesystem instead of S3 - confirm against the SDK docs.
    sagemaker_session.config = {'local': {'local_code': True}}

else:
    # Hosted deployment: a regular SageMaker session and a cloud instance type.
    inference_instance_type = "ml.g4dn.xlarge"
    sagemaker_session = sagemaker.Session()
    


# S3 URIs always use "/" as separator, so join them with posixpath rather
# than os.path (which would emit backslashes on Windows).
import posixpath

# Deployment settings read from the parameter store; every key is the shared
# prefix followed by a fixed suffix.
sagemaker_role_arn = pm.get_params(key=prefix + '-SAGEMAKER-ROLE-ARN')
bucket_name = pm.get_params(key=prefix + '-BUCKET')
model_artifact_s3_uri = pm.get_params(key=prefix + '-MODEL-PATH')
inf_image_uri = pm.get_params(key=prefix + '-INF-IMAGE-URI')

# Both output locations live under s3://<bucket>/<prefix>/inference/.
s3_inference_root = posixpath.join(f"s3://{bucket_name}", prefix, "inference")

# Passed to the model as code_location in the model-definition cell.
code_location = posixpath.join(s3_inference_root, "backup_codes")

# Destination for data capture when not running in local mode.
monitor_output = posixpath.join(s3_inference_root, "monitor_output")

print (f"sagemaker_role_arn: {sagemaker_role_arn}")
print (f"model_artifact_s3_uri: {model_artifact_s3_uri}")
print (f"inf_image_uri: {inf_image_uri}")
print (f"code_location: {code_location}")
print (f"monitor_output: {monitor_output}")

* Define inference job

In [None]:
# Describe the model to deploy: the inference entry point and its source
# directory (./code under the current working directory), the model artifact
# and container image looked up from the parameter store, and the session
# (local or hosted) chosen above.
model = PyTorchModel(
    entry_point='predictor.py',
    source_dir=os.getcwd() + '/code',
    code_location=code_location,
    model_data=model_artifact_s3_uri,
    role=sagemaker_role_arn,
    image_uri=inf_image_uri,
    # framework_version / py_version are left disabled here; an explicit
    # image_uri is passed instead.
    # framework_version="1.13.1",
    # py_version="py39",
    sagemaker_session=sagemaker_session
)

In [None]:
# Data capture is only configured for hosted endpoints; in local mode the
# config stays None.
data_capture_config = None

if not local_mode:
    from sagemaker.model_monitor import DataCaptureConfig

    # Capture every request (100% sampling) into the monitor output location.
    data_capture_config = DataCaptureConfig(
        enable_capture=True,
        sampling_percentage=100,
        destination_s3_uri=monitor_output,
    )

In [None]:
# Make sure the experiment exists, then create a fresh trial; the trial name
# doubles as the endpoint name and as the trial-component display name.
experiment_name = pm.get_params(key=prefix + "-EXPERI-NAME")
create_experiment(experiment_name)
job_name = create_trial(experiment_name)


# Deploy a single-instance endpoint using the instance type and data-capture
# settings selected in the earlier cells.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type=inference_instance_type,
    data_capture_config=data_capture_config,
    endpoint_name=job_name,
    experiment_config={
      'TrialName': job_name,
      'TrialComponentDisplayName': job_name,
    }
)

In [None]:
# Sample WAV file (under the current working directory) used to smoke-test
# the endpoint.
paths2audio_files = f"{os.getcwd()}/data/preprocessing/an4/wav/an4test_clstk/fcaw/an406-fcaw-b.wav"
# Bare expression: displays the path as the cell output.
paths2audio_files

In [None]:
import librosa
import IPython.display as ipd

# Load the sample file to obtain its waveform and sample rate, then render an
# inline audio player for it.
# NOTE(review): librosa.load is called without `sr`, so the library's default
# sample rate applies - confirm that is intended.
audio, sample_rate = librosa.load(paths2audio_files)

ipd.Audio(paths2audio_files, rate=sample_rate)

In [None]:
from sagemaker.predictor import Predictor

In [None]:
# predictor = Predictor('nemo-cyj-nemo-experiments-0322-10501679482211')

In [None]:
from sagemaker.serializers import DataSerializer
# Replace the predictor's serializer with a DataSerializer before invoking the
# endpoint with the audio file below.
predictor.serializer = DataSerializer()

In [None]:
# Invoke the endpoint with the sample file; the response is shown as the cell
# output.
# NOTE(review): a path string is passed, presumably relying on DataSerializer
# to read the file contents - confirm.
predictor.predict(paths2audio_files)

In [None]:
# Store the endpoint name and the monitoring output location in the parameter
# store, overwriting any existing values.
pm.put_params(key="ENDPOINT-NAME", value=job_name, overwrite=True)
pm.put_params(key="MONITOR-OUTPUT", value=monitor_output, overwrite=True)

In [None]:
# Read the two values back from the parameter store to confirm what was saved.
print (f'ENDPOINT-NAME: {pm.get_params(key="ENDPOINT-NAME")}')
print (f'MONITOR-OUTPUT: {pm.get_params(key="MONITOR-OUTPUT")}')

In [None]:
# predictor.delete_endpoint()