In [28]:
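## This notebook builds a SageMaker pipeline (preprocess -> train -> evaluate -> register) and attaches training and evaluation metrics to it.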

In [29]:
import os
import time
import json
import boto3
import numpy as np
import pandas as pd
from sagemaker import get_execution_role
import sagemaker
from sagemaker.tensorflow import TensorFlow
from datetime import datetime

In [30]:
# Name of the S3 bucket for this project (a BUCKET constant with the same value is defined in a later cell).
bucket = 'techsummit2023mlops'

In [31]:
# --- Fixed names used by the rest of the notebook ---
model_package_group_name = "tech-sum-2023-package"  # Model name in model registry
pipeline_name = "tech-sum-2023-pipeline-modular"  # SageMaker Pipeline name
prefix = "fashion-model"

# --- AWS handles: one boto3 session shared by the SageMaker client and SDK session ---
sess = boto3.Session()
sm = sess.client("sagemaker")
role = get_execution_role()
sagemaker_session = sagemaker.Session(boto_session=sess)
region = boto3.Session().region_name

# Local-time stamp (month-day-hour-minute-second) taken when this cell runs.
current_time = time.strftime("%m-%d-%H-%M-%S")

In [32]:
import uuid

# Random (version 4) UUID, stored as a string. Later cells embed it in S3
# prefixes and in the registered model's metadata, so the name `id` is kept
# even though it shadows the builtin.
id = str(uuid.uuid4())

print(id)

35206ad0-14c9-478c-8da8-f61db9588f70


In [33]:
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
    ParameterFloat,
)


In [34]:
from sagemaker.tensorflow import TensorFlowProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker import get_execution_role
from sagemaker.workflow.pipeline_context import PipelineSession

# A PipelineSession defers execution: the processor below does not run now,
# it waits for the pipeline to start.
pipeline_session = PipelineSession()

region = boto3.session.Session().region_name

role = get_execution_role()

BUCKET = 'techsummit2023mlops'

# Resolve the date once so the train/val/test prefixes always agree, even if
# this cell happens to run across midnight.
run_date = str(datetime.date(datetime.now()))
processed_prefix = f"{run_date}/{id}/DataProcessed"

# S3 key prefixes (relative to BUCKET): <date>/<run id>/DataProcessed/<split>
train_path = f"{processed_prefix}/train"
val_path = f"{processed_prefix}/val"
test_path = f"{processed_prefix}/test"

# Initialize the TensorFlowProcessor used for the preprocessing step.
tp = TensorFlowProcessor(
    framework_version='2.11.0',
    role=get_execution_role(),
    instance_type='ml.m5.xlarge',
    instance_count=1,
    base_job_name='frameworkprocessor-TF',
    py_version='py39',
    sagemaker_session=pipeline_session,  # makes the processor part of the pipeline session
)


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


In [35]:
# S3 prefix holding the raw input data; passed to the preprocessing job as its 'source_data' input.
source_data = "s3://techsummit2023mlops/raw_data/"

In [36]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep


# (output name, directory inside the container, S3 destination) for every
# artifact the preprocessing script is expected to leave behind.
_preprocessing_outputs = [
    ("train_output_data", "/opt/ml/processing/train/X_train", f's3://{BUCKET}/{train_path}/X_train'),
    ("train_output_label", "/opt/ml/processing/train/y_train", f's3://{BUCKET}/{train_path}/y_train'),
    ("val_output_data", "/opt/ml/processing/val/X_val", f's3://{BUCKET}/{val_path}/X_val'),
    ("val_output_label", "/opt/ml/processing/val/y_val", f's3://{BUCKET}/{val_path}/y_val'),
    ("test_data_eval", "/opt/ml/processing/test", f's3://{BUCKET}/{test_path}'),
]

# Raw data is made available to the script under /opt/ml/processing/source_data.
_raw_input = ProcessingInput(
    input_name='source_data',
    source=source_data,
    destination="/opt/ml/processing/source_data",
)

# `tp` is bound to a PipelineSession, so run() returns step arguments for the
# pipeline rather than starting a job here.
processor_args = tp.run(
    code='preprocessing.py',
    source_dir='script_processing',
    inputs=[_raw_input],
    outputs=[
        ProcessingOutput(output_name=out_name, source=container_dir, destination=s3_dest)
        for out_name, container_dir, s3_dest in _preprocessing_outputs
    ],
)





In [37]:
# Wrap the recorded processor arguments in a pipeline step; later cells read this step's outputs through step_process.properties.
step_process = ProcessingStep(name="PreprocessingStep", step_args=processor_args)

In [38]:
import os

import sagemaker
from sagemaker.tensorflow import TensorFlow


In [39]:
from sagemaker.inputs import TrainingInput


# Training channels. Each channel is named after the preprocessing output that
# feeds it, and its S3 location is taken from that output of the preprocessing step.
_channel_names = (
    "train_output_data",
    "train_output_label",
    "val_output_data",
    "val_output_label",
)

inputs = {
    channel: TrainingInput(
        s3_data=step_process.properties.ProcessingOutputConfig.Outputs[channel].S3Output.S3Uri,
        content_type="text/csv",
    )
    for channel in _channel_names
}


In [40]:
bucket_name = "techsummit2023mlops"
checkpoint_in_bucket = "checkpoints"

# Date component of the run prefix, resolved once so that the model output
# and checkpoint locations below always share the same parent prefix.
model_trained_base_loc = str(datetime.date(datetime.now()))
_run_prefix = "s3://{}/{}/{}".format(bucket_name, model_trained_base_loc, id)

# Upload location of the Keras weights / outputs for the training job.
# (The earlier fixed "s3://techsummit2023mlops/training-output" value was
# overwritten immediately and never used, so it has been dropped.)
model_dir = "{}/{}".format(_run_prefix, 'training-output')

# The S3 URI to store the checkpoints
checkpoint_s3_bucket = "{}/{}".format(_run_prefix, checkpoint_in_bucket)

# The local path where the model will save its checkpoints in the training container
checkpoint_local_path = "/opt/ml/checkpoints"

In [41]:
# Hyperparameters passed to the training job.
hyperparameters = dict(
    output_dir="/opt/ml/model",
    bucket_name=bucket_name,
    model_dir=model_dir,
)

In [42]:
from sagemaker.tensorflow import TensorFlow
from sagemaker.workflow.steps import TrainingStep
from sagemaker.workflow.step_collections import RegisterModel
from sagemaker.workflow.pipeline import Pipeline


import time

train_instance_type = "ml.m5.xlarge"

# TensorFlow estimator for the training job: runs script_train/main.py on a
# single instance, with checkpointing enabled and output written under model_dir.
estimator = TensorFlow(
    entry_point="main.py",
    source_dir="script_train",
    instance_type=train_instance_type,
    instance_count=1,
    hyperparameters=hyperparameters,
    role=sagemaker.get_execution_role(),  # Passes to the container the AWS role that you are using on this notebook
    framework_version="2.11.0",
    py_version="py39",
    base_job_name="TrainingStep",
    checkpoint_s3_uri=checkpoint_s3_bucket,       ## https://docs.aws.amazon.com/sagemaker/latest/dg/model-checkpoints.html
    checkpoint_local_path=checkpoint_local_path,  ## https://docs.aws.amazon.com/sagemaker/latest/dg/model-checkpoints.html
    output_path=model_dir,    ## defines the S3 path to save the model and outputs
    # Each regex is matched against the training job's log output; the captured
    # group is reported as the metric value.
    metric_definitions=[
        {'Name': 'Epoch', 'Regex': 'Epoch=(.*?);'},
        # NOTE(review): this metric name carries a trailing '=' - probably a typo,
        # but confirm nothing references it before renaming.
        {'Name': 'Train_Loss=', 'Regex': 'Train_Loss=(.*?);'},
        # NOTE(review): the original list was cut off after this entry (a stray
        # banner line left the cell unparsable). Re-add any further metrics the
        # training script logs here.
    ],
)

# Use the TensorFlow estimator in a SageMaker Pipelines TrainingStep.
# NOTE how the input to the training job directly references the output of the previous step.
step_train_model = TrainingStep(
    name="Training-Step-Model",
    estimator=estimator,
    inputs=inputs
)

In [43]:
from sagemaker.tensorflow import TensorFlowProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker import get_execution_role
from sagemaker.workflow.pipeline_context import PipelineSession


region = boto3.session.Session().region_name

# A PipelineSession defers execution: the processor below does not run now,
# it waits for the pipeline to start.
pipeline_session = PipelineSession()

role = get_execution_role()

# S3 key prefix for the evaluation output: <date>/<run id>/DataEvaluation/metrics/
eval_path = f"{datetime.date(datetime.now())}/{id}/DataEvaluation/metrics/"

# Processor that hosts the evaluation script.
tp = TensorFlowProcessor(
    sagemaker_session=pipeline_session,
    role=get_execution_role(),
    base_job_name='frameworkprocessor-TF',
    framework_version='2.11.0',
    py_version='py39',
    instance_type='ml.m5.xlarge',
    instance_count=1,
)

# Inputs: the trained model artifacts, the held-out test split from the
# preprocessing step, and a second copy of the model artifacts mounted under
# test_data_dummy.
_evaluation_inputs = [
    ProcessingInput(
        input_name='model_data',
        source=step_train_model.properties.ModelArtifacts.S3ModelArtifacts,
        destination="/opt/ml/processing/model_data",
    ),
    ProcessingInput(
        input_name='test_data_eval',
        source=step_process.properties.ProcessingOutputConfig.Outputs["test_data_eval"].S3Output.S3Uri,
        destination="/opt/ml/processing/test_data_eval",
    ),
    ProcessingInput(
        input_name='test_data_dummy',
        source=step_train_model.properties.ModelArtifacts.S3ModelArtifacts,
        destination="/opt/ml/processing/test_data_dummy",
    ),
]

# Single output: whatever the script writes to /opt/ml/processing/eval_output.
_evaluation_outputs = [
    ProcessingOutput(
        output_name="eval_output_data",
        source="/opt/ml/processing/eval_output",
        destination=f's3://{BUCKET}/{eval_path}',
    ),
]

# Record the processing job arguments for the evaluation step.
evaluation_args = tp.run(
    code='evaluation-sg-metrics.py',
    source_dir='script_evaluation',
    inputs=_evaluation_inputs,
    outputs=_evaluation_outputs,
)




INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


In [44]:
# Wrap the recorded evaluation arguments in a pipeline step; the model-metrics cell reads this step's output location.
step_evaluate = ProcessingStep(name="EvaluationStep", step_args=evaluation_args)

In [45]:
from sagemaker.model import Model

# Inference container image for the registered model.
# The version must match the framework the model is trained with (the
# TensorFlow estimator uses framework_version "2.11.0"); the previous value
# "1.11.0" selected a TensorFlow 1.x serving image.
image_uri = sagemaker.image_uris.retrieve(
    framework="tensorflow",
    region=region,
    version="2.11.0",
    py_version="py39",
    instance_type="ml.m5.xlarge",
    image_scope="inference",
)

from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.model_step import ModelStep

# Model built from the training step's artifacts and the inference image
# retrieved above; bound to the pipeline session so create()/register()
# return step arguments.
model = Model(
    model_data=step_train_model.properties.ModelArtifacts.S3ModelArtifacts,
    image_uri=image_uri,
    role=role,
    sagemaker_session=pipeline_session,
)

# Define the ModelStep by providing the return values from model.create() as the step arguments.
_create_model_args = model.create(
    instance_type="ml.m5.large",
    accelerator_type="ml.eia1.medium",
)
step_create_model = ModelStep(
    name="tech-sum-2023-CreateModel",
    step_args=_create_model_args,
)



INFO:sagemaker.image_uris:Ignoring unnecessary Python version: py39.


In [46]:
from sagemaker.model_metrics import MetricsSource, ModelMetrics

## After a run, validate that the registered package shows the correct ModelMetrics S3 URI and that it points to a valid JSON file, e.g.:
## aws sagemaker  describe-model-package --model-package-name arn:aws:sagemaker:ap-southeast-2:97077XXXXXX:model-package/tech-sum-2023-package/8

# S3 prefix of the evaluation step's first output; the metrics file name is appended to it.
_metrics_prefix = step_evaluate.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
_evaluation_report = MetricsSource(
    s3_uri="{}evaluation_metrics.json".format(_metrics_prefix),
    content_type="application/json",
)
model_metrics = ModelMetrics(model_statistics=_evaluation_report)

# Attach this notebook's run identifier to the model package as metadata.
customer_metadata_properties = dict(id=id)

# Register the model in the package group, together with its metrics and metadata.
register_args = model.register(
    model_package_group_name=model_package_group_name,
    model_metrics=model_metrics,
    customer_metadata_properties=customer_metadata_properties,
    content_types=["text/csv"],
    response_types=["text/csv"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
)
step_register = ModelStep(name="tech-sum-2023-RegisterModel", step_args=register_args)



INFO:sagemaker.processing:Uploaded script_evaluation to s3://sagemaker-ap-southeast-2-970770904251/frameworkprocessor-TF-2023-09-03-17-18-18-533/source/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-ap-southeast-2-970770904251/frameworkprocessor-TF-2023-09-03-17-18-18-533/source/runproc.sh


Using provided s3_resource


In [51]:
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline import PipelineExperimentConfig, ExecutionVariables

# Experiment tracking: configured from the pipeline name and the execution id variables.
_experiment_config = PipelineExperimentConfig(
    ExecutionVariables.PIPELINE_NAME,
    ExecutionVariables.PIPELINE_EXECUTION_ID,
)

# Steps in the pipeline. step_create_model is defined earlier but is not part of this list.
_pipeline_steps = [step_process, step_train_model, step_evaluate, step_register]

pipeline = Pipeline(
    name=pipeline_name,
    steps=_pipeline_steps,
    pipeline_experiment_config=_experiment_config,
)

# Create or update the pipeline definition, then run it and block until it finishes.
pipeline.upsert(role_arn=role)
definition = json.loads(pipeline.definition())
execution = pipeline.start()
execution.wait()

INFO:sagemaker.processing:Uploaded script_processing to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/84cbb526e57fc4f29b718ecc2ee37e52/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/54f0ef6bee583ff9186b762aaf572190/runproc.sh


Using provided s3_resource
Using provided s3_resource


INFO:sagemaker.estimator:SMDebug Does Not Currently Support                         Distributed Training Jobs With Checkpointing Enabled
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker.processing:Uploaded script_evaluation to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/7a1da8bc5eb0194330ca7ab5e2b348d3/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/41b9977adda4039cbc46a75b62f887bc/runproc.sh


Using provided s3_resource


INFO:sagemaker.processing:Uploaded script_processing to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/84cbb526e57fc4f29b718ecc2ee37e52/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/54f0ef6bee583ff9186b762aaf572190/runproc.sh
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


Using provided s3_resource
Using provided s3_resource


INFO:sagemaker.processing:Uploaded script_evaluation to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/7a1da8bc5eb0194330ca7ab5e2b348d3/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/41b9977adda4039cbc46a75b62f887bc/runproc.sh


Using provided s3_resource


INFO:sagemaker.processing:Uploaded script_processing to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/84cbb526e57fc4f29b718ecc2ee37e52/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/54f0ef6bee583ff9186b762aaf572190/runproc.sh
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


Using provided s3_resource
Using provided s3_resource


INFO:sagemaker.processing:Uploaded script_evaluation to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/7a1da8bc5eb0194330ca7ab5e2b348d3/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-ap-southeast-2-970770904251/tech-sum-2023-pipeline-modular/code/41b9977adda4039cbc46a75b62f887bc/runproc.sh


Using provided s3_resource
