In [None]:
# Install the sagemaker_studio_image_build utility.
# A later cell runs `sm-docker build`; presumably this package is what provides
# that command (TODO confirm).
# The install is run through sys.executable so that pip targets the interpreter
# backing this kernel rather than whichever `pip` happens to be first on PATH.
import sys

!{sys.executable} -m pip install sagemaker_studio_image_build

In [None]:
# Print the Dockerfile from the current working directory so the image
# definition is visible before the build cell below runs.
!cat Dockerfile

In [None]:
%%sh

# Build the image from the current directory (".") with sm-docker.
# --repository gives the target repository and tag (bc-test-fcst:3.0); the
# processing-job cell refers to the same repository:tag in its image URI.
sm-docker build . --repository bc-test-fcst:3.0

In [None]:
## Launch a SageMaker Processing job that runs the custom container built in the cell above
import os
import json
import boto3
import time
from sagemaker import get_execution_role

sm = boto3.client("sagemaker")

# --- Job configuration -------------------------------------------------------
# S3 prefix downloaded into the container, and S3 prefix that receives results.
input_uri = "s3://sagemaker-us-east-1-674880395054/legacycode/data/input/"
ouput_uri = "s3://sagemaker-us-east-1-674880395054/legacycode/data/output/"
# Image in ECR (same repository:tag as the sm-docker build cell).
image_uri = "674880395054.dkr.ecr.us-east-1.amazonaws.com/bc-test-fcst:3.0"
# IAM role the job runs under: the execution role of this notebook.
role = get_execution_role()
# Compute sizing and time limit.
instance_type = "ml.m5.xlarge"
volume_size = 20
max_runtime = 3600  # seconds, i.e. 1 hour
# Script inside the image that the container should run with `python`.
entrypoint = "/opt/ml/code/predict.py"

# Job names carry a second-resolution timestamp, e.g. sm-job-20230110-011002.
timestamp = time.strftime("%Y%m%d-%H%M%S")
job_name = f"sm-job-{timestamp}"

# --- Request body for CreateProcessingJob ------------------------------------
create_processing_params = dict(
    # One input: everything under input_uri is copied to /opt/ml/processing/input.
    ProcessingInputs=[
        {
            "InputName": "input_data",
            "S3Input": {
                "S3Uri": input_uri,
                "LocalPath": "/opt/ml/processing/input",
                "S3DataType": "S3Prefix",
                "S3InputMode": "File",
            },
        },
    ],
    # One output: /opt/ml/processing/output is uploaded to ouput_uri at end of job.
    ProcessingOutputConfig={
        "Outputs": [
            {
                "OutputName": "output_data",
                "S3Output": {
                    "S3Uri": ouput_uri,
                    "LocalPath": "/opt/ml/processing/output",
                    "S3UploadMode": "EndOfJob",
                },
            },
        ],
    },
    ProcessingJobName=job_name,
    ProcessingResources={
        "ClusterConfig": {
            "InstanceCount": 1,
            "InstanceType": instance_type,
            "VolumeSizeInGB": volume_size,
        },
    },
    StoppingCondition={"MaxRuntimeInSeconds": max_runtime},
    AppSpecification={
        "ImageUri": image_uri,
        "ContainerEntrypoint": ["python", entrypoint],
    },
    RoleArn=role,
)

# Submit the job; the API response is shown as the cell output.
sm.create_processing_job(**create_processing_params)

In [None]:
# Report the status of the processing job submitted by the cell above.
# `job_name` still holds the timestamped name generated there, so this cell
# tracks the job that was just created instead of a fixed historical job.
# To inspect a different job, assign its name to `job_name` before running
# this cell (e.g. job_name = 'sm-job-20230110-011002').
response = sm.describe_processing_job(
    ProcessingJobName=job_name
)
job_status = response["ProcessingJobStatus"]

print(job_status)

In [None]:
from sagemaker import image_uris

# Look up the image URI for the scikit-learn 0.23-1 framework (training scope)
# in us-east-1; the returned value is displayed as the cell output.
image_uris.retrieve(
    framework="sklearn",
    region="us-east-1",
    version="0.23-1",
    image_scope="training",
)

In [None]:
## Run the legacy script as a Training job: the stock scikit-learn image is used
## and the script is supplied at run time through an input channel.
import os
import json
import boto3
import time
from sagemaker import get_execution_role


sm = boto3.client('sagemaker')

# Get parameters
input_uri = "s3://sagemaker-us-east-1-674880395054/legacycode/data/input/"
ouput_uri = "s3://sagemaker-us-east-1-674880395054/legacycode/data/output/"
# S3 prefix holding the script(s); delivered to the container as the
# "custom_code" channel defined below.
code_uri = "s3://sagemaker-us-east-1-674880395054/legacycode/code/"
image_uri = "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3"
role = get_execution_role()
instance_type = "ml.m5.xlarge"
volume_size = 20
max_runtime = 3600  # Default: 1h
# Path of the script inside the container. File-mode channels are made
# available under /opt/ml/input/data/<ChannelName>, so this must stay in sync
# with the "custom_code" channel name below.
entrypoint = "/opt/ml/input/data/custom_code/preprocess.py"

timestamp = time.strftime('%Y%m%d-%H%M%S')
job_name = f'sm-tr-job-{timestamp}' 

print("Training job", job_name)

# The request is built from the configuration variables above (entrypoint,
# volume_size, max_runtime) rather than repeating their values as literals,
# so editing the configuration actually changes the submitted job.
create_training_params = {
    "AlgorithmSpecification": {"TrainingImage": image_uri, 
                               "TrainingInputMode": "File",
                               # Run `python <entrypoint>` instead of the image's default entrypoint.
                               "ContainerEntrypoint": ["python"],
                               "ContainerArguments": [entrypoint]
                              },
    "RoleArn": role,
    "OutputDataConfig": {"S3OutputPath": ouput_uri},
    "ResourceConfig": {"InstanceCount": 1, "InstanceType": instance_type, "VolumeSizeInGB": volume_size},
    "TrainingJobName": job_name,
    "StoppingCondition": {"MaxRuntimeInSeconds": max_runtime},
    "InputDataConfig": [
        # Channel carrying the script to execute.
        {
            "ChannelName": "custom_code",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": code_uri,
                    "S3DataDistributionType": "FullyReplicated",
                }
            },
            "CompressionType": "None",
            "RecordWrapperType": "None",
        },
        # Channel carrying the input data.
        {
            "ChannelName": "source",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": input_uri,
                    "S3DataDistributionType": "FullyReplicated",
                }
            },
            "CompressionType": "None",
            "RecordWrapperType": "None",
        }
    ]
}


# NOTE: this rebinds the global name `sagemaker` to a boto3 client. It is kept
# because the status cell below calls `sagemaker.describe_training_job`.
sagemaker = boto3.client("sagemaker")

# Submit the job; the API response is shown as the cell output.
sagemaker.create_training_job(**create_training_params)


In [None]:
# Report the status of the training job submitted by the cell above.
# `job_name` still holds the timestamped name generated there, so this cell
# tracks the job that was just created instead of a fixed historical job.
# To inspect a different job, assign its name to `job_name` before running
# this cell (e.g. job_name = 'sm-tr-job-20230110-002816').
status = sagemaker.describe_training_job(TrainingJobName=job_name)["TrainingJobStatus"]
print(status)

In [None]:
## Lambda function source: start a SageMaker Processing job from an event payload
import os
import json
import boto3
import time

from typing import Optional


# The client is created once at module level, outside the handler, so it is
# not rebuilt on every call to lambda_handler.
sm = boto3.client('sagemaker')


def lambda_handler(event, context):
    """Start a SageMaker Processing job described by ``event``.

    Required event keys (a missing key raises ``KeyError``):
        S3Input, S3Output, ImageUri, RoleArn, InstanceType, VolumeSizeInGB.

    Optional event keys:
        MaxRuntimeInSeconds: time limit for the job; defaults to 3600.
        EntryPoint: path of a Python script inside the image. When given, the
            container is started as ``python <EntryPoint>``; when omitted, no
            entrypoint override is sent and the image's own entrypoint applies.
        ContainerArguments: a string or a list of arguments forwarded to the
            container; omitted from the request when absent or empty.

    Args:
        event: dict carrying the keys above.
        context: Lambda context object; not used.

    Returns:
        dict with a single key ``JobName`` holding the generated job name.
    """
    # Required parameters
    input_uri = event['S3Input']
    ouput_uri = event['S3Output']
    image_uri = event['ImageUri']
    role = event['RoleArn']
    instance_type = event['InstanceType']
    volume_size = event['VolumeSizeInGB']

    # Optional parameters
    max_runtime = event.get('MaxRuntimeInSeconds', 3600)  # Default: 1h
    container_arguments = event.get('ContainerArguments')
    entrypoint = event.get('EntryPoint')

    # Second-resolution timestamp, e.g. sagemaker-job-20230110-011002.
    timestamp = time.strftime('%Y%m%d-%H%M%S')
    job_name = f'sagemaker-job-{timestamp}'

    # Only send the optional container settings that were actually supplied.
    # Passing ['python', None] for a missing EntryPoint would be rejected by
    # the API client, which made the "optional" parameter effectively required.
    app_specification = {'ImageUri': image_uri}
    if entrypoint:
        app_specification['ContainerEntrypoint'] = ['python', entrypoint]
    if container_arguments:
        # Accept a bare string as a single argument instead of iterating its characters.
        if isinstance(container_arguments, str):
            container_arguments = [container_arguments]
        app_specification['ContainerArguments'] = [str(arg) for arg in container_arguments]

    create_processing_params = {
        "ProcessingInputs": [
                {
                    'InputName': 'input_data',
                    'S3Input': {
                        'S3Uri': input_uri,
                        'LocalPath': '/opt/ml/processing/input',
                        'S3DataType': 'S3Prefix',
                        'S3InputMode': 'File'
                    }
                }
        ],
        "ProcessingOutputConfig": {
            'Outputs': [
                {
                    'OutputName': 'output_data',
                    'S3Output': {
                        'S3Uri': ouput_uri,
                        'LocalPath': '/opt/ml/processing/output',
                        'S3UploadMode': 'EndOfJob'
                    }
                }
            ]
        },
        "ProcessingJobName": job_name,
        "ProcessingResources": {
            'ClusterConfig': {
                'InstanceCount': 1,
                'InstanceType': instance_type,
                'VolumeSizeInGB': volume_size
            }
        },
        "StoppingCondition": {
            'MaxRuntimeInSeconds': max_runtime
        },
        "AppSpecification": app_specification,
        "RoleArn": role
    }

    # Create the processing job and return its name so a caller can poll it.
    sm.create_processing_job(**create_processing_params)

    return {
        'JobName': job_name
    }

In [None]:
import boto3

# Module-level SageMaker client used by lambda_handler below; created once,
# outside the handler.
sm = boto3.client('sagemaker')

def lambda_handler(event, context):
    """Report the status of a SageMaker Processing job.

    Args:
        event: dict with key ``JobName`` naming the processing job to look up
            (a missing key raises ``KeyError``).
        context: Lambda context object; not used.

    Returns:
        dict with a single key ``ProcessingJobStatus`` copied from the
        describe_processing_job response.
    """
    description = sm.describe_processing_job(ProcessingJobName=event['JobName'])
    return {'ProcessingJobStatus': description["ProcessingJobStatus"]}

In [None]:
# Install the requirements listed in src/requirements.txt (path relative to
# the notebook's working directory) into this kernel's interpreter.
# Relies on `sys` having been imported by the first cell of the notebook.
!{sys.executable} -m pip install -r src/requirements.txt

In [None]:
# Run predict.py directly on this machine, passing `local` and a data directory
# as its two arguments (how they are interpreted is defined in predict.py,
# which is not shown here).
# NOTE(review): both paths are absolute under /root/Blog_LegacyCode_MLOps and
# will only resolve on a machine with that exact checkout location - adjust to
# your environment. Also, `python` here is whatever is first on PATH, which may
# differ from the interpreter the requirements were installed into - confirm.
!python /root/Blog_LegacyCode_MLOps/Template/InferenceContainer/src/predict.py local /root/Blog_LegacyCode_MLOps/Template/data/