In [None]:
# Install the sagemaker_studio_image_build utility.
# The package supplies the `sm-docker` command used by the image build cell below.
import sys

# Call pip through the kernel's own interpreter (sys.executable) so the package is
# installed into the environment this notebook runs in. `sys` is also reused by the
# requirements install cell further down.
!{sys.executable} -m pip install sagemaker_studio_image_build

In [None]:
# Print the Dockerfile found in the current working directory so it can be reviewed
# before the image is built (the build cell below also runs against `.`).
!cat Dockerfile

In [None]:
%%sh

# Build the current directory with sm-docker, targeting repository `legacycode`
# and tag `1.0`. The custom-container processing cell references an image named
# `legacycode:1.0`.
sm-docker build . --repository legacycode:1.0

In [None]:
# Fetch the execution role through the SageMaker SDK and print it for reference.
# The job-creation cells below call get_execution_role() again and pass the result
# as the job's RoleArn.
from sagemaker import get_execution_role
role = get_execution_role()
print(role)

In [None]:
# Look up the URI of the pre-built scikit-learn 0.23-1 image in us-east-1.
# The result is only shown as the cell output; nothing is assigned here, and the
# job cells below carry their own hard-coded image URI.
from sagemaker import image_uris

image_uris.retrieve(
    framework='sklearn',
    region='us-east-1',
    version='0.23-1',
    image_scope='training',
)

In [None]:
## Create the pre-processing job (script mode) using the pre-built scikit-learn container.
## Stage 1 of 3: reads preproc/input and writes predict/input, which is the input
## prefix of the custom-container processing job in the next job cell.
import os
import json
import boto3
import time
from sagemaker import get_execution_role

sm = boto3.client('sagemaker')

# Get parameters
# NOTE: bucket name, account id and region are hard-coded; adjust them for your environment.
input_uri = "s3://sagemaker-us-east-1-111111111111/legacycode/data/preproc/input/"
output_uri = "s3://sagemaker-us-east-1-111111111111/legacycode/data/predict/input/"
code_uri = "s3://sagemaker-us-east-1-111111111111/legacycode/scripts/"
image_uri = "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3"
role = get_execution_role()
instance_type = "ml.m5.xlarge"
volume_size = 20
max_runtime = 3600  # Default: 1h

# The stage name is part of the job name because the timestamp only has one-second
# resolution: job cells executed back to back could otherwise request the same name.
timestamp = time.strftime('%Y%m%d-%H%M%S')
job_name = f'sm-job-preproc-{timestamp}'

# Define inputs/outputs
create_processing_params = {
    "ProcessingInputs": [
        {
            # Raw data, made available to the script under LocalPath
            'InputName': 'input_data',
            'S3Input': {
                'S3Uri': input_uri,
                'LocalPath': '/opt/ml/processing/input/data/',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File'
            }
        },
        {
            # The scripts prefix; the entrypoint below runs preprocess.py from this LocalPath
            'InputName': 'scripts',
            'S3Input': {
                'S3Uri': code_uri,
                'LocalPath': '/opt/ml/processing/input/scripts/',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File'
            }
        }
    ],
    "ProcessingOutputConfig": {
        'Outputs': [
            {
                'OutputName': 'output_data',
                'S3Output': {
                    'S3Uri': output_uri,
                    'LocalPath': '/opt/ml/processing/output',
                    'S3UploadMode': 'EndOfJob'
                }
            }
        ]
    },
    "ProcessingJobName": job_name,
    "ProcessingResources": {
        'ClusterConfig': {
            'InstanceCount': 1,
            'InstanceType': instance_type,
            'VolumeSizeInGB': volume_size
        }
    },
    "StoppingCondition": {
        'MaxRuntimeInSeconds': max_runtime
    },
    "AppSpecification": {
        'ImageUri': image_uri,
        'ContainerEntrypoint': ['python', "/opt/ml/processing/input/scripts/preprocess.py"]
    },
    "RoleArn": role
}

# Create processing job and print the job ARN
response = sm.create_processing_job(**create_processing_params)
print(response['ProcessingJobArn'])

# create_processing_job only submits the job. The next stage reads the prefix this
# job writes to, so block here until the job has finished; the waiter raises a
# WaiterError if the job fails. The attempt budget covers max_runtime plus roughly
# ten extra minutes for start-up and output upload.
sm.get_waiter('processing_job_completed_or_stopped').wait(
    ProcessingJobName=job_name,
    WaiterConfig={'Delay': 30, 'MaxAttempts': max_runtime // 30 + 20},
)

In [None]:
## Create the processing job using the custom container built above.
## Stage 2 of 3: reads predict/input (written by the pre-processing job) and writes
## predict/output, which is the input prefix of the post-processing job.
import os
import json
import boto3
import time
from sagemaker import get_execution_role

sm = boto3.client('sagemaker')

# Get parameters
# NOTE: bucket name, account id and region are hard-coded; adjust them for your environment.
input_uri = "s3://sagemaker-us-east-1-111111111111/legacycode/data/predict/input/"
output_uri = "s3://sagemaker-us-east-1-111111111111/legacycode/data/predict/output/"
# NOTE(review): this image lives in account 598227499323 while the buckets above use
# 111111111111 - confirm it matches the account the `sm-docker build` cell pushed to.
image_uri = "598227499323.dkr.ecr.us-east-1.amazonaws.com/legacycode:1.0"
role = get_execution_role()
instance_type = "ml.m5.xlarge"
volume_size = 20
max_runtime = 3600  # Default: 1h
entrypoint = "/opt/ml/code/predict.py"  # script path inside the custom image

# The stage name is part of the job name because the timestamp only has one-second
# resolution: job cells executed back to back could otherwise request the same name.
timestamp = time.strftime('%Y%m%d-%H%M%S')
job_name = f'sm-job-predict-{timestamp}'

# Define inputs/outputs
create_processing_params = {
    "ProcessingInputs": [
        {
            'InputName': 'input_data',
            'S3Input': {
                'S3Uri': input_uri,
                'LocalPath': '/opt/ml/processing/input',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File'
            }
        }
    ],
    "ProcessingOutputConfig": {
        'Outputs': [
            {
                'OutputName': 'output_data',
                'S3Output': {
                    'S3Uri': output_uri,
                    'LocalPath': '/opt/ml/processing/output',
                    'S3UploadMode': 'EndOfJob'
                }
            }
        ]
    },
    "ProcessingJobName": job_name,
    "ProcessingResources": {
        'ClusterConfig': {
            'InstanceCount': 1,
            'InstanceType': instance_type,
            'VolumeSizeInGB': volume_size
        }
    },
    "StoppingCondition": {
        'MaxRuntimeInSeconds': max_runtime
    },
    "AppSpecification": {
        'ImageUri': image_uri,
        'ContainerEntrypoint': ['python', entrypoint]
    },
    "RoleArn": role
}

# Create processing job and print the job ARN
response = sm.create_processing_job(**create_processing_params)
print(response['ProcessingJobArn'])

# create_processing_job only submits the job. The post-processing stage reads the
# prefix this job writes to, so block here until the job has finished; the waiter
# raises a WaiterError if the job fails. The attempt budget covers max_runtime plus
# roughly ten extra minutes for start-up and output upload.
sm.get_waiter('processing_job_completed_or_stopped').wait(
    ProcessingJobName=job_name,
    WaiterConfig={'Delay': 30, 'MaxAttempts': max_runtime // 30 + 20},
)

In [None]:
## Create the post-processing job (script mode) using the pre-built scikit-learn container.
## Stage 3 of 3: reads predict/output, the prefix written by the custom-container job.
import os
import json
import boto3
import time
from sagemaker import get_execution_role

sm = boto3.client('sagemaker')

# Get parameters
# NOTE: bucket name, account id and region are hard-coded; adjust them for your environment.
input_uri = "s3://sagemaker-us-east-1-111111111111/legacycode/data/predict/output/"
# NOTE(review): the post-processing result is written under preproc/output - confirm
# this prefix is intended.
output_uri = "s3://sagemaker-us-east-1-111111111111/legacycode/data/preproc/output/"
code_uri = "s3://sagemaker-us-east-1-111111111111/legacycode/scripts/"
image_uri = "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3"
role = get_execution_role()
instance_type = "ml.m5.xlarge"
volume_size = 20
max_runtime = 3600  # Default: 1h

# The stage name is part of the job name because the timestamp only has one-second
# resolution: job cells executed back to back could otherwise request the same name.
timestamp = time.strftime('%Y%m%d-%H%M%S')
job_name = f'sm-job-postproc-{timestamp}'

# Define inputs/outputs
create_processing_params = {
    "ProcessingInputs": [
        {
            # Output of the prediction stage, made available under LocalPath
            'InputName': 'input_data',
            'S3Input': {
                'S3Uri': input_uri,
                'LocalPath': '/opt/ml/processing/input/data/',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File'
            }
        },
        {
            # The scripts prefix; the container argument below points into this LocalPath
            'InputName': 'scripts',
            'S3Input': {
                'S3Uri': code_uri,
                'LocalPath': '/opt/ml/processing/input/scripts/',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File'
            }
        }
    ],
    "ProcessingOutputConfig": {
        'Outputs': [
            {
                'OutputName': 'output_data',
                'S3Output': {
                    'S3Uri': output_uri,
                    'LocalPath': '/opt/ml/processing/output',
                    'S3UploadMode': 'EndOfJob'
                }
            }
        ]
    },
    "ProcessingJobName": job_name,
    "ProcessingResources": {
        'ClusterConfig': {
            'InstanceCount': 1,
            'InstanceType': instance_type,
            'VolumeSizeInGB': volume_size
        }
    },
    "StoppingCondition": {
        'MaxRuntimeInSeconds': max_runtime
    },
    "AppSpecification": {
        'ImageUri': image_uri,
        # Here the script path is passed as a container argument to `python`
        # rather than being part of the entrypoint list.
        'ContainerEntrypoint': ['python'],
        # NOTE(review): this is the same preprocess.py the pre-processing job runs -
        # confirm that a dedicated post-processing script is not intended here.
        "ContainerArguments": [
          "/opt/ml/processing/input/scripts/preprocess.py"
        ]
    },
    "RoleArn": role
}

# Create processing job and print the job ARN
response = sm.create_processing_job(**create_processing_params)
print(response['ProcessingJobArn'])

In [None]:
# Check processing job status.
# `sm` and `job_name` come from whichever job-creation cell ran last, so this reports
# on the job that was just submitted. To inspect a different job, assign its name to
# `job_name` before running this cell.
response = sm.describe_processing_job(
    ProcessingJobName=job_name
)
job_status = response["ProcessingJobStatus"]

print(job_status)

# Surface the reason when the job did not succeed, instead of only the bare status.
if job_status == "Failed":
    print(response.get("FailureReason", "No failure reason reported"))

In [None]:
# Install the requirements locally.
# Uses the kernel's interpreter via `sys`, which is imported in the first cell of
# this notebook; the requirements file path is relative to the working directory.
!{sys.executable} -m pip install -r src/requirements.txt

In [None]:
#Test locally
!python /root/legacyCode_mlOps/InferenceContainer/src/predict.py local /root/data/