In [None]:
import uuid
import json
from time import gmtime, strftime
import boto3
import sagemaker
from sagemaker.session import Session
from sagemaker.feature_store.feature_group import FeatureGroup

# SageMaker execution role and session used throughout this notebook.
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session()

# Region reported by the SageMaker session, and a boto3 session bound to it.
region = sagemaker_session.boto_region_name
boto_session = boto3.Session(region_name=region)

# AWS account ID of the caller, looked up through STS.
sts_client = boto3.client('sts')
caller_identity = sts_client.get_caller_identity()
account_id = caller_identity.get('Account')

# Hex form of a uuid1 value, to be used in resource names.
suffix = uuid.uuid1().hex

In [None]:
# Show the current working directory (IPython automagic form of %pwd).
pwd

In [None]:
# Change into src/ (IPython automagic form of %cd). Later cells refer to
# Dockerfile and the workflow JSON by relative path, so they are presumably
# expected to live in this directory.
cd src

In [None]:
# Replace every ##REGION## placeholder in Dockerfile, in place, with the value
# of the Python variable `region` (IPython interpolates {region} before the
# shell runs the command).
!sed -i "s|##REGION##|{region}|g" Dockerfile

In [None]:
# Print the Dockerfile so the substituted region can be checked by eye.
!cat Dockerfile

Build a container image from the Dockerfile

In [None]:
# Install the sagemaker-studio-image-build package quietly (-q); the
# `sm-docker` command used in the next cell presumably comes from it.
!pip install -q sagemaker-studio-image-build

In [None]:
# Build the image from the current directory and tag it as
# medical-image-processing-smstudio:1.0; the same name and tag are used when
# the ECR image URI is assembled later in this notebook.
!sm-docker build . --repository medical-image-processing-smstudio:1.0

Define the input and output data locations. Replace the placeholder values of `input_data_bucket` and `output_data_bucket` with the names of your own S3 buckets.

In [None]:
# S3 location of the input data. Replace the placeholder with your bucket name.
input_data_bucket = '<your-s3-bucket-name>'
input_data_prefix = 'nsclc_radiogenomics'

# Assemble the full s3:// URI from bucket and prefix, then show it.
input_location = (input_data_bucket, input_data_prefix)
input_data_uri = 's3://%s/%s' % input_location
print(input_data_uri)

In [None]:
# S3 location for the output data. Replace the placeholder with your bucket name.
output_data_bucket = '<your-s3-bucket-name>'
output_data_prefix = 'nsclc_radiogenomics'

# Assemble the full s3:// URI from bucket and prefix, then show it.
output_location = (output_data_bucket, output_data_prefix)
output_data_uri = 's3://%s/%s' % output_location
print(output_data_uri)

Be sure to use the image name and tag defined in the `!sm-docker build` command. We will replace the placeholders in the Step Functions state machine definition JSON file with your input and output S3 URIs, the image URI, and the IAM role ARN.

In [None]:
# ECR URI of the container image. The repository name and tag must match the
# ones passed to `sm-docker build`.
ecr_image_uri = '%s.dkr.ecr.%s.amazonaws.com/medical-image-processing-smstudio:1.0' % (
    account_id,
    region,
)

In [None]:
# Fill in the placeholders of the state machine definition file, in place.
# IPython interpolates each {variable} before the shell runs sed; `|` is used
# as the sed delimiter, so the `/` characters in the S3 URIs do not need escaping.
!sed -i "s|##INPUT_DATA_S3URI##|{input_data_uri}|g" nsclc-radiogenomics-imaging-workflow.json
!sed -i "s|##OUTPUT_DATA_S3URI##|{output_data_uri}|g" nsclc-radiogenomics-imaging-workflow.json
!sed -i "s|##ECR_IMAGE_URI##|{ecr_image_uri}|g" nsclc-radiogenomics-imaging-workflow.json
!sed -i "s|##IAM_ROLE_ARN##|{role}|g" nsclc-radiogenomics-imaging-workflow.json

In [None]:
# Parse the state machine definition file into a Python object; it is
# serialized again with json.dumps when the state machine is created.
with open('nsclc-radiogenomics-imaging-workflow.json') as workflow_file:
    state_machine_json = json.load(workflow_file)

We need to create an IAM execution role for the Step Functions workflow.

In [None]:
# IAM client used to create the policy and role for the workflow.
iam = boto3.client('iam')

# Statement 1: manage the listed EventBridge rules.
events_statement = {
    "Effect": "Allow",
    "Action": [
        "events:PutTargets",
        "events:DescribeRule",
        "events:PutRule"
    ],
    "Resource": [
        "arn:aws:events:*:*:rule/StepFunctionsGetEventsForSageMakerTrainingJobsRule",
        "arn:aws:events:*:*:rule/StepFunctionsGetEventsForSageMakerTransformJobsRule",
        "arn:aws:events:*:*:rule/StepFunctionsGetEventsForSageMakerTuningJobsRule",
        "arn:aws:events:*:*:rule/StepFunctionsGetEventsForECSTaskRule",
        "arn:aws:events:*:*:rule/StepFunctionsGetEventsForBatchJobsRule"
    ]
}

# Statement 2: allow passing the notebook's execution role, but only to SageMaker.
pass_role_statement = {
    "Effect": "Allow",
    "Action": "iam:PassRole",
    "Resource": role,
    "Condition": {
        "StringEquals": {
            "iam:PassedToService": "sagemaker.amazonaws.com"
        }
    }
}

# Statement 3: SageMaker job, model and endpoint actions on any resource.
sagemaker_statement = {
    "Effect": "Allow",
    "Action": [
        "sagemaker:CreateEndpoint",
        "sagemaker:CreateEndpointConfig",
        "sagemaker:CreateHyperParameterTuningJob",
        "sagemaker:CreateModel",
        "sagemaker:CreateProcessingJob",
        "sagemaker:CreateTrainingJob",
        "sagemaker:CreateTransformJob",
        "sagemaker:DeleteEndpoint",
        "sagemaker:DeleteEndpointConfig",
        "sagemaker:DescribeHyperParameterTuningJob",
        "sagemaker:DescribeProcessingJob",
        "sagemaker:DescribeTrainingJob",
        "sagemaker:DescribeTransformJob",
        "sagemaker:ListProcessingJobs",
        "sagemaker:ListTags",
        "sagemaker:StopHyperParameterTuningJob",
        "sagemaker:StopProcessingJob",
        "sagemaker:StopTrainingJob",
        "sagemaker:StopTransformJob",
        "sagemaker:UpdateEndpoint",
    ],
    "Resource": "*"
}

# Permissions policy document, serialized and passed to iam.create_policy later.
my_managed_policy = {
    "Version": "2012-10-17",
    "Statement": [
        events_statement,
        pass_role_statement,
        sagemaker_statement,
    ]
}

# Trust statement: both Step Functions and SageMaker may assume the role.
assume_role_statement = {
    "Sid": "",
    "Effect": "Allow",
    "Principal": {
        "Service": ["states.amazonaws.com", "sagemaker.amazonaws.com"]
    },
    "Action": "sts:AssumeRole"
}

# Trust policy document, serialized and passed to iam.create_role later.
trust_policy = {
    "Version": "2012-10-17",
    "Statement": [assume_role_statement]
}

In [None]:
# Names for the customer-managed policy and the execution role; both embed
# `suffix` so that they do not collide with existing resources.
policy_name = 'MyStepFunctionsWorkflowExecutionPolicy-%s' % suffix

# IAM limits role names to 64 characters. The 37-character prefix plus the
# 32-character uuid hex `suffix` is 69 characters, which create_role rejects
# with a validation error, so the name is cut to the limit. The leading
# (time-based) part of the suffix is kept, so the name stays unique.
role_name = ('MyStepFunctionsWorkflowExecutionRole-%s' % suffix)[:64]

# Create the customer-managed policy from the permissions document.
policy_response = iam.create_policy(
    PolicyName=policy_name,
    PolicyDocument=json.dumps(my_managed_policy)
)

# Create the role with the trust policy that names the services allowed to assume it.
role_response = iam.create_role(
    RoleName=role_name,
    AssumeRolePolicyDocument=json.dumps(trust_policy),
    Description='Role to execute StepFunctions workflow which submits SageMaker jobs',
    MaxSessionDuration=3600,
)

# Attach the new customer-managed policy and the AWS-managed
# CloudWatchEventsFullAccess policy to the role.
iam.attach_role_policy(
    PolicyArn=policy_response['Policy']['Arn'],
    RoleName=role_name
)
iam.attach_role_policy(
    PolicyArn='arn:aws:iam::aws:policy/CloudWatchEventsFullAccess',
    RoleName=role_name
)

Create a Step Functions workflow, i.e. a state machine.

In [None]:
# Step Functions client, and the ARN of the IAM role the workflow will run as.
sfn = boto3.client('stepfunctions')
sfn_execution_role = role_response['Role']['Arn']

# State machine name, made unique with the notebook-wide suffix.
state_machine_name = 'nsclc-radiogenomics-imaging-workflow-%s' % suffix

# Create a STANDARD state machine from the loaded definition.
sfn_response = sfn.create_state_machine(
    name=state_machine_name,
    definition=json.dumps(state_machine_json),
    roleArn=sfn_execution_role,
    type='STANDARD',
)

We will be running this workflow for all the `R01` subjects.

In [None]:
# Subject IDs R01-001 through R01-162, with the number zero-padded to three digits.
first_subject, last_subject = 1, 162
subject_list = [
    'R01-%03d' % subject_number
    for subject_number in range(first_subject, last_subject + 1)
]

Execute!

In [None]:
# ARN of the state machine returned by create_state_machine.
stateMachineArn = sfn_response['stateMachineArn']

# Per-run names and the offline store location handed to the workflow.
processing_job_name = 'dcm-nifti-conversion-%s' % suffix
feature_store_name = 'imaging-feature-group-%s' % suffix
offline_store_s3uri = '%s/multimodal-imaging-featurestore' % output_data_uri

# Execution input; serialized to JSON when the execution is started.
payload = {
    "PreprocessingJobName": processing_job_name,
    "FeatureStoreName": feature_store_name,
    "OfflineStoreS3Uri": offline_store_s3uri,
    "Subject": subject_list,
}

# Start one execution named after the suffix. The variable name's spelling is
# kept as-is because a later cell reads it.
exeution_response = sfn.start_execution(
    stateMachineArn=stateMachineArn,
    name=suffix,
    input=json.dumps(payload),
)

In [None]:
# Show the response returned by start_execution.
print(exeution_response)