[![Licence](https://img.shields.io/badge/license-MIT-blue)](https://opensource.org/license/mit/)

# MONAI on AWS Workshop

Set up the notebook environment using the "PyTorch 1.12 Python 3.8 CPU optimized" kernel with the "t3.medium" instance type.

<img src="../Figures/studio_setup_cpu.png" width="600">

## Download and install libraries

In [None]:
# Quiet pip for this kernel: skip its "new version available" check and the
# warning it prints when run as the root user.
%env PIP_DISABLE_PIP_VERSION_CHECK True
%env PIP_ROOT_USER_ACTION ignore

# Install the packages this notebook relies on: the AWS SDK, DICOM/NIfTI
# readers, ITK plus its Jupyter widgets, and the AHItoDICOMInterface helper
# used in the visualization section.
!pip install -q --upgrade pip
!pip install -q --upgrade boto3 botocore
!pip install -q tqdm nibabel pydicom numpy pathlib2 pylibjpeg-openjpeg
!pip install -q "itk>=5.3rc4" "itkwidgets[all]>=1.0a23"
!pip install --upgrade -q AHItoDICOMInterface

# Re-import edited modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2

### Import Libraries and Set Up the Environment

In [None]:
import json
import logging 
import boto3
import io
import sys
import time
import os
import pandas as pd
import sagemaker
from sagemaker import get_execution_role
from botocore.exceptions import ClientError
import warnings
warnings.filterwarnings('ignore')
logging.basicConfig( level="INFO" )
# logging.basicConfig( level="DEBUG" )
# Shared AWS handles and run-specific names used by the rest of the notebook.
s3 = boto3.client('s3')

account_id = boto3.client("sts").get_caller_identity()["Account"]
# Build the SageMaker session once and reuse it for the default bucket lookup
# instead of constructing a second, throw-away Session.
session = sagemaker.session.Session()
region = boto3.Session().region_name
bucket = session.default_bucket()
role = f"arn:aws:iam::{account_id}:role/HealthImagingImportJobRole"  ## use this role if you have deployed the CloudFormation template described above
# role = get_execution_role()                ## use this role if you want to use SageMaker Execution role to import image into AWS HealthImaging
print(f"S3 Bucket is {bucket}")
print(f"IAM role for image import job is {role}")

# A per-run timestamp keeps the S3 prefixes of separate notebook runs apart.
suffix = int(time.time())
ahi_input_prefix = f'monaideploy_tutorial_images_{suffix}/'
ahi_output_prefix = f'monaideploy_tutorial_importjobs_{suffix}/'
dicom_header_prefix = f'tutorial_dicom_headers_{suffix}/'

print(f"S3 prefix for input images is {ahi_input_prefix}")
print(f"S3 prefix for import job outputs is {ahi_output_prefix}")

### Create IAM Role and Policy for AWS HealthImaging Import Job (Optional)

If you did not deploy the CloudFormation template available in the Pre-requisites, you can run the following cell to create the necessary IAM role and policy for the AWS HealthImaging import job. Please make sure your SageMaker execution role has permission to create IAM roles and policies.

In [None]:
# Create (or reuse) the managed policy and the IAM role that the import job
# is started with, then print both ARNs.
iam = boto3.client('iam')

# This notebook stages the DICOM input and receives the import job output in
# the same bucket (`bucket`), so both names point at it.
InputBucketName = bucket
OutputBucketName = bucket

# Trust policy: lets the medical-imaging service principal assume the role.
assume_role_policy_document = json.dumps({
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": [
                    "medical-imaging.amazonaws.com"
                ]
            },
            "Action": "sts:AssumeRole"
        }
    ]
})

# Permissions policy: bucket-level listing, object reads on the input bucket,
# object writes on the output bucket, and the medical-imaging actions below.
import_policy_document = json.dumps({
    "Version": "2012-10-17",
    "Statement": [
        {
            "Action": [
                "s3:ListBucket",
                "s3:GetEncryptionConfiguration"
            ],
            "Resource": [
                f"arn:aws:s3:::{InputBucketName}",
                f"arn:aws:s3:::{OutputBucketName}"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "s3:GetObject"
            ],
            "Resource": [
                f"arn:aws:s3:::{InputBucketName}/*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "s3:PutObject"
            ],
            "Resource": [
                f"arn:aws:s3:::{OutputBucketName}/*"
            ],
            "Effect": "Allow"
        },
        {
            # NOTE(review): "GetDICOMStudyMetdata" looks misspelled; confirm the
            # intended action name against the current service action list.
            "Action": [
                "medical-imaging:CreateDatastore",
                "medical-imaging:GetDatastore",
                "medical-imaging:ListDatastores",
                "medical-imaging:UpdateDatastore",
                "medical-imaging:StartDICOMImportJob",
                "medical-imaging:GetDICOMImportJob",
                "medical-imaging:UpdateDICOMImportJob",
                "medical-imaging:GetDICOMStudyMetdata",
                "medical-imaging:GetImageFrame"
            ],
            "Resource": "*",
            "Effect": "Allow"
        }
    ]
})

# Reuse the policy if it already exists; create it only when IAM reports it
# as missing. Any other error (access denied, throttling, ...) is re-raised
# instead of being mistaken for "not found".
try:
    respons_createpolicy = iam.get_policy(PolicyArn=f"arn:aws:iam::{account_id}:policy/HealthImagingImportJobRole-Policy")
except ClientError as err:
    if err.response['Error']['Code'] != 'NoSuchEntity':
        raise
    respons_createpolicy = iam.create_policy(
        PolicyName='HealthImagingImportJobRole-Policy',
        PolicyDocument=import_policy_document,
    )

# Same approach for the role; the policy is attached when the role is created.
try:
    response_createrole = iam.get_role(RoleName='HealthImagingImportJobRole')
except ClientError as err:
    if err.response['Error']['Code'] != 'NoSuchEntity':
        raise
    response_createrole = iam.create_role(
        RoleName='HealthImagingImportJobRole',
        AssumeRolePolicyDocument=assume_role_policy_document
    )
    response = iam.attach_role_policy(
        RoleName=response_createrole['Role']['RoleName'],
        PolicyArn=respons_createpolicy['Policy']['Arn']
    )
print(response_createrole['Role']['Arn'])
print(respons_createpolicy['Policy']['Arn'])

### Download AI Spleen Segmentation Dataset

In [None]:
import os
if not os.path.exists('dcm'):
    # Download ai_spleen_bundle_data test data zip file
    !pip install gdown 
    !gdown "https://drive.google.com/uc?id=1Uds8mEvdGNYUuvFpTtCQ8gNU97bAPCaQ"

    # After downloading ai_spleen_bundle_data zip file from the web browser or using gdown,
    !unzip -o "ai_spleen_seg_bundle_data.zip"
    
for f in os.listdir('dcm'):
    s3.upload_file('dcm/'+f, bucket, ahi_input_prefix + f)

## Define Python Client for AWS HealthImaging

In [None]:
class MedicalImaging:
    """Thin wrapper around a boto3 ``medical-imaging`` client.

    Each method forwards to one client call, logs how long the call took
    (in milliseconds, at INFO level) and returns the client's response
    unchanged, except for ``getMetadata`` which returns the decoded metadata.
    """

    def __init__(self, client):
        """Store the client object that every call is forwarded to."""
        self.client = client

    def stopwatch(self, start_time, end_time):
        """Return ``end_time - start_time`` converted from seconds to milliseconds."""
        return (end_time - start_time) * 1000

    def _timed(self, label, operation, **kwargs):
        """Call ``operation(**kwargs)``, log its duration under ``label``, return its result."""
        start_time = time.time()
        response = operation(**kwargs)
        end_time = time.time()
        logging.info(f"{label}  : {self.stopwatch(start_time,end_time)} ms")
        return response

    def listDatastores(self):
        """Return the response of the client's ``list_datastores`` call."""
        return self._timed("List Datastores", self.client.list_datastores)

    def createDatastore(self, datastoreName):
        """Create a datastore named ``datastoreName`` and return the response."""
        return self._timed(
            "Create Datastore", self.client.create_datastore, datastoreName=datastoreName
        )

    def getDatastore(self, datastoreId):
        """Return the ``get_datastore`` response for ``datastoreId``."""
        return self._timed(
            "Get Datastore", self.client.get_datastore, datastoreId=datastoreId
        )

    def deleteDatastore(self, datastoreId):
        """Delete the datastore ``datastoreId`` and return the response."""
        return self._timed(
            "Delete Datastore", self.client.delete_datastore, datastoreId=datastoreId
        )

    def startImportJob(self, datastoreId, IamRoleArn, inputS3, outputS3, clientToken="demoClient"):
        """Start a DICOM import job and return the response.

        Args:
            datastoreId: Target datastore ID.
            IamRoleArn: Passed to the API as ``dataAccessRoleArn``.
            inputS3: Passed to the API as ``inputS3Uri``.
            outputS3: Passed to the API as ``outputS3Uri``.
            clientToken: Passed to the API as ``clientToken``. Defaults to
                the previously hard-coded ``"demoClient"``; callers that
                start several jobs can now supply a distinct token per job.
        """
        return self._timed(
            "Start Import Job",
            self.client.start_dicom_import_job,
            datastoreId=datastoreId,
            dataAccessRoleArn=IamRoleArn,
            inputS3Uri=inputS3,
            outputS3Uri=outputS3,
            clientToken=clientToken,
        )

    def getImportJob(self, datastoreId, jobId):
        """Return the ``get_dicom_import_job`` response for ``jobId``."""
        return self._timed(
            "Get Import Job",
            self.client.get_dicom_import_job,
            datastoreId=datastoreId,
            jobId=jobId,
        )

    def getMetadata(self, datastoreId, imageSetId):
        """Fetch an image set's metadata and return it as parsed JSON.

        The ``imageSetMetadataBlob`` stream of the response is read,
        gunzipped and JSON-decoded; the logged time covers the fetch and
        the decoding together.
        """
        # Imported here because the notebook's import cell does not load gzip.
        import gzip

        start_time = time.time()
        dicom_study_metadata = self.client.get_image_set_metadata(
            datastoreId=datastoreId, imageSetId=imageSetId
        )
        compressed = dicom_study_metadata["imageSetMetadataBlob"].read()
        json_study_metadata = json.loads(gzip.decompress(compressed))
        end_time = time.time()
        logging.info(f"Metadata fetch  : {self.stopwatch(start_time,end_time)} ms")
        return json_study_metadata

### Create an AWS HealthImaging Datastore If It Does Not Exist

In [None]:
# Find a usable datastore named `DatastoreName` or create one, then wait
# until it is ACTIVE.
medicalimaging = MedicalImaging( boto3.client('medical-imaging') )
DatastoreName = "AISpleenDataStore"
datastoreList = medicalimaging.listDatastores()

# States a datastore can never become ACTIVE from; waiting on them would
# loop forever, and reusing such a datastore would be pointless.
unusable_states = ('CREATE_FAILED', 'DELETING', 'DELETED')

# NOTE(review): only the first page of list_datastores results is searched.
res_createstore = None
for datastore in datastoreList["datastoreSummaries"]:
    if datastore["datastoreName"] != DatastoreName:
        continue
    if datastore.get("datastoreStatus") in unusable_states:
        continue
    res_createstore = datastore
    break
if res_createstore is None:
    res_createstore = medicalimaging.createDatastore(DatastoreName)

datastoreId = res_createstore['datastoreId']
res_getstore = medicalimaging.getDatastore(datastoreId)
status = res_getstore['datastoreProperties']['datastoreStatus']
while status != 'ACTIVE':
    if status in unusable_states:
        # Fail loudly instead of polling a datastore that will never activate.
        raise RuntimeError(f"Datastore {datastoreId} is in state {status} and cannot become ACTIVE")
    time.sleep(10)
    res_getstore = medicalimaging.getDatastore(datastoreId)
    status = res_getstore['datastoreProperties']['datastoreStatus']
    print(status)
print(f"datastoreId: {datastoreId}; status: {status}")

### Import Data into AWS HealthImaging

In [None]:
# Kick off the DICOM import from the input prefix to the output prefix.
res_startimportjob = medicalimaging.startImportJob(
    res_createstore['datastoreId'],
    role,
    f"s3://{bucket}/{ahi_input_prefix}",
    f"s3://{bucket}/{ahi_output_prefix}",
)
jobId = res_startimportjob['jobId']

# Check the job status, sleeping 30 seconds between checks, until the job
# reports COMPLETED or FAILED.
while True:
    jobstatus = medicalimaging.getImportJob(datastoreId, jobId)['jobProperties']['jobStatus']
    if jobstatus in ['COMPLETED', 'FAILED']:
        break
    time.sleep(30)
print(f"jobstatus is {jobstatus}")

### Retrieve ImageSet IDs in Output Bucket

In [None]:
# Count how many success records the import job wrote for each image set ID.
# `imageSetIds` maps imageSetId -> number of success records.
imageSetIds = {}
job_output_prefix = f"{ahi_output_prefix}{datastoreId}-DicomImport-{jobId}/"
try:
    # Only read the success records once the job's output manifest exists.
    response = s3.head_object(Bucket=bucket, Key=f"{job_output_prefix}job-output-manifest.json")
    if response['ResponseMetadata']['HTTPStatusCode'] == 200:
        data = s3.get_object(Bucket=bucket, Key=f"{job_output_prefix}SUCCESS/success.ndjson")
        contents = data['Body'].read().decode("utf-8")
        for line in contents.splitlines():
            # Tolerate blank lines in the newline-delimited JSON file.
            if not line.strip():
                continue
            isid = json.loads(line)['importResponse']['imageSetId']
            imageSetIds[isid] = imageSetIds.get(isid, 0) + 1
except ClientError as err:
    # Keep the cell best-effort, but say why no image sets were found rather
    # than silently leaving `imageSetIds` empty.
    logging.warning(
        "Could not read import job output under s3://%s/%s: %s", bucket, job_output_prefix, err)

imageSetIds

## Image Visualization 

In [None]:
# Load one imported image set through the AHItoDICOMInterface helper so it
# can be visualized in the next cell.
import itk
from itkwidgets import view
# Only let CRITICAL messages through from the 'AHItoDICOMInterface' logger.
logging.getLogger('AHItoDICOMInterface').setLevel(logging.CRITICAL)
import warnings
warnings.filterwarnings('ignore')

from AHItoDICOMInterface.AHItoDICOM import AHItoDICOM
helper = AHItoDICOM()
# Uses the first key of `imageSetIds`; next(iter(...)) raises StopIteration
# when the dict is empty (e.g. the import produced no success records).
# NOTE(review): presumably returns one DICOM dataset per instance — confirm
# against the AHItoDICOMInterface documentation.
instances = helper.DICOMizeImageSet(datastore_id=datastoreId , image_set_id=next(iter(imageSetIds)))

In [None]:
# Collect each instance's pixel array, wrap the list as an ITK image view and
# open it in the itkwidgets viewer with gradient opacity set to 0.5.
pixel_arrays = [instance.pixel_array for instance in instances]
img = itk.image_view_from_array(pixel_arrays)
viewer = view(img)
viewer.set_image_gradient_opacity(0.5)

In [None]:
# Persist these two variables with IPython's %store magic so they can be
# restored later with `%store -r` (presumably by follow-up notebooks).
%store datastoreId
%store imageSetIds

### Clean Up

In [None]:
## S3 bucket
# Delete every object stored under the three run-specific prefixes.
s3 = boto3.client('s3')
# A paginator is used because a single listing call returns at most 1000 keys.
paginator = s3.get_paginator('list_objects_v2')
for name in (dicom_header_prefix, ahi_input_prefix, ahi_output_prefix):
    try:
        for page in paginator.paginate(Bucket=bucket, Prefix=name):
            # 'Contents' is absent when nothing matches the prefix, so an
            # empty prefix is skipped instead of aborting the whole cleanup.
            for entry in page.get('Contents', []):
                s3.delete_object(Bucket=bucket, Key=entry['Key'])
        # Also remove the "folder" placeholder key itself, if one exists.
        s3.delete_object(Bucket=bucket, Key=name)
    except ClientError as err:
        logging.error(
            "Couldn't delete S3 folder %s. Here's why: %s: %s", name, err.response['Error']['Code'], err.response['Error']['Message'])
        raise
