# Amazon HealthLake Imaging Lab

### Install libraries

In [None]:
%%sh
# Install the notebook's dependencies quietly (-q).
# Upgrade pip first, then bring boto3/botocore up to the latest release.
pip install -q --upgrade pip
pip install -q --upgrade boto3 botocore
# DICOM / imaging helpers imported by the next cell; monai is left commented out.
pip install -q tqdm nibabel pydicom numpy pathlib2 pylibjpeg-openjpeg #monai

### Import Libraries and Setup Environments

In [None]:
import pydicom
from pydicom.sequence import Sequence
from pydicom import Dataset , DataElement 
from pydicom.dataset import FileDataset, FileMetaDataset
from pydicom.uid import UID
from pydicom.pixel_data_handlers.util import convert_color_space , apply_color_lut
from openjpeg import decode
import array
import json
import logging
import importlib  
import boto3
import io
import sys
import time
import os
import pandas as pd
from botocore.exceptions import ClientError
from src.code.Api import MedicalImaging 

# Verbose logging for the lab; change the level to "INFO" for quieter output.
logging.basicConfig(level="DEBUG")

# AWS service clients shared by the cells below.
s3 = boto3.client("s3")
iam = boto3.client("iam")

# Account ID and region of the credentials this notebook runs under.
caller_identity = boto3.client("sts").get_caller_identity()
account_id = caller_identity["Account"]
region = boto3.Session().region_name

# Helper imported from src.code.Api; the cells below use it for the
# datastore, import-job and metadata calls.
medicalimaging = MedicalImaging()

### Create S3 Bucket and IAM policy for HealthLake Imaging Import job

In [None]:
# NOTE: S3 bucket names are globally unique. If another account already owns
# one of these names, create_bucket raises BucketAlreadyExists; pick different
# names here (the later cells only refer to the two variables).
InputBucketName = "ahli-input-aispleen-dataset"
OutputBucketName = "ahli-output-aispleen-dataset"


def _ensure_bucket(bucket_name):
    """Create ``bucket_name`` in the session region unless this account already owns it.

    Outside us-east-1, S3 requires an explicit LocationConstraint, and a
    repeated create raises BucketAlreadyOwnedByYou instead of succeeding, so
    both cases are handled here to keep the cell re-runnable in any region.
    Any other error (e.g. BucketAlreadyExists, AccessDenied) is re-raised.
    """
    create_kwargs = {"Bucket": bucket_name}
    if region and region != "us-east-1":
        create_kwargs["CreateBucketConfiguration"] = {"LocationConstraint": region}
    try:
        s3.create_bucket(**create_kwargs)
    except ClientError as err:
        if err.response["Error"]["Code"] != "BucketAlreadyOwnedByYou":
            raise


for _bucket_name in (InputBucketName, OutputBucketName):
    _ensure_bucket(_bucket_name)

# Trust policy: lets the HealthLake Imaging service assume the import role.
assume_role_policy_document = json.dumps({
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": [
                    "medical-imaging.amazonaws.com"
                ]
            },
            "Action": "sts:AssumeRole"
        }
    ]
})

# Permissions policy for the import role: list both buckets, read the input
# objects, write the job output, and call the imaging APIs.
import_policy_document = json.dumps({
    "Version": "2012-10-17",
    "Statement": [
        {
            "Action": [
                "s3:ListBucket",
                "s3:GetEncryptionConfiguration"
            ],
            "Resource": [
                f"arn:aws:s3:::{InputBucketName}",
                f"arn:aws:s3:::{OutputBucketName}"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "s3:GetObject"
            ],
            "Resource": [
                f"arn:aws:s3:::{InputBucketName}/*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "s3:PutObject"
            ],
            "Resource": [
                f"arn:aws:s3:::{OutputBucketName}/*"
            ],
            "Effect": "Allow"
        },
        {
            # NOTE(review): "GetDICOMStudyMetdata" looks misspelled
            # ("...Metadata"?). Kept as-is; confirm the action names against
            # the service's IAM reference before relying on them.
            "Action": [
                "medical-imaging:CreateDatastore",
                "medical-imaging:GetDatastore",
                "medical-imaging:ListDatastores",
                "medical-imaging:UpdateDatastore",
                "medical-imaging:StartDICOMImportJob",
                "medical-imaging:GetDICOMImportJob",
                "medical-imaging:UpdateDICOMImportJob",
                "medical-imaging:GetDICOMStudyMetdata",
                "medical-imaging:GetImageFrame"
            ],
            "Resource": "*",
            "Effect": "Allow"
        }
    ]
})

# Reuse the managed policy if it exists; create it only when IAM reports it
# missing. Other failures (permissions, throttling, ...) propagate instead of
# being mistaken for "not found".
try:
    respons_createpolicy = iam.get_policy(
        PolicyArn=f"arn:aws:iam::{account_id}:policy/AHLI-Service-Import-Policy"
    )
except ClientError as err:
    if err.response["Error"]["Code"] != "NoSuchEntity":
        raise
    respons_createpolicy = iam.create_policy(
        PolicyName='AHLI-Service-Import-Policy',
        PolicyDocument=import_policy_document,
    )

# Same get-or-create pattern for the role the import job assumes.
try:
    response_createrole = iam.get_role(RoleName='AHLI-Service-Import-Role')
except ClientError as err:
    if err.response["Error"]["Code"] != "NoSuchEntity":
        raise
    response_createrole = iam.create_role(
        RoleName='AHLI-Service-Import-Role',
        AssumeRolePolicyDocument=assume_role_policy_document,
    )

# Attach on every run, not only when the role was just created: a role left
# over from an earlier run may not have the policy attached (for example when
# the policy had to be re-created above).
response = iam.attach_role_policy(
    RoleName=response_createrole['Role']['RoleName'],
    PolicyArn=respons_createpolicy['Policy']['Arn'],
)
print(response_createrole['Role']['Arn'])
print(respons_createpolicy['Policy']['Arn'])

### Download AI Spleen Segmentation Dataset

To demonstrate MONAI Deploy SDK later on, this demo is based on [an existing MONAI Deploy tutorial](https://github.com/Project-MONAI/monai-deploy-app-sdk/blob/main/notebooks/tutorials/03_segmentation_app.ipynb) using a public example: a multi-frame 3D Spleen Segmentation image.

> **NOTICE**: "The following download link leads to a Third-Party Dataset. AWS does not own, nor does it have any control over the Third-Party Dataset. You should perform your own independent assessment, and take measures to ensure that you comply with your own specific quality control practices and standards, and the local rules, laws, regulations, licenses and terms of use that apply to you, your content, and the Third-Party Dataset. AWS does not make any representations or warranties that the Third-Party Dataset is secure, virus-free, accurate, operational, or compatible with your own environment and standards. AWS does not make any representations, warranties or guarantees that any information in the Third-Party Dataset will result in a particular outcome or result."

In [None]:
import os
if not os.path.exists('dcm'):
    # Download ai_spleen_bundle_data test data zip file
    !pip install gdown 
    !gdown "https://drive.google.com/uc?id=1Uds8mEvdGNYUuvFpTtCQ8gNU97bAPCaQ"

    # After downloading ai_spleen_bundle_data zip file from the web browser or using gdown,
    !unzip -o "ai_spleen_seg_bundle_data.zip"
    
    for f in os.listdir('dcm'):
        s3.upload_file('dcm/'+f, InputBucketName,'aispleen/'+f)

### Create HealthLake Imaging Datastore if It Does Not Exist

In [None]:
DatastoreName = "AISpleenDataStore"
datastoreList = medicalimaging.listDatastores()

# Reuse an existing datastore with this name, but never one that is on its way
# out: waiting for a DELETING/DELETED datastore to become ACTIVE never ends.
# .get() is used because the summary may not carry a status field.
_gone_statuses = ("DELETING", "DELETED")
res_createstore = next(
    (
        summary
        for summary in datastoreList["datastoreSummaries"]
        if summary["datastoreName"] == DatastoreName
        and summary.get("datastoreStatus") not in _gone_statuses
    ),
    None,
)
if res_createstore is None:
    res_createstore = medicalimaging.createDatastore(DatastoreName)

datastoreId = res_createstore['datastoreId']

# Poll every 10 seconds until the datastore is ACTIVE; stop with an error on a
# state that can never become ACTIVE instead of looping forever.
res_getstore = medicalimaging.getDatastore(datastoreId)
status = res_getstore['datastoreProperties']['datastoreStatus']
while status != 'ACTIVE':
    if status in ('CREATE_FAILED',) + _gone_statuses:
        raise RuntimeError(
            f"Datastore {datastoreId} is in status {status} and will not become ACTIVE"
        )
    time.sleep(10)
    res_getstore = medicalimaging.getDatastore(datastoreId)
    status = res_getstore['datastoreProperties']['datastoreStatus']
    print(status)
print(f"datastoreId: {datastoreId}; status: {status}")

### Import data into HealthLake Imaging 

In [None]:
# Start a DICOM import: read from the aispleen/ prefix of the input bucket and
# write the job results under output/ in the output bucket, using the role
# created earlier.
res_startimportjob = medicalimaging.startImportJob(
    res_createstore['datastoreId'],
    response_createrole['Role']['Arn'],
    's3://' + InputBucketName + '/aispleen/',
    's3://' + OutputBucketName + '/output/'
)

# Poll every 30 seconds until the job reaches a terminal state. FAILED is
# terminal too; waiting only for COMPLETED would spin forever on a failed job.
while True:
    jobstatus = medicalimaging.getImportJob(
        datastoreId, res_startimportjob['jobId']
    )['jobProperties']['jobStatus']
    if jobstatus in ('COMPLETED', 'FAILED'):
        break
    time.sleep(30)
print(f"jobstatus is {jobstatus}")

if jobstatus == 'FAILED':
    raise RuntimeError(
        f"Import job {res_startimportjob['jobId']} failed; "
        f"check s3://{OutputBucketName}/output/ for the job output"
    )

### Retrieve ImageSet IDs in Output Bucket

In [None]:
# Maps each imageSetId to the number of success.ndjson records that reference it.
imageSetIds = {}

# Both result files live under the same per-job prefix in the output bucket.
job_output_prefix = (
    f"output/{res_createstore['datastoreId']}-DicomImport-{res_startimportjob['jobId']}"
)
try:
    # head_object raises ClientError when the manifest is missing, so reaching
    # the next line means the job output exists.
    s3.head_object(Bucket=OutputBucketName, Key=f"{job_output_prefix}/job-output-manifest.json")
    data = s3.get_object(Bucket=OutputBucketName, Key=f"{job_output_prefix}/SUCCESS/success.ndjson")
    contents = data['Body'].read().decode("utf-8")
    for record in contents.splitlines():
        if not record.strip():
            continue  # tolerate blank lines in the NDJSON file
        isid = json.loads(record)['importResponse']['imageSetId']
        imageSetIds[isid] = imageSetIds.get(isid, 0) + 1
except ClientError as err:
    # Keep the best-effort behaviour (imageSetIds stays empty) but say why,
    # instead of silently producing an empty result.
    print(f"Could not read import job output under {job_output_prefix}: {err}")

imageSetIds

### Retrieve DICOM Header for the Given ImageSet

In [None]:
# Flatten the metadata of every imported image set into three record lists
# (patient, study, series), each record tagged with its ImageSetID.
patients, studies, seriess = [], [], []

for image_set_id in imageSetIds:
    header = medicalimaging.getMetadata(datastoreId, image_set_id)

    patient_record = header['Patient']['DICOM']
    study_record = header['Study']['DICOM']
    patient_record['ImageSetID'] = header['ImageSetID']
    study_record['ImageSetID'] = header['ImageSetID']
    patients.append(patient_record)
    studies.append(study_record)

    # One record per series found under the study.
    for series_entry in header['Study']['Series'].values():
        series_record = series_entry['DICOM']
        series_record['ImageSetID'] = header['ImageSetID']
        seriess.append(series_record)

# One DataFrame per level; nested keys are flattened by json_normalize.
df_patients = pd.json_normalize(patients)
df_studies = pd.json_normalize(studies)
df_seriess = pd.json_normalize(seriess)

In [None]:
# Display the patient-level DataFrame built in the previous cell.
df_patients

In [None]:
# Persist both values with IPython's %store magic so that another notebook or
# session can restore them (with `%store -r`).
%store datastoreId
%store imageSetIds

### Clean Up

In [None]:
# Empty and delete each bucket independently, so that one bucket already being
# gone does not prevent the other from being cleaned up.
s3res = boto3.resource('s3')
for bucket_name in (InputBucketName, OutputBucketName):
    try:
        bucket = s3res.Bucket(bucket_name)
        # A bucket must be empty (including all object versions) before it can be deleted.
        bucket.object_versions.delete()
        s3.delete_bucket(Bucket=bucket_name)
    except ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchBucket':
            print("Bucket already deleted")
        else:
            # Best effort: report the problem and carry on with the remaining clean-up.
            print(e)

# Remove the IAM resources step by step, each with its own error handling: a
# policy that is already detached must not stop the policy and role from being
# deleted. Order matters: detach first, then delete the policy and the role.
policy_arn = respons_createpolicy['Policy']['Arn']
role_name = response_createrole['Role']['RoleName']
iam_cleanup_steps = (
    ("detach policy from role", iam.detach_role_policy, {"PolicyArn": policy_arn, "RoleName": role_name}),
    ("delete policy", iam.delete_policy, {"PolicyArn": policy_arn}),
    ("delete role", iam.delete_role, {"RoleName": role_name}),
)
for step_name, iam_call, call_kwargs in iam_cleanup_steps:
    try:
        resp = iam_call(**call_kwargs)
    except ClientError as ee:
        if ee.response['Error']['Code'] == 'NoSuchEntity':
            print(f"Nothing to do for '{step_name}' (already removed), ignore")
        else:
            print(ee)
