In [None]:
 %%sh
pip install -q --upgrade pip
pip install -q --upgrade boto3 botocore
pip install -q tqdm nibabel pydicom numpy pylibjpeg-openjpeg 

In [None]:
import pydicom
from pydicom.sequence import Sequence
from pydicom import Dataset , DataElement 
from pydicom.dataset import FileDataset, FileMetaDataset
from pydicom.uid import UID
from pydicom.pixel_data_handlers.util import convert_color_space , apply_color_lut
from openjpeg import decode
import array
import json
import logging
import importlib  
import boto3
import sagemaker
from sagemaker import get_execution_role
import io
import sys
import time
import os
import pandas as pd
from botocore.exceptions import ClientError
# Emit INFO-level log records; change the level to "DEBUG" for more verbose output.
logging.basicConfig(level="INFO")

# Project-local client wrapper; later cells call it for datastore, import-job
# and metadata requests.
from src.Api import MedicalImaging
medicalimaging = MedicalImaging()

# AWS account that owns the credentials this kernel is running with.
sts_client = boto3.client("sts")
caller_identity = sts_client.get_caller_identity()
account_id = caller_identity["Account"]

# Region of the default boto3 session.
region = boto3.Session().region_name

# Default SageMaker bucket and the notebook's execution role.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
role = get_execution_role()

print(f"S3 Bucket is {bucket}")
print(f"IAM role is {role}")

## Copy the Coherent DICOM images to the default SageMaker S3 bucket

In [None]:
!aws s3 sync s3://guidance-multimodal-hcls-healthai-machinelearning-{region}/imaging s3://{bucket}/imaging/ 2>&1 > /dev/null

In [None]:
# Find (or create) the datastore named `DatastoreName`, then block until it is
# ACTIVE. Exposes `datastoreId` and `res_createstore` for the following cells.
DatastoreName = "WorkshopDataStore"

# Statuses from which a datastore will not become ACTIVE; polling on them would
# otherwise loop forever.
UNUSABLE_DATASTORE_STATUSES = {"CREATE_FAILED", "DELETING", "DELETED"}
DATASTORE_POLL_SECONDS = 30

datastoreList = medicalimaging.listDatastores()

# Reuse an existing datastore with the expected name, skipping ones that are
# failed or being deleted.
# NOTE(review): assumes each summary is a dict that may carry "datastoreStatus";
# `.get` keeps the original name-only match when that key is absent.
res_createstore = None
for datastore in datastoreList["datastoreSummaries"]:
    if (
        datastore["datastoreName"] == DatastoreName
        and datastore.get("datastoreStatus") not in UNUSABLE_DATASTORE_STATUSES
    ):
        res_createstore = datastore
        break
if res_createstore is None:
    res_createstore = medicalimaging.createDatastore(DatastoreName)

datastoreId = res_createstore['datastoreId']

# Poll until ACTIVE; fail fast on a terminal non-ACTIVE status instead of
# sleeping forever.
while True:
    res_getstore = medicalimaging.getDatastore(datastoreId)
    status = res_getstore['datastoreProperties']['datastoreStatus']
    if status == 'ACTIVE':
        break
    if status in UNUSABLE_DATASTORE_STATUSES:
        raise RuntimeError(
            f"Datastore {datastoreId} is in status {status} and will not become ACTIVE"
        )
    print(status)
    time.sleep(DATASTORE_POLL_SECONDS)
print(f"datastoreId: {datastoreId}; status: {status}")

In [None]:
# Start an import job that reads from s3://<bucket>/imaging/ and writes its
# results under s3://<bucket>/ahi_importjob_output/, then wait for it to finish.
# Exposes `jobId` and `jobstatus` for the following cells.
IMPORT_JOB_POLL_SECONDS = 30
# Statuses after which the job status no longer changes.
IMPORT_JOB_TERMINAL_STATUSES = ('COMPLETED', 'FAILED')

res_startimportjob = medicalimaging.startImportJob(
    datastoreId,
    f"arn:aws:iam::{account_id}:role/HealthImagingImportJobRole",
    's3://'+bucket+'/imaging/',
    's3://'+bucket+'/ahi_importjob_output/'
)
jobId = res_startimportjob['jobId']

# Poll until the job reaches a terminal status. Waiting only for 'COMPLETED'
# would never return for a job that ends as 'FAILED'.
while True:
    jobstatus = medicalimaging.getImportJob(datastoreId, jobId)['jobProperties']['jobStatus']
    if jobstatus in IMPORT_JOB_TERMINAL_STATUSES:
        break
    time.sleep(IMPORT_JOB_POLL_SECONDS)
print(f"jobstatus is {jobstatus}")

if jobstatus == 'FAILED':
    raise RuntimeError(
        f"Import job {jobId} failed; inspect s3://{bucket}/ahi_importjob_output/ for details"
    )

In [None]:
# Build `imageSetIds`: a dict mapping each imageSetId found in the import job's
# SUCCESS/success.ndjson to the number of records that reference it.
# Also creates the `s3` client reused by later cells.
imageSetIds = {}
s3 = boto3.client('s3')
job_output_prefix = f"ahi_importjob_output/{datastoreId}-DicomImport-{jobId}"
try:
    # head_object raises ClientError when the manifest is missing, so reaching
    # the next statement means the job output exists.
    s3.head_object(Bucket=bucket, Key=f"{job_output_prefix}/job-output-manifest.json")
    success_obj = s3.get_object(Bucket=bucket, Key=f"{job_output_prefix}/SUCCESS/success.ndjson")
    for record_line in success_obj['Body'].read().decode("utf-8").splitlines():
        if not record_line.strip():
            # Tolerate blank lines in the NDJSON file.
            continue
        image_set_id = json.loads(record_line)['importResponse']['imageSetId']
        imageSetIds[image_set_id] = imageSetIds.get(image_set_id, 0) + 1
except ClientError as err:
    # Still best-effort (the cell does not fail), but report why nothing was read
    # instead of silently showing zero image sets.
    print(f"Could not read import job output under {job_output_prefix}: {err}")

print("number of image sets: {}".format(len(imageSetIds)))

In [None]:
# Persist the datastore ID, the imageSetId -> count dict and the import job ID
# with IPython's %store magic so another notebook or kernel can restore them
# with `%store -r`.
%store datastoreId
%store imageSetIds
%store jobId

## (Optional) Save JSON to S3

In [None]:
# Export the DICOM header metadata of every imported image set to S3 as JSON:
# one object per patient, study, series and instance.

# NOTE(review): `OutputBucketName` is not defined anywhere in the visible
# notebook. Use it when an earlier cell or session defined it, otherwise fall
# back to the default SageMaker bucket that the import itself used.
output_bucket = globals().get("OutputBucketName", bucket)


def _put_header_json(key, payload):
    """Serialize `payload` to JSON and upload it to `output_bucket` under `key`."""
    s3.put_object(Body=json.dumps(payload), Bucket=output_bucket, Key=key)


for image_set_id in imageSetIds:
    json_dicom_header = medicalimaging.getMetadata(datastoreId, image_set_id)

    # Patient and study records are tagged with the image set they came from.
    patient = json_dicom_header['Patient']['DICOM']
    patient['imagesetid'] = image_set_id
    _put_header_json('dicom_header/json/patient/{}'.format(image_set_id), patient)

    study = json_dicom_header['Study']['DICOM']
    study['imagesetid'] = image_set_id
    _put_header_json('dicom_header/json/study/{}'.format(image_set_id), study)

    # Each series and each instance gets its own key. Previously all of them were
    # written to 'dicom_header/json/series/<imageSetId>', so every upload
    # overwrote the one before it and only the last instance survived.
    all_series = json_dicom_header['Study']['Series']
    for series_id in list(all_series.keys()):
        _put_header_json(
            'dicom_header/json/series/{}_{}'.format(image_set_id, series_id),
            all_series[series_id]['DICOM'],
        )
        instances = all_series[series_id]['Instances']
        for instance_id in list(instances):
            _put_header_json(
                'dicom_header/json/instance/{}_{}'.format(image_set_id, instance_id),
                instances[instance_id],
            )

## Clean Up

In [None]:
# Delete the input/output buckets and the IAM role and policy.
# NOTE(review): `InputBucketName`, `OutputBucketName`, `iam`,
# `respons_createpolicy` and `response_createrole` are not defined anywhere in
# the visible notebook; this cell raises NameError unless they were created
# elsewhere in the session. Confirm which resources this notebook owns before
# running it.
s3res = boto3.resource('s3')

# Handle each bucket independently so that a missing first bucket no longer
# skips deletion of the second one.
for bucket_name in (InputBucketName, OutputBucketName):
    try:
        # A bucket must be empty (including all object versions) before it can
        # be deleted.
        s3res.Bucket(bucket_name).object_versions.delete()
        s3.delete_bucket(Bucket=bucket_name)
    except ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchBucket':
            print("Bucket already deleted")
        else:
            # Report unexpected failures instead of dropping them, matching the
            # IAM clean-up below.
            print(e)

try:
    policy_arn = respons_createpolicy['Policy']['Arn']
    role_name = response_createrole['Role']['RoleName']
    # The policy must be detached from the role before either can be deleted.
    iam.detach_role_policy(PolicyArn=policy_arn, RoleName=role_name)
    iam.delete_policy(PolicyArn=policy_arn)
    iam.delete_role(RoleName=role_name)
except ClientError as ee:
    if ee.response['Error']['Code'] == 'NoSuchEntity':
        print("Policy not attached, ignore")
    else:
        print(ee)