Create a collection of CT scan slice images (saved as PNG files) and store them in the `sample/` directory

# Import

In [116]:
from azure.storage.blob import BlobServiceClient
from pydicom import dcmread
from io import BytesIO
import numpy as np
import cv2
import pylidc as pl

# Connect to Blob

In [117]:
# Read the storage credentials from a local key file. Its first three lines
# are used as account name, account key and container name, in that order.
with open('/home/andrew/ITRI-LungCancer/keys.txt', 'r') as file:
    data = file.read().splitlines()

account_name    = data[0]
account_key     = data[1]
container_name  = data[2]

# Build the service client for the account, then narrow it to the container
# that the helper functions and the export loop read from.
blob_service_client = BlobServiceClient(
    account_url=f"https://{account_name}.blob.core.windows.net",
    credential=account_key,
)
container_client = blob_service_client.get_container_client(container_name)
blob_name_list = container_client.list_blob_names()

# Helper Functions for Creating Dataset

In [118]:
def window_img(img, window_center, window_width):
    """Apply an intensity window to ``img`` and map it onto 8-bit grey levels.

    Values are clipped to ``window_center +/- window_width / 2``, scaled
    linearly so that the lower bound becomes 0 and the upper bound 255, and
    finally converted with ``np.uint8``.

    Args:
        img: Array of intensity values.
        window_center: Centre of the intensity window.
        window_width: Full width of the intensity window.

    Returns:
        The windowed image as ``np.uint8``.
    """
    half_width = window_width / 2.0
    lower = window_center - half_width
    upper = window_center + half_width
    normalized = (np.clip(img, lower, upper) - lower) / (upper - lower)
    return np.uint8(normalized * 255)

def rescale_img(ds, img):
    """Apply the dataset's linear rescale to ``img`` when both tags are present.

    Args:
        ds: Dataset supporting ``'Keyword' in ds`` and attribute access
            (the callers in this notebook pass the result of ``dcmread``).
        img: Pixel values to rescale.

    Returns:
        ``img * ds.RescaleSlope + ds.RescaleIntercept`` if ``ds`` contains both
        'RescaleIntercept' and 'RescaleSlope'; otherwise ``img`` unchanged.
    """
    if 'RescaleIntercept' not in ds or 'RescaleSlope' not in ds:
        return img
    return img * ds.RescaleSlope + ds.RescaleIntercept

def change_file_num(blob_name, val):
    """Return ``blob_name`` with its trailing slice number shifted by ``val``.

    The name is taken apart by fixed positions: the last four characters are
    treated as the extension and the three characters before them as the
    slice number. The result is always rebuilt with a ``.dcm`` extension and
    the new number zero-padded to at least three characters.

    NOTE(review): names whose slice number is not exactly three digits are
    not handled by this positional slicing.
    """
    prefix, digits = blob_name[:-7], blob_name[-7:-4]
    shifted = int(digits) + val
    return f"{prefix}{str(shifted).zfill(3)}.dcm"
    
def get_dicom(blob_name):
    """Download ``blob_name`` from the container and parse it with ``dcmread``.

    The blob is read fully into memory and passed to ``dcmread`` through an
    in-memory ``BytesIO`` buffer. Relies on the module-level
    ``container_client``.
    """
    downloader = container_client.get_blob_client(blob_name).download_blob()
    return dcmread(BytesIO(downloader.readall()))

def get_image(blob_name, fallback_shape=(512, 512)):
    """Load one slice and return it as a windowed 8-bit image.

    The slice is fetched with ``get_dicom``, rescaled with ``rescale_img`` and
    windowed with ``window_img`` (centre -300, width 2000).

    Args:
        blob_name: Name of the DICOM blob to load.
        fallback_shape: Shape of the all-zero image returned when the blob
            cannot be loaded. Defaults to ``(512, 512)``.

    Returns:
        The windowed ``np.uint8`` image, or an all-zero ``np.uint8`` array of
        ``fallback_shape`` if fetching or parsing the blob fails.
    """
    try:
        ds = get_dicom(blob_name)
    except Exception:
        # Best effort: a slice that cannot be fetched (e.g. the neighbour
        # before the first or after the last slice) becomes a blank image.
        # Catching Exception instead of using a bare except lets
        # KeyboardInterrupt and SystemExit still stop the notebook.
        return np.zeros(fallback_shape, dtype=np.uint8)
    image = rescale_img(ds, ds.pixel_array)
    return window_img(image, -300, 2000)

# Recreate directories

In [119]:
# WARNING: this permanently deletes the existing sample/ directory and all of
# its contents, then recreates it empty, so every run starts from a clean
# output folder.
!rm -rf /home/andrew/ITRI-LungCancer/sample/
!mkdir -p /home/andrew/ITRI-LungCancer/sample

# Create sample dataset from a single CT scan


In [120]:
# Scan to export: exactly one `filepath` is active, the others are kept as
# ready-made alternatives.
# filepath = r'LIDC-IDRI-0075\01-01-2000-NA-NA-14796\30083.000000-NA-95222'
# filepath = r'LIDC-IDRI-0527\01-01-2000-NA-NA-75447\5485.000000-ChestRoutine  3.0  B31f-48007'
filepath = r'LIDC-IDRI-0704\01-01-2000-NA-CT THORAX WO IV CONTRA-50143\31273.000000-Recon 2 ACRIN LARGE-98412'
# filepath = r'LIDC-IDRI-0845\01-01-2000-NA-NA-15546\1624.000000-NLST TLC VOL B30F-61989'
# filepath = r'LIDC-IDRI-1008\01-01-2000-NA-NA-10827\5491.000000-NA-53182'

# Slices are named 1-001.dcm, 1-002.dcm, ...; walk them in order until the
# first missing blob (at most 1000 slices are attempted).
for i in range(0, 1000):
    # Fetch DICOM
    blob_name = filepath + f"\\1-{i+1:03}.dcm"
    blob_client = container_client.get_blob_client(blob_name)
    # A missing blob marks the end of the scan. Any other failure (auth,
    # network, ...) is raised rather than mistaken for the end of the scan.
    if not blob_client.exists():
        print("\nDone")
        break
    blob_data = blob_client.download_blob().readall()
    blob_stream = BytesIO(blob_data)
    ds = dcmread(blob_stream)

    # Image transformations (same window as the one used inside get_image,
    # so all three channels share one grey scale)
    image_base = rescale_img(ds, ds.pixel_array)
    image_base = window_img(image_base, -300, 2000)

    # Neighbouring slices; get_image returns an all-zero image when a
    # neighbour cannot be loaded (e.g. before the first / after the last slice).
    image_prev = get_image(change_file_num(blob_name, -1))
    image_next = get_image(change_file_num(blob_name, 1))

    # Stack previous / current / next slice as the three channels of one image.
    image = np.stack([image_prev, image_base, image_next], axis=-1)

    # Saves slice location in filename - used later for mapping to annotations
    slice_location = ds.ImagePositionPatient[2]
    patient_id = ds.PatientID
    filename = f"{patient_id}_{i+1:03}_{slice_location}"

    image_path = f'/home/andrew/ITRI-LungCancer/sample/{filename}.png'
    # cv2.imwrite signals failure by returning False instead of raising, so
    # check it explicitly to avoid silently producing an incomplete dataset.
    if not cv2.imwrite(image_path, image):
        raise IOError(f"Could not write {image_path}")
    # Overwrite the same console line with the most recently written file.
    print(f"\r{filename}", end="\r", flush=True)

LIDC-IDRI-0704_002_3.125000

LIDC-IDRI-0704_487_-300.000000
Done
