# Process images and create labels for YOLO

In [87]:
from ultralytics import YOLO
from azure.storage.blob import BlobServiceClient
from pydicom import dcmread
from io import BytesIO
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from pydicom.pixel_data_handlers.util import apply_voi_lut
import pylidc as pl

try:
    # keys.txt is read as three lines: account name, account key, container name
    with open('/home/andrew/ITRI-LungCancer/keys.txt', 'r') as file:
        data = file.read().splitlines()
    account_name    = data[0]
    account_key     = data[1]
    container_name  = data[2]

    account_url = f"https://{account_name}.blob.core.windows.net"
    blob_service_client = BlobServiceClient(account_url=account_url, credential=account_key)
    container_client = blob_service_client.get_container_client(container_name)
    # Blob names are consumed later, one at a time, through next(blob_name_list)
    blob_name_list = container_client.list_blob_names()
except Exception as ex:
    # Best effort: report the problem and let the notebook continue
    print('Exception:', ex, sep='\n')

# Organize file system

In [88]:
# Clean folders
# WARNING: this permanently removes any previously generated dataset before it is rebuilt.
!rm -rf /home/andrew/ITRI-LungCancer/dataset/
# Disabled; presumably so that earlier training runs are kept — confirm before enabling.
# !rm -rf /home/andrew/ITRI-LungCancer/runs

# Recreate dataset structure
# One images/ and one labels/ folder per split; create_dataset writes into these folders.
# NOTE(review): {train,val,test} relies on shell brace expansion — confirm the shell behind `!` supports it.
!mkdir -p /home/andrew/ITRI-LungCancer/dataset/images/{train,val,test}
!mkdir -p /home/andrew/ITRI-LungCancer/dataset/labels/{train,val,test}

9561.30s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
9566.57s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
9571.72s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


# Visualize annotation properties

In [None]:
# Analyzing bbox sizes
# Gather one row of bounding-box extents and nodule properties per annotation,
# then plot a histogram of every column.
anns = pl.query(pl.Annotation).all()

columns = ['bbox_width', 'bbox_height', 'malignancy', 'diameter', 'surface_area', 'volume']
data = []
for ann in anns:
    bbox = ann.bbox()
    row_span, col_span = bbox[0], bbox[1]
    # Extents are divided by 512 (assumes 512-pixel slices — TODO confirm)
    data.append([
        (col_span.stop - col_span.start)/512,
        (row_span.stop - row_span.start)/512,
        ann.malignancy,
        ann.diameter,
        ann.surface_area,
        ann.volume,
    ])

df = pd.DataFrame(data, columns=columns)

plt.figure(figsize=(20, 20))
n_plots = len(df.columns)
for position, column in enumerate(df.columns, start=1):
    # One stacked subplot per column
    plt.subplot(n_plots, 1, position)
    plt.hist(df[column], bins=50)
    plt.title(column, y=0, loc='right')

# Helper Functions for Creating Dataset

In [89]:
def window_img(img, window_center, window_width):
    """Clip `img` to an intensity window and map it linearly onto 0-255.

    The window spans `window_center` plus/minus half of `window_width`.
    Values outside the window are clipped to its edges, the result is
    normalised to [0, 1], scaled by 255 and converted to uint8 (fractions
    are truncated).
    """
    half_width = window_width / 2.0
    win_min = window_center - half_width
    win_max = window_center + half_width

    clipped = np.clip(img, win_min, win_max)
    normalised = (clipped - win_min) / (win_max - win_min)
    return np.uint8(normalised * 255)

def rescale_img(ds, img):
    """Apply the dataset's linear rescale to `img` when both tags are present.

    Returns `img * ds.RescaleSlope + ds.RescaleIntercept` if `ds` contains
    both 'RescaleIntercept' and 'RescaleSlope'; otherwise `img` is returned
    unchanged.
    """
    has_rescale_tags = all(tag in ds for tag in ('RescaleIntercept', 'RescaleSlope'))
    if not has_rescale_tags:
        return img
    return img * ds.RescaleSlope + ds.RescaleIntercept

def create_dataset(count, data_string):
    """Write `count` windowed CT slices with YOLO labels for one dataset split.

    Blob names are taken from the module-level `blob_name_list` iterator, so
    consecutive calls continue where the previous call stopped. Each blob is
    downloaded through `container_client`, parsed as DICOM and matched to a
    pylidc scan by patient ID. For every annotation of that scan that has a
    contour closer than one slice spacing to the slice, and whose boolean
    mask sums to more than 400, one PNG and one single-line label file
    ("0 x_center y_center width height", normalised by the image size) are
    written to dataset/images/<data_string> and dataset/labels/<data_string>.

    Each annotation yields at most one sample per slice, and the function
    stops as soon as `count` samples have been written. If the blob iterator
    runs out first, a message is printed and the function returns early.

    Args:
        count: Number of image/label pairs to write.
        data_string: Name of the split folder ("train", "val" or "test").
    """
    while count > 0:
        blob_name = next(blob_name_list, None)
        if blob_name is None:
            # The container ran out of blobs before the quota was reached.
            print(f"{data_string}: no more blobs, {count} samples short")
            return

        path_parts = blob_name.split('/')
        scan_name = path_parts[0]
        slice_num = path_parts[3].split('-')[1]

        blob_client = container_client.get_blob_client(blob_name)
        blob_data = blob_client.download_blob().readall()
        ds = dcmread(BytesIO(blob_data))

        scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == ds.PatientID).first()
        if scan is None:
            # No pylidc scan for this patient: nothing to label, try the next blob.
            continue
        slice_location = ds.ImagePositionPatient[2]

        # Rescaled and windowed lazily, at most once per slice.
        image = None

        for ann_count, ann in enumerate(scan.annotations):
            # Several contours of one annotation can match the same slice; they
            # would all produce the same file, so count the annotation once.
            on_slice = any(
                abs(contour.image_z_position - slice_location) < scan.slice_spacing
                for contour in ann.contours
            )
            # The mask is only computed for annotations that touch this slice.
            if not on_slice or not ann.boolean_mask().sum() > 400:
                continue

            # bbox[0] spans rows (y) and bbox[1] spans columns (x).
            bbox = ann.bbox()
            bbox_x_center = (bbox[1].start + bbox[1].stop) / ds.Columns / 2
            bbox_y_center = (bbox[0].start + bbox[0].stop) / ds.Rows / 2
            bbox_width = (bbox[1].stop - bbox[1].start)/ds.Columns
            bbox_height = (bbox[0].stop - bbox[0].start)/ds.Rows

            if image is None:
                image = rescale_img(ds, ds.pixel_array)
                image = window_img(image, -300, 2000)

            filename = f"{scan_name}_{slice_num}_{ann_count}"

            image_path = f'/home/andrew/ITRI-LungCancer/dataset/images/{data_string}/{filename}.png'
            cv2.imwrite(image_path, image)

            label_path = f'/home/andrew/ITRI-LungCancer/dataset/labels/{data_string}/{filename}.txt'
            label_txt = f"0 {bbox_x_center} {bbox_y_center} {bbox_width} {bbox_height}"
            with open(label_path, 'w') as file:
                file.write(label_txt)

            count -= 1
            if count > 0:
                print(f"{data_string}: {count}    ", end='\r', flush=True)
            else:
                # Quota reached: do not overshoot with the remaining annotations.
                break
    print(f"{data_string} done!")

# Create sample dataset from a single CT scan


In [None]:
import os

# Export slices 1-130 of a single scan as windowed PNGs.
# NOTE(review): these blob names use backslashes, while create_dataset splits
# blob names on '/' — confirm which separator the container actually uses.
filepath = r'LIDC-IDRI-0343\01-01-2000-NA-CT LUNG SCREEN-57728\NA-25883'
sample_dir = '/home/andrew/ITRI-LungCancer/sample'

# cv2.imwrite does not create missing folders; it just reports failure,
# so make sure the target folder exists before writing.
os.makedirs(sample_dir, exist_ok=True)

for slice_index in range(1, 131):
    blob_name = filepath + f"\\1-{slice_index:03}.dcm"
    blob_client = container_client.get_blob_client(blob_name)
    blob_data = blob_client.download_blob().readall()
    ds = dcmread(BytesIO(blob_data))

    # Same rescale and window settings as create_dataset uses
    image = rescale_img(ds, ds.pixel_array)
    image = window_img(image, -300, 2000)

    filename = f"LIDC-IDRI-0343_1-{slice_index:03}"

    image_path = f'{sample_dir}/{filename}.png'
    if not cv2.imwrite(image_path, image):
        # imwrite signals failure through its return value, not an exception
        print(f"Failed to write {image_path}")
    print(blob_name)

# Create Datasets

In [90]:
# Build the splits in order; each call continues from where the previous one
# left the shared blob iterator.
for split_size, split_name in ((8000, "train"), (2000, "val"), (200, "test")):
    create_dataset(split_size, split_name)

train: 2718    

# Train YOLO

In [None]:
# Load the yolov8x.pt weights and train on the dataset described by dataset.yaml
model = YOLO("/home/andrew/ITRI-LungCancer/YOLO/yolov8x.pt")

train_options = dict(
    data="/home/andrew/ITRI-LungCancer/YOLO/dataset.yaml",
    epochs=1000,
    patience=50,
    cache=True,
    lr0=1E-4,
    save_period=5,
    batch=0.6,
    imgsz=256,
)
results = model.train(**train_options)
# model.save('/home/andrew/ITRI-LungCancer/YOLO/model.pt')

# Evaluate YOLO

In [None]:
from ultralytics import YOLO
import os
import numpy as np
import cv2

model = YOLO('/home/andrew/ITRI-LungCancer/YOLO/model.pt')

# Draw three test images at random (the same image may be drawn more than
# once) and run the model on them.
test_dir = '/home/andrew/ITRI-LungCancer/dataset/images/test'
imgs = os.listdir(test_dir)
test_imgs = [
    f'{test_dir}/{imgs[np.random.randint(0, len(imgs))]}'
    for _ in range(3)
]

results = model(test_imgs)

# Test model on single CT

In [None]:
from ultralytics import YOLO
import os
import numpy as np
import cv2

model = YOLO('/home/andrew/ITRI-LungCancer/YOLO/model.pt')

# Run the model over every file in the sample folder, in sorted name order
path = '/home/andrew/ITRI-LungCancer/sample/'
imgs = sorted(os.listdir(path))
model_input = [path + img_name for img_name in imgs]

results = model(model_input, conf=0.5, verbose=True)