In [2]:
import numpy as np
#import pylidc as pl
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pickle
import math
import pandas as pd

In [3]:
# The first column of the spreadsheet is used as the list of patient ids
# (it is matched against the "patient_id" column of the feature table below).
df_classified = pd.read_excel("tcia-diagnosis-data-2012-04-20.xls")
classified_examples_list = df_classified.iloc[:, 0].to_list()

# Keep only the feature rows of those patients and, by position, columns 0 and 6.
df_annotations = pd.read_csv("LabIACD Project 1/LabIACD-Project-1/Data/features.csv")
in_classified = df_annotations["patient_id"].isin(classified_examples_list)
classified_examples_annotations_df = df_annotations.loc[in_classified].iloc[:, [0, 6]]

# patient id -> list of the values found in the second kept column
# (used later as annotation ids). Patients without any row are left out.
classified_examples_annotations_dict = {}
for patient_id in classified_examples_list:
    rows_of_patient = classified_examples_annotations_df["patient_id"] == patient_id
    annotation_ids = classified_examples_annotations_df.loc[rows_of_patient].iloc[:, 1].to_list()
    if annotation_ids:
        classified_examples_annotations_dict[patient_id] = annotation_ids

for p_id, ann_id in classified_examples_annotations_dict.items():
    print(f"{p_id}: {ann_id}")

# Persist the mapping so that later cells can reload it without the spreadsheet.
with open("classified_examples_annotations_dict.pkl", "wb") as file:
    pickle.dump(classified_examples_annotations_dict, file)

classified_examples_annotations_df

FileNotFoundError: [Errno 2] No such file or directory: 'tcia-diagnosis-data-2012-04-20.xls'

In [None]:
def get_max_frame_size(patients_annotations_dict: dict[str, list[int]]):
    """Return the largest annotation bounding-box size over the given patients.

    For every key of ``patients_annotations_dict`` the first pylidc scan with
    that patient id is queried and the bounding box of each of its annotations
    is measured inside the scan volume. The dict values are not used here.

    Requires pylidc to be available as ``pl`` in the notebook namespace.

    Parameters
    ----------
    patients_annotations_dict : dict
        Mapping whose keys are patient ids.

    Returns
    -------
    tuple of int
        ``(height_max, width_max)``: the largest extents along the first and
        the second volume axis, each rounded up to an even number.
        ``(0, 0)`` when no annotation was measured.
    """
    height_max = width_max = 0
    total = len(patients_annotations_dict)

    for index, pid in enumerate(patients_annotations_dict):
        scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid).first()
        print(f"At scan {index+1} / {total}")

        if scan is None:
            # first() found no scan for this id; nothing to measure.
            print(f"No scan found for patient {pid}, skipping")
            continue

        annotations = scan.annotations
        if not annotations:
            continue

        # Build the volume once per scan instead of once per annotation:
        # every annotation of a scan indexes into the same volume.
        vol = scan.to_volume(verbose=False)

        for a in annotations:
            height, width, _ = vol[a.bbox()].shape
            height_max = max(height_max, height)
            width_max = max(width_max, width)

    # Round odd sizes up to the next even value.
    height_max += height_max % 2
    width_max += width_max % 2
    return height_max, width_max

In [None]:
def get_zoomed_out_bbox(cur_bbox, i_range, j_range, image_size=512):
    """Return a fixed-size window centred on a bounding box.

    The window is ``i_range`` wide along the first axis and ``j_range`` wide
    along the second axis. When it would stick out of the image it is shifted
    back inside, keeping its size. If the requested size is larger than the
    image, the window is cut to the whole image extent.

    Parameters
    ----------
    cur_bbox : sequence of three slices
        Bounding box along the i, j and k axes; ``start`` and ``stop`` of the
        first two slices are read.
    i_range, j_range : int
        Requested window size along the first and the second axis.
    image_size : int, default 512
        Extent of the image along the first two axes (exclusive upper bound
        for the returned slices).

    Returns
    -------
    tuple
        ``(i_slice, j_slice, cur_bbox[2])``; the third slice is passed through
        unchanged.
    """
    def _window(axis_slice, extent):
        # Centre of the original box along this axis.
        middle = axis_slice.start + (axis_slice.stop - axis_slice.start) // 2
        start = middle - extent // 2
        # Derive stop from start so the window is exactly `extent` wide,
        # also when `extent` is odd.
        stop = start + extent

        if start < 0:
            # Shift right so the window begins at the image border.
            stop -= start
            start = 0

        if stop > image_size:
            # Slice stops are exclusive, so `image_size` itself is a valid stop;
            # shift left by the overshoot only.
            start -= stop - image_size
            stop = image_size

        # Only negative when the window is larger than the image.
        return slice(max(start, 0), stop, None)

    return (_window(cur_bbox[0], i_range), _window(cur_bbox[1], j_range), cur_bbox[2])

In [None]:
# Saves a masked 2D picture of every nodule annotation, named after its annotation id.
# Also pickles a list of records with the keys "patient_id", "annotation_id" and "masked_imagepPath".
def get_masked_nodules_pictures(patients_annotations_dict: dict[str, list[int]], frame_height=100, frame_width=100):
    """Save one masked grayscale PNG per annotation and pickle an index of them.

    For every patient id (dict key) the first matching pylidc scan is queried.
    For each annotation of that scan a ``frame_height`` x ``frame_width``
    window around the nodule is cut out of the volume, the middle slice along
    the third axis is taken, and every pixel outside the annotation's boolean
    mask is set to -1000. The picture is written to
    ``masked_images/annotation-<id>.png``.

    Requires pylidc to be available as ``pl`` in the notebook namespace.

    Parameters
    ----------
    patients_annotations_dict : dict
        Patient id -> list of annotation ids.
    frame_height, frame_width : int, default 100
        Window size handed to ``get_zoomed_out_bbox``.

    Returns
    -------
    None
        The list of records is written to ``masked_images_dict.pkl``.

    Notes
    -----
    NOTE(review): ids are taken from the dict value by position, i.e. the i-th
    annotation of the scan is paired with the i-th id of the list. Confirm that
    both orders agree; a list shorter than ``scan.annotations`` raises
    ``IndexError``.
    """
    import os  # local import: the notebook's import cell does not provide os

    # Make sure the output folder exists before the first picture is saved.
    os.makedirs("masked_images", exist_ok=True)

    records = []
    total = len(patients_annotations_dict)

    for index, (pid, ann_list) in enumerate(patients_annotations_dict.items()):

        print(f"Current Patient: {pid} | {index+1}/{total}")
        scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid).first()
        if scan is None:
            # first() found no scan for this id; nothing to mask.
            print(f"No scan found for patient {pid}, skipping")
            continue

        vol = None
        for ann_i, annotation in enumerate(scan.annotations):
            if vol is None:
                # Build the volume once per scan instead of once per annotation.
                vol = scan.to_volume(verbose=False)

            bbox = get_zoomed_out_bbox(annotation.bbox(), frame_height, frame_width)
            bbox_np = np.array([[axis.start, axis.stop] for axis in bbox])
            mask = annotation.boolean_mask(bbox=bbox_np)

            crop = vol[bbox]
            z = crop.shape[2] // 2  # middle slice along the third axis
            masked_image = np.array(crop[:, :, z])  # copy, the volume stays untouched

            # NOTE(review): pylidc appears to treat the upper bounds of a bbox
            # matrix as inclusive, which would make the mask one element larger
            # per axis than the crop - confirm. Working on the overlapping region
            # is correct in either case.
            rows = min(masked_image.shape[0], mask.shape[0])
            cols = min(masked_image.shape[1], mask.shape[1])
            inside = np.asarray(mask[:rows, :cols, z], dtype=bool)

            # Pixels inside the mask are preserved, everything else becomes -1000
            # (the value this notebook uses as black).
            masked_image[:rows, :cols][~inside] = -1000

            masked_image_path = f"masked_images/annotation-{ann_list[ann_i]}.png"
            mpimg.imsave(masked_image_path, masked_image, cmap="gray")
            # The key spelling "masked_imagepPath" is kept as is so that existing
            # pickles and their readers stay compatible.
            records.append({"patient_id": pid, "annotation_id": ann_list[ann_i], "masked_imagepPath": masked_image_path})

    with open("masked_images_dict.pkl", "wb") as save_df:
        pickle.dump(records, save_df)

    print("Finished masking!")

In [None]:
# Reload the patient id -> annotation ids mapping from the pickle file with the
# same name that the first cell of this notebook writes.
classified_examples_annotations_dict = {}
with open("classified_examples_annotations_dict.pkl", "rb") as pid_list:
    classified_examples_annotations_dict = pickle.load(pid_list)

In [None]:
# Largest nodule frame over all loaded patients; get_max_frame_size returns
# (height, width), both rounded up to even values.
frame_size = get_max_frame_size(classified_examples_annotations_dict)
height, width = frame_size
print(f"The images will have size [{width}]x[{height}] pixels")

In [None]:
# Write one masked picture per annotation, all with the common frame size computed above.
get_masked_nodules_pictures(
    classified_examples_annotations_dict,
    frame_height=height,
    frame_width=width,
)

In [None]:
# Reload the records pickled by get_masked_nodules_pictures and export them as CSV.
df_dicts = list()
with open("masked_images_dict.pkl", "rb") as dicts:
    df_dicts = pickle.load(dicts)

# The records are dicts keyed "patient_id", "annotation_id" and "masked_imagepPath".
# Passing columns=[...] with different labels to the DataFrame constructor makes
# pandas select those (non-existent) columns, which yields a table full of NaN.
# Build the frame from the records first and rename the columns afterwards.
df = (
    pd.DataFrame(df_dicts)
    .rename(columns={
        "patient_id": "Patient_ID",
        "annotation_id": "Annotation_ID",
        "masked_imagepPath": "Masked_Image_Path",
    })
    # Fixes the column order and keeps the header even when there are no records.
    .reindex(columns=["Patient_ID", "Annotation_ID", "Masked_Image_Path"])
)
df.to_csv("masked_annotation_images.csv")