In [7]:
import SimpleITK as sitk
import numpy as np
import csv
from glob import glob
import pandas as pd
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import cv2

In [57]:
# LUNA_mask_extraction.py

file_list = os.listdir("./subset0")
file_list = list(map(lambda file : "./subset0/" + file, file_list))

def make_mask(center, diam, z, width, height, spacing, origin):
    '''
        Center : centers of circles px -- list of coordinates x,y,z
        diam : diameters of circles px -- diameter
        widthXheight : pixel dim of image
        spacing = mm/px conversion rate np array x,y,z
        origin = x,y,z mm np.array
        z = z position of slice in world coordinates mm
    '''
    mask = np.zeros([height, width]) # 0's everywhere except nodule swapping x,y to match img
    #convert to nodule space from world coordinates

    # Defining the voxel range in which the nodule falls
    v_center = (center - origin)/spacing
    v_diam = int(diam/spacing[0] + 1)
    v_xmin = np.max([0, int(v_center[0]-v_diam) - 1])
    v_xmax = np.min([width-1, int(v_center[0]+v_diam) + 1])
    v_ymin = np.max([0, int(v_center[1]-v_diam) - 1]) 
    v_ymax = np.min([height-1, int(v_center[1]+v_diam) + 1])

    v_xrange = range(v_xmin, v_xmax+1)
    v_yrange = range(v_ymin, v_ymax+1)

    # Convert back to world coordinates for distance calculation
    x_data = [x*spacing[0]+origin[0] for x in range(width)]
    y_data = [x*spacing[1]+origin[1] for x in range(height)]

    # Fill in 1 within sphere around nodule
    for v_x in v_xrange:
        for v_y in v_yrange:
            p_x = spacing[0]*v_x + origin[0]
            p_y = spacing[1]*v_y + origin[1]
            if np.linalg.norm(center - np.array([p_x, p_y, z])) <= diam:
                mask[int((p_y-origin[1]) / spacing[1]), int((p_x-origin[0]) / spacing[0])] = 1.0
    return(mask, [v_xmin, v_xmax, v_ymin, v_ymax])

def get_filename(case):
    global file_list
    for f in file_list:
        if case in f:
            return(f)
        
def matrix2int16(matrix):
    ''' 
        matrix must be a numpy array NXN
        Returns uint16 version
    '''
    m_min= np.min(matrix)
    m_max= np.max(matrix)
    matrix = matrix-m_min
    return(np.array(np.rint( (matrix-m_min)/float(m_max-m_min) * 65535.0), dtype=np.uint16))

if __name__ == "__main__":
    df_node = pd.read_csv("./annotations.csv")
    df_node["file"] = df_node["seriesuid"].apply(get_filename)
    df_node = df_node.dropna()
    df_roi = pd.read_csv("./temp.csv")
    df_roi_cur = dict()
    
    for fcount, img_file in enumerate(tqdm(file_list)):
        # print("Getting mask for image file %s" % img_file.replace("./subset0",""))
        mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file
        if len(mini_df) > 0:       # some files may not have a nodule--skipping those
            itk_img = sitk.ReadImage(img_file) # read .mhd file
            img_array = sitk.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering)
            num_z, height, width = img_array.shape
            origin = np.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
            spacing = np.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
            
            for node_idx, cur_row in mini_df.iterrows():       
                node_x = cur_row["coordX"]
                node_y = cur_row["coordY"]
                node_z = cur_row["coordZ"]
                diam = cur_row["diameter_mm"]
                
                imgs = np.ndarray([5, height, width], dtype=np.float32)
                masks = np.ndarray([5, height, width], dtype=np.uint8)
                center = np.array([node_x, node_y, node_z])   # nodule center
                v_center = np.rint((center-origin)/spacing)  # nodule center in voxel space (still x,y,z ordering)
            
            for i, i_z in enumerate(np.arange(int(v_center[2])-2, int(v_center[2])+3).clip(0, num_z-1)): # clip prevents going out of bounds in Z
                mask, roi = make_mask(center, diam, i_z*spacing[2]+origin[2], width, height, spacing, origin)
                if i == 1:
                    # roi_key = "roi_%04d_%04d" % (fcount, node_idx)
                    roi_key = f"roi_{file_list[0].split('/')[-1].rsplit('.', 1)[0]}_{fcount}_{node_idx}"
                    df_roi_cur.update({roi_key: roi})
                masks[i] = mask
                imgs[i] = img_array[i_z]
            img_name = f"images_{file_list[0].split('/')[-1].rsplit('.', 1)[0]}_{fcount}_{node_idx}.npy"
            mask_name = f"masks_{file_list[0].split('/')[-1].rsplit('.', 1)[0]}_{fcount}_{node_idx}.npy"
            # np.save(os.path.join("./images", "images_%04d_%04d.npy" % (fcount, node_idx)), imgs)
            # np.save(os.path.join("./masks", "masks_%04d_%04d.npy" % (fcount, node_idx)), masks)
            
            np.save(os.path.join("./images", img_name), imgs)
            np.save(os.path.join("./masks", mask_name), masks)
            
    df_temp = pd.DataFrame(df_roi_cur.items(), columns=['ID', 'ROI'])
    df_roi = pd.concat([df_roi, df_temp]).drop("Unnamed: 0", axis=1)
    df_roi.to_csv("temp.csv", index=False)

100%|██████████| 178/178 [00:56<00:00,  3.15it/s]


In [59]:
def normalize(image):
    MIN_BOUND = -1000.0
    MAX_BOUND = 400.0
    image = (image - MIN_BOUND) / (MAX_BOUND - MIN_BOUND)
    image[image>1] = 1.
    image[image<0] = 0.
    return image

img_files = os.listdir("./images")
for img_file in tqdm(img_files):
    imgs = np.load(f"./images/{img_file}")
    temp = normalize(imgs[2])
    img_grey = temp * 255
    img_rgb = np.stack((img_grey,)*3, -1)
    cv2.imwrite(f"./preprocessed_img/{img_file}.png", img_rgb)

  0%|          | 0/134 [00:00<?, ?it/s]

100%|██████████| 134/134 [00:05<00:00, 24.90it/s]


In [54]:
df_temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67 entries, 0 to 66
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ID      67 non-null     object
 1   ROI     67 non-null     object
dtypes: object(2)
memory usage: 1.2+ KB


In [56]:
df_roi = pd.read_csv("./temp.csv").drop("Unnamed: 0", axis=1)
df_roi.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 134 entries, 0 to 133
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ID      134 non-null    object
 1   ROI     134 non-null    object
dtypes: object(2)
memory usage: 2.2+ KB


In [41]:
a = f"roi_{file_list[0].split('/')[-1].rsplit('.', 1)[0]}_{fcount}_{node_idx}"
print(a)

roi_1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260_ 177_1180


In [28]:
print(len(df_roi))
print(df_roi.get("roi_0014_0098"))

67
[np.int64(352), np.int64(378), np.int64(251), np.int64(277)]


In [33]:
df_temp = pd.DataFrame(df_roi.items(), columns=['ID', 'ROI'])

In [38]:
df_temp.head()

Unnamed: 0,ID,ROI
0,roi_0002_0023,"[100, 120, 336, 356]"
1,roi_0004_0026,"[399, 417, 328, 346]"
2,roi_0006_0028,"[419, 435, 343, 359]"
3,roi_0010_0086,"[447, 467, 218, 238]"
4,roi_0014_0098,"[352, 378, 251, 277]"


In [37]:

df_temp.to_csv("temp.csv")

In [32]:
print(file_list[0].split('/')[-1].rsplit('.', 1)[0])

1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260


In [27]:
imgs = np.load("./images/" + 'images_0014_0098.npy')
masks = np.load("./masks/" + 'masks_0014_0098.npy')
print(masks.shape)
print(imgs[0].shape)
    
MIN_BOUND = -1000.0
MAX_BOUND = 400.0
    
def normalize(image):
    image = (image - MIN_BOUND) / (MAX_BOUND - MIN_BOUND)
    image[image>1] = 1.
    image[image<0] = 0.
    return image
temp = normalize(imgs[0])
# img_grey = temp * 255
# img_rgb = np.stack((img_grey,)*3, -1)
img_rgb = cv2.imread("./temp.png")
x_min, x_max, y_min, y_max = df_roi.get("roi_0014_0098")
cv2.rectangle(img_rgb, (x_min, y_min), (x_max, y_max), color=(255,0,0), thickness=2)
cv2.imshow("lalala", img_rgb)
cv2.waitKey(0)
# cv2.imwrite("temp.png", img_rgb)

(5, 512, 512)
(512, 512)


-1

In [30]:
# temp.py

import SimpleITK as sitk
import os
import numpy as np
import pandas as pd

file_name = "./subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260.mhd"
itk_img = sitk.ReadImage(file_name)
img_array = sitk.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering)
# center = np.array([node_x, node_y, node_z])   # nodule center
origin = np.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
spacing = np.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
# v_center =np.rint((center-origin)/spacing)  # nodule center in voxel space (still x,y,z ordering

num_z, height, width = img_array.shape
print(num_z, height, width)

121 512 512
