In [1]:
from segmentation import segment, init_segment_model
from augmentation import augment, init_augment_model
import SimpleITK as sitk
import os
import csv
import numpy as np
from util import XyzTuple, xyz2irc, logging, getCache
import collections
from collections import namedtuple  
import glob
from concurrent.futures import ThreadPoolExecutor

In [2]:
CandidateInfoTuple = namedtuple('CandidateInfoTuple', 'isNodule_bool, hasAnnotation_bool, isMal_bool, diameter_mm, series_uid, center_xyz')
def getCandidateInfoList(requireOnDisk_bool=True):
    # We construct a set with all series_uids that are present on disk.
    # This will let us use the data, even if we haven't downloaded all of
    # the subsets yet.
    mhd_list = glob.glob('C:/Users/oplab/Desktop/Luna16_data/Luna16_img/subset*/*.mhd')
    presentOnDisk_set = {os.path.split(p)[-1][:-4] for p in mhd_list}

    candidateInfo_list = []
    with open('C:/Users/oplab/Desktop/Luna16_data/annotations_with_malignancy.csv', "r") as f:
        for row in list(csv.reader(f))[1:]:
            series_uid = row[0]
            annotationCenter_xyz = tuple([float(x) for x in row[1:4]])
            annotationDiameter_mm = float(row[4])
            isMal_bool = {'False': False, 'True': True}[row[5]] #it record the malignancy or not

            if series_uid not in presentOnDisk_set and requireOnDisk_bool:
                continue

            candidateInfo_list.append(
                CandidateInfoTuple(
                    True,
                    True,
                    isMal_bool,
                    annotationDiameter_mm,
                    series_uid,
                    annotationCenter_xyz,
                )
            )

    with open('C:/Users/oplab/Desktop/Luna16_data/candidates_V2.csv', "r") as f:
        for row in list(csv.reader(f))[1:]:
            series_uid = row[0]

            if series_uid not in presentOnDisk_set and requireOnDisk_bool:
                continue

            isNodule_bool = bool(int(row[4]))
            candidateCenter_xyz = tuple([float(x) for x in row[1:4]])

            if not isNodule_bool:
                candidateInfo_list.append(
                    CandidateInfoTuple(
                        False,
                        False,
                        False,
                        0.0,
                        series_uid,
                        candidateCenter_xyz,
                    )
                )

    candidateInfo_list.sort(reverse=True)
    return candidateInfo_list

In [3]:
def getCandidateInfoDict(requireOnDisk_bool=True):  #把candidateInfoList包成Dict
    candidateInfo_list = getCandidateInfoList(requireOnDisk_bool)
    candidateInfo_dict = {}

    for candidateInfo_tup in candidateInfo_list:
        candidateInfo_dict.setdefault(candidateInfo_tup.series_uid,
                                      []).append(candidateInfo_tup)

    return candidateInfo_dict

In [4]:
# candidate_dict =  getCandidateInfoDict()

In [5]:
# write_root = "D://LUNA//Luna16_AugData"
# if not os.path.exists(write_root):
#     os.mkdir("D://LUNA//Luna16_AugData")
    
# append = True
# # split to train test
# for sub_root in os.listdir("C://Users//oplab//Desktop//Luna16_data//Luna16_img//"):
#     root = os.path.join("E:\LUNA\Luna_Data", sub_root)
#     print(root)
#     for path in os.listdir(root):
#         break
#         # npy_path = glob.glob('./augmented_segmented/{}.npy'.format(series_uid))
#         npy_path = glob.glob(os.path.join(write_root, sub_root, path) + ''.format(series_uid))
#         if (npy_path != [] and append):
#             print("pass")
#             continue
#         mhd_path = glob.glob('C:/Users/oplab/Desktop/Luna16_data/Luna16_img/subset*/{}.mhd'.format(series_uid))
#         ct_mhd = sitk.ReadImage(mhd_path)
#         if ct_mhd.GetDimension()==4 and ct_mhd.GetSize()[3]==1:
#             ct_mhd = ct_mhd[...,0]
#         origin = sitk.GetArrayFromImage(ct_mhd)
#         print(mhd_path)
#         augmented = augment(origin)
#         # segmented = segment(augmented)
#         # hu_a = np.array(segmented, dtype=np.float32)
#         hu_a = np.array(augmented, dtype=np.float32)
#         # np.save("./augmented/{}.npy".format(series_uid), hu_a)
#         # np.save("./augmented_segmented/{}.npy".format(series_uid), hu_a)
#         # Load or create a SimpleITK image
#         current_origin = ct_mhd.GetOrigin()
#         current_spacing = ct_mhd.GetSpacing()

#         sitk_image = sitk.GetImageFromArray(augmented)
#         # image_short = sitk.Cast(image, sitk.sitkInt16)

#         # Set the image origin, spacing, and direction (modify as needed)
#         sitk_image.SetOrigin((current_origin[0], current_origin[1], current_origin[2]))
#         sitk_image.SetSpacing((current_spacing[0], current_spacing[1], current_spacing[2] / 2))

#         # Save the image as a MetaImage file
#         if not os.path.exists(os.path.join(write_root, sub_root)):
#             os.mkdir(os.path.join(write_root, sub_root))
#         sitk.WriteImage(sitk_image, os.path.join(write_root, sub_root, path)) 

In [6]:
# npy_path = glob.glob(os.path.join(write_root, sub_root, path) + ''.format(series_uid))

In [7]:
model_best, device = init_augment_model()
# model, device = init_segment_model()

In [8]:
# Function to process a single image
def process_image(sub_root, path, write_root_1, write_root_2, append):
    raw_path_1 = os.path.join(write_root_1, sub_root, path)
    exist_path_1 = glob.glob(os.path.join(write_root_1, sub_root, path))
    raw_path_2 = os.path.join(write_root_2, sub_root, path)
    exist_path_2 = glob.glob(os.path.join(write_root_2, sub_root, path))
    
    if exist_path_1 and append:
        print("pass")
        return
    
    imagePath = os.path.join(root, path)
    ct_mhd = sitk.ReadImage(imagePath)
    
    if ct_mhd.GetDimension() == 4 and ct_mhd.GetSize()[3] == 1:
        ct_mhd = ct_mhd[..., 0]
    
    origin = sitk.GetArrayFromImage(ct_mhd)
    augmented = augment(origin)
    hu_a = np.array(augmented, dtype=np.float32)
    
    current_origin = ct_mhd.GetOrigin()
    current_spacing = ct_mhd.GetSpacing()
    
    sitk_image = sitk.GetImageFromArray(augmented)
    sitk_image.SetOrigin((current_origin[0], current_origin[1], current_origin[2]))
    sitk_image.SetSpacing((current_spacing[0], current_spacing[1], current_spacing[2] / 2))
    
    if not os.path.exists(os.path.join(write_root_1, sub_root)):
        os.makedirs(os.path.join(write_root_1, sub_root))
    
    print(os.path.join(write_root_1, sub_root, path))
    sitk.WriteImage(sitk_image, raw_path_1)


In [9]:
# # Main processing loop
# write_root_1 = "D:\\LUNA\\Luna16_AugData"
# if not os.path.exists(write_root_1):
#     os.mkdir(write_root_1)

# write_root_2 = "D:\\LUNA\\Luna16_SegData"
# if not os.path.exists(write_root_2):
#     os.mkdir(write_root_2)

# read_root = "C:\\Users\\oplab\\Desktop\\Luna16_data\\Luna16_img"

# append = True

# # Create a ThreadPoolExecutor to parallelize image processing
# max_workers = 4  # You can adjust the number of workers based on your machine's capabilities
# with ThreadPoolExecutor(max_workers=max_workers) as executor:
#     for sub_root in os.listdir(read_root):
#         root = os.path.join(read_root, sub_root)
#         print(root)
#         for path in os.listdir(root):
#             if path.find("mhd") >= 0:
#                 print(path)
#                 executor.submit(process_image, sub_root, path, write_root_1, write_root_2, append)

In [16]:
model, device = init_segment_model()

In [None]:
write_root_1 = "D:\\LUNA\\Luna16_AugData"
if not os.path.exists(write_root_1):
    os.mkdir("D:\\LUNA\\Luna16_AugData")
write_root_2 = "D:\\LUNA\\Luna16_SegData"
if not os.path.exists(write_root_2):
    os.mkdir("D:\\LUNA\\Luna16_SegData")
    
# read_root = "C:\\Users\\oplab\\Desktop\\Luna16_data\\Luna16_img"
read_root = "D:\\LUNA\\Luna16_AugData"
    
append = True

# split to train test
# for sub_root in os.listdir(read_root):
#     root = os.path.join(read_root, sub_root)
#     print(root)
sub_root = "subset9"
root = os.path.join(read_root, sub_root)
print(root)

for path in os.listdir(root):
    if path.find("mhd") >= 0:
        raw_path_1 = os.path.join(write_root_1, sub_root, path)
        exist_path_1 =  glob.glob(os.path.join(write_root_1, sub_root, path))
        raw_path_2 = os.path.join(write_root_2, sub_root, path)
        exist_path_2 =  glob.glob(os.path.join(write_root_2, sub_root, path))
        if (exist_path_2 != [] and append):
            print("pass")
            continue
        print(path)
        imagePath = os.path.join(root, path)
        ct_mhd = sitk.ReadImage(imagePath)
        if ct_mhd.GetDimension()==4 and ct_mhd.GetSize()[3]==1:
            ct_mhd = ct_mhd[...,0]
        origin = sitk.GetArrayFromImage(ct_mhd)

       # segment part
        segmented = segment(origin, model, device)
        hu_a = np.array(segmented, dtype=np.float32)
        # Load or create a SimpleITK image
        current_origin = ct_mhd.GetOrigin()
        current_spacing = ct_mhd.GetSpacing()

        sitk_image = sitk.GetImageFromArray(segmented)
        # image_short = sitk.Cast(image, sitk.sitkInt16)

        # Set the image origin, spacing, and direction (modify as needed)
        sitk_image.SetOrigin((current_origin[0], current_origin[1], current_origin[2]))
        sitk_image.SetSpacing((current_spacing[0], current_spacing[1], current_spacing[2]))

        # Save the image as a MetaImage file
        if not os.path.exists(os.path.join(write_root_2, sub_root)):
            os.mkdir(os.path.join(write_root_2, sub_root))
        print(os.path.join(write_root_2, sub_root, path))
        sitk.WriteImage(sitk_image, raw_path_2) 

D:\LUNA\Luna16_AugData\subset9
pass
1.3.6.1.4.1.14519.5.2.1.6279.6001.109882169963817627559804568094.mhd
D:\LUNA\Luna16_SegData\subset9\1.3.6.1.4.1.14519.5.2.1.6279.6001.109882169963817627559804568094.mhd
1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295249119452142211437.mhd
D:\LUNA\Luna16_SegData\subset9\1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295249119452142211437.mhd
1.3.6.1.4.1.14519.5.2.1.6279.6001.114914167428485563471327801935.mhd
D:\LUNA\Luna16_SegData\subset9\1.3.6.1.4.1.14519.5.2.1.6279.6001.114914167428485563471327801935.mhd
1.3.6.1.4.1.14519.5.2.1.6279.6001.121108220866971173712229588402.mhd
D:\LUNA\Luna16_SegData\subset9\1.3.6.1.4.1.14519.5.2.1.6279.6001.121108220866971173712229588402.mhd
1.3.6.1.4.1.14519.5.2.1.6279.6001.121805476976020513950614465787.mhd
D:\LUNA\Luna16_SegData\subset9\1.3.6.1.4.1.14519.5.2.1.6279.6001.121805476976020513950614465787.mhd
1.3.6.1.4.1.14519.5.2.1.6279.6001.124656777236468248920498636247.mhd
D:\LUNA\Luna16_SegData\subset9\1.3.6.1.4.1.14519.5

In [10]:
write_root_1 = "D:\\LUNA\\Luna16_AugData"
if not os.path.exists(write_root_1):
    os.mkdir("D:\\LUNA\\Luna16_AugData")
write_root_2 = "D:\\LUNA\\Luna16_SegData"
if not os.path.exists(write_root_2):
    os.mkdir("D:\\LUNA\\Luna16_SegData")
    
read_root = "C:\\Users\\oplab\\Desktop\\Luna16_data\\Luna16_img"
# read_root = "D:\\LUNA\\Luna16_AugData"
    
append = True

# split to train test
for sub_root in os.listdir(read_root):
    root = os.path.join(read_root, sub_root)
    print(root)
    for path in os.listdir(root):
        if path.find("mhd") >= 0:
            raw_path_1 = os.path.join(write_root_1, sub_root, path)
            exist_path_1 =  glob.glob(os.path.join(write_root_1, sub_root, path))
            raw_path_2 = os.path.join(write_root_2, sub_root, path)
            exist_path_2 =  glob.glob(os.path.join(write_root_2, sub_root, path))
            if (exist_path_1 != [] and append):
                print("pass")
                continue
            print(path)
            imagePath = os.path.join(root, path)
            ct_mhd = sitk.ReadImage(imagePath)
            if ct_mhd.GetDimension()==4 and ct_mhd.GetSize()[3]==1:
                ct_mhd = ct_mhd[...,0]
            origin = sitk.GetArrayFromImage(ct_mhd)
            augmented = augment(origin, model_best, device)
            hu_a = np.array(augmented, dtype=np.float32)
            # Load or create a SimpleITK image
            current_origin = ct_mhd.GetOrigin()
            current_spacing = ct_mhd.GetSpacing()

            sitk_image = sitk.GetImageFromArray(augmented)
            # image_short = sitk.Cast(image, sitk.sitkInt16)

            # Set the image origin, spacing, and direction (modify as needed)
            sitk_image.SetOrigin((current_origin[0], current_origin[1], current_origin[2]))
            sitk_image.SetSpacing((current_spacing[0], current_spacing[1], current_spacing[2] / 2))

            # Save the image as a MetaImage file
            if not os.path.exists(os.path.join(write_root_1, sub_root)):
                os.mkdir(os.path.join(write_root_1, sub_root))
            print(os.path.join(write_root_1, sub_root, path))
            sitk.WriteImage(sitk_image, raw_path_1) 
            
           # segment part
#             segmented = segment(origin, model, device)
#             hu_a = np.array(segmented, dtype=np.float32)
#             # Load or create a SimpleITK image
#             current_origin = ct_mhd.GetOrigin()
#             current_spacing = ct_mhd.GetSpacing()

#             sitk_image = sitk.GetImageFromArray(segmented)
#             # image_short = sitk.Cast(image, sitk.sitkInt16)

#             # Set the image origin, spacing, and direction (modify as needed)
#             sitk_image.SetOrigin((current_origin[0], current_origin[1], current_origin[2]))
#             sitk_image.SetSpacing((current_spacing[0], current_spacing[1], current_spacing[2]))

#             # Save the image as a MetaImage file
#             if not os.path.exists(os.path.join(write_root_2, sub_root)):
#                 os.mkdir(os.path.join(write_root_2, sub_root))
#             print(os.path.join(write_root_2, sub_root, path))
#             sitk.WriteImage(sitk_image, raw_path_2) 

C:\Users\oplab\Desktop\Luna16_data\Luna16_img\subset0
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
C:\Users\oplab\Desktop\Luna16_data\Luna16_img\subset1
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
C:

In [11]:
# print(raw_path)

In [12]:
os.path.join(write_root, sub_root, path)

NameError: name 'write_root' is not defined

In [None]:
glob.glob(os.path.join(write_root, sub_root, path))

In [None]:
sitk.WriteImage(sitk_image, "D://udet//augmented//{}.raw".format(series_uid)) 

In [None]:
npy_path = glob.glob('./augmented_segmented/{}.npy'.format("1.3.6.1.4.1.14519.5.2.1.6279.6001.287966244644280690737019247886"))

In [None]:
npy_path

In [None]:
if (npy_path != [] and append):
        print("pass")

In [None]:
np.save("./augmented_segmented/{}.npy".format(series_uid), hu_a)