In [2]:
# Directory that holds the preprocessed scans/metadata read by this notebook
# and that receives the augmented patches and their metadata CSV.
OUTPUT_PATH = '/content/drive/MyDrive/Preprocessed_Luna_2/'

# Resource directory containing annotations.csv, candidates.csv and the
# subdirectories that hold the .mhd/.raw files.
# The directory layout shown below is required to run the code. Every .mhd/.raw
# file inside the subdirectories is used if its name is listed in
# annotations.csv or candidates.csv.
'''
[RESOURCES_PATH]/
            annotations.csv
            candidates.csv
            subset0/
                        *.mhd
                        *.raw
            subset1/
                        *.mhd
                        *.raw
            my_custom_subset/
                        *.mhd
                        *.raw
'''
RESOURCES_PATH = '/content/drive/MyDrive/Luna/'





# Not referenced by the augmentation code in this notebook; presumably a margin
# used by the lung-localization/preprocessing step — TODO confirm.
PADDING_FOR_LOCALIZATION = 10
# Edge length of the cubic patches; the augmentation code uses BLOCK_SIZE // 2
# to keep patch origins at least half a block away from the volume borders.
BLOCK_SIZE = 128
# The constants below (through DEFAULT_LR) are not used by the cells visible
# here; presumably they configure the detection model/training — TODO confirm.
COORDS_CUBE_SIZE = 32
# Derived from COORDS_CUBE_SIZE: three cube axes followed by trailing dims 3 and 5.
TARGET_SHAPE = (COORDS_CUBE_SIZE, COORDS_CUBE_SIZE, COORDS_CUBE_SIZE, 3, 5)
# Derived from COORDS_CUBE_SIZE: a leading dim of 3 followed by the three cube axes.
COORDS_SHAPE = (3, COORDS_CUBE_SIZE, COORDS_CUBE_SIZE, COORDS_CUBE_SIZE)
ANCHOR_SIZES = [10, 30, 60]
VAL_PCT = 0.2
TOTAL_EPOCHS = 100
DEFAULT_LR = 0.01
# True: labeled run (reads preprocessed_meta.csv and the positives/negatives
# folders). False: unlabeled run (reads preprocessed_meta_unlabeled.csv and the
# unlabeled folder).
ANNOTATION_EXIST = True

In [3]:

# UTILS
from functools import partial

import numpy as np
from matplotlib import pyplot as plt
from scipy import ndimage as ndi
import scipy
from skimage.filters import roberts
from skimage.measure import label, regionprops
from skimage.morphology import convex_hull_image, disk, binary_closing
from skimage.segmentation import clear_border


def argmax_3d(img: np.array):
    """Find the position and value of the maximum of a 3-D array.

    The array is reduced one axis at a time (axis 0 first, then the first
    axis of each remaining plane/line), remembering at every step where the
    maximum was located, and the index is then rebuilt from the last axis
    back to the first.

    Returns:
        ``((i0, i1, i2), img[i0, i1, i2])`` where the indices are NumPy
        integer scalars. On ties the smallest last-axis index wins, then the
        smallest middle-axis index, then the smallest first-axis index.
    """
    # Collapse axis 0: for every (axis-1, axis-2) cell keep its best axis-0 index.
    winner_axis0 = np.argmax(img, axis=0)
    plane = np.max(img, axis=0)

    # Collapse what used to be axis 1: one best index per last-axis position.
    winner_axis1 = np.argmax(plane, axis=0)
    line = np.max(plane, axis=0)

    # Only the last axis is left; walk back through the stored winners.
    idx2 = np.argmax(line, axis=0)
    idx1 = winner_axis1[idx2]
    idx0 = winner_axis0[idx1, idx2]

    position = (idx0, idx1, idx2)
    return position, img[position]


def _get_cube_from_img_new(img, origin: tuple, block_size=128, pad_value=106.):
    assert 2 <= len(origin) <= 3
    final_image_shape = tuple([block_size] * len(origin))
    result = np.ones(final_image_shape) * pad_value
    start_at_original_images = []
    end_at_original_images = []
    start_at_result_images = []
    end_at_result_images = []
    for i, center_of_a_dim in enumerate(origin):
        start_at_original_image = int(center_of_a_dim - block_size / 2)
        end_at_original_image = start_at_original_image + block_size
        if start_at_original_image < 0:
            start_at_result_image = abs(start_at_original_image)
            start_at_original_image = 0
        else:
            start_at_result_image = 0
        if end_at_original_image > img.shape[i]:
            end_at_original_image = img.shape[i]
            end_at_result_image = start_at_result_image + (end_at_original_image - start_at_original_image)
        else:
            end_at_result_image = block_size
        start_at_original_images.append(start_at_original_image)
        end_at_original_images.append(end_at_original_image)
        start_at_result_images.append(start_at_result_image)
        end_at_result_images.append(end_at_result_image)
    # for simplicity
    sri = start_at_result_images
    eri = end_at_result_images
    soi = start_at_original_images
    eoi = end_at_original_images

    print("sri {}, eri {}, soi {}, eoi {}. img shape: {}".format(sri,eri,soi,eoi, img.shape))
    if len(origin) == 3:
        result[sri[0]:eri[0], sri[1]:eri[1], sri[2]:eri[2]] = img[soi[0]:eoi[0], soi[1]:eoi[1], soi[2]:eoi[2]]
    elif len(origin) == 2:
        result[sri[0]:eri[0], sri[1]:eri[1]] = img[soi[0]:eoi[0], soi[1]:eoi[1]]

    return result

def normal_crop(img: np.array, centers: list, lungs_bounding_box: list, radii: list, center_of_cube: list,
                spacing: tuple,
                block_size: int,
                pad_value: float, margin: int):
    """Crop a block around ``center_of_cube`` and re-express points in block coordinates.

    The block is produced by ``_get_cube_from_img_new``. Every entry of
    ``centers`` and ``lungs_bounding_box`` is shifted so that
    ``center_of_cube`` maps to ``int(block_size / 2)`` on each axis.
    ``radii``, ``spacing`` and ``margin`` are accepted but not used here.

    Returns:
        ``(out_img, out_centers, out_lungs_bounding_box)``; the two lists hold
        tuples of NumPy scalars, one tuple per input point.
    """
    out_img = _get_cube_from_img_new(img, origin=tuple(center_of_cube), block_size=block_size, pad_value=pad_value)
    print("centers: ",centers)

    def _into_block_coordinates(points):
        # new = block centre - (cube centre - point), computed per point
        relocated = []
        for point in points:
            offset = np.array(center_of_cube) - np.array(point)
            block_center = np.array([int(block_size / 2)] * len(point), dtype=int)
            relocated.append(tuple(block_center - offset))
        return relocated

    out_centers = _into_block_coordinates(centers)
    out_lungs_bounding_box = _into_block_coordinates(lungs_bounding_box)

    return out_img, out_centers, out_lungs_bounding_box




def get_augmented_cube_normal(img: np.array, radii: list, centers: list, center_of_cube: list, spacing: tuple,
                       lungs_bounding_box: list, block_size=128, pad_value=106, margin=10, rot_id=None):
    """Crop one block via ``normal_crop`` and report which centres fall inside it.

    Args:
        img: volume to crop from.
        radii: per-centre radii; returned unchanged.
        centers: points in ``img`` index space.
        center_of_cube: centre of the block to cut.
        spacing: returned unchanged.
        lungs_bounding_box: points that are shifted into block coordinates.
        block_size: edge length of the block.
        pad_value: fill value for positions outside ``img``.
        margin: forwarded to ``normal_crop``.
        rot_id: accepted for interface compatibility; not used.

    Returns:
        ``(img2, radii, centers2, lungs_bounding_box2, spacing, existing_centers_in_patch)``
        where ``centers2``/``lungs_bounding_box2`` are in block coordinates and
        ``existing_centers_in_patch`` lists the indices of the centres whose
        every coordinate lies inside the block.
    """
    img2, centers2, lungs_bounding_box2 = normal_crop(img=img, centers=centers,
                                                      lungs_bounding_box=lungs_bounding_box, radii=radii,
                                                      center_of_cube=center_of_cube, spacing=spacing,
                                                      block_size=block_size, pad_value=pad_value, margin=margin)

    # Valid block indices are 0 .. block_size - 1, so the upper bound is
    # exclusive; a coordinate equal to block_size is already outside the block.
    existing_centers_in_patch = [
        idx for idx, center in enumerate(centers2)
        if all(0 <= ax < block_size for ax in center)
    ]

    return img2, radii, centers2, lungs_bounding_box2, spacing, existing_centers_in_patch



# RUN AUGMENTATION

import pandas as pd
import os
from ast import literal_eval

class PatchMaker(object):
    """Cut a fixed 2 x 2 x 3 grid of cubic patches out of one preprocessed scan.

    The volume is loaded from ``file_path`` on construction. Patches are
    written below ``OUTPUT_PATH/augmented/`` and one metadata record is
    produced per patch.
    """

    def __init__(self, seriesuid: str, coords: list, radii: list, spacing: list, lungs_bounding_box: list,
                 file_path: str,
                 clazz: int):
        """Store the scan metadata and load the volume from ``file_path``.

        Args:
            seriesuid: identifier used in output file names and metadata records.
            coords: centre points in the index space of the loaded volume.
            radii: one radius per entry of ``coords``.
            spacing: passed through to the cropping helper as a tuple.
            lungs_bounding_box: two points; index 1 of each is used for the
                y-centre and index 2 for the x-centre
                (presumably opposite corners in (z, y, x) order — TODO confirm).
            file_path: path of the ``.npy`` volume to load.
            clazz: class label; 0 selects the ``negatives`` output folder.
        """
        self._seriesuid = seriesuid
        self._coords = coords
        self._spacing = spacing
        self._radii = radii
        self._image = np.load(file=f'{file_path}')
        self._clazz = clazz
        self._lungs_bounding_box = lungs_bounding_box

    def _get_augmented_patch_normal(self, center_of_cube, rot_id=None):
        """Crop one ``BLOCK_SIZE`` patch centred on ``center_of_cube``.

        ``BLOCK_SIZE`` is passed explicitly so the crop always matches the
        block size used to clamp the origins, instead of relying on the
        helper's own default.
        """
        return get_augmented_cube_normal(img=self._image, radii=self._radii, centers=self._coords,
                                  spacing=tuple(self._spacing), rot_id=rot_id, center_of_cube=center_of_cube,
                                  lungs_bounding_box=self._lungs_bounding_box, block_size=BLOCK_SIZE)

    def get_augmented_patches_normal(self):
        """Crop, save and describe the 12 patches of this scan.

        Patch centres combine three positions along axis 0 (near the start,
        the middle and near the end of the volume) with two positions along
        each of the y and x axes. Every centre is clamped so that it stays at
        least half a block away from the volume borders.

        Returns:
            A list with one dict per patch (keys ``seriesuid``, ``centers``,
            ``sub_index``, ``lungs_bounding_box``, ``radii``, ``class``).
            ``centers`` and ``radii`` only contain the entries located inside
            the patch.
        """
        half_block = BLOCK_SIZE // 2
        shape = self._image.shape
        bounding_box = self._lungs_bounding_box

        z_slices = [half_block, shape[0] // 2, shape[0] - half_block]
        # Multipliers applied to the shifted bounding-box centre to obtain the
        # two patch positions per in-plane axis.
        x_slices = [0.6, 1.8]
        y_slices = [0.9,1.5]

        # Midpoint of the bounding box, shifted by fixed offsets.
        # NOTE(review): the offsets (-10, -40) and the multipliers above are
        # magic numbers whose rationale is not visible here.
        x_center = ((bounding_box[0][2] + bounding_box[1][2]) // 2) - 10
        y_center = ((bounding_box[0][1] + bounding_box[1][1]) // 2) - 40

        print("Center: x: {}  y: {}".format(x_center,y_center))

        # The output folder does not depend on the patch position.
        if(ANNOTATION_EXIST):
            subdir = 'negatives' if self._clazz == 0 else 'positives'
        else:
            subdir = 'unlabeled'

        list_of_dicts = []
        for i in range(2):
            x_origin = int(max( min(x_slices[i] * x_center, shape[2]-half_block), half_block ))
            for j in range(2):
                y_origin = int(max( min(y_slices[j] * y_center, shape[1]-half_block), half_block ))
                for k in range(3):
                    origin = (int(z_slices[k]), y_origin, x_origin)

                    print("Origin: ",origin, " Image Shape: ",self._image.shape)

                    img, radii2, centers, lungs_bounding_box, spacing, existing_nodules_in_patch = \
                        self._get_augmented_patch_normal(center_of_cube=origin)
                    existing_radii = [radii2[idx] for idx in existing_nodules_in_patch]
                    existing_centers = [centers[idx] for idx in existing_nodules_in_patch]

                    file_path = f'''augmented/{subdir}/{self._seriesuid}_{i}_{j}_{k}.npy'''
                    list_of_dicts.append(
                        {'seriesuid': self._seriesuid, 'centers': existing_centers, 'sub_index': f'{i}_{j}_{k}',
                            'lungs_bounding_box': lungs_bounding_box, 'radii': existing_radii, 'class': self._clazz})
                    np.save(f'{OUTPUT_PATH}/{file_path}', img)
                    # Print the path itself; the braces previously wrapped it in a set literal.
                    print("Saving: ", file_path)

        return list_of_dicts


def _get_patches(record):
    """Build a ``PatchMaker`` for one metadata row and return its patch records.

    ``spacing``, ``lungs_bounding_box``, ``centers`` and ``radii`` are stored
    as Python-literal strings in the row and are parsed with ``literal_eval``.
    The preprocessed volume is looked up below ``OUTPUT_PATH/preprocessed/``.
    With annotations, only class 1 is read from ``positives``; every other
    class value is read from ``negatives``.
    """
    seriesuid = record['seriesuid']

    # Parse the literal-encoded columns in a fixed order.
    parsed = {
        field: literal_eval(record[field])
        for field in ('spacing', 'lungs_bounding_box', 'centers', 'radii')
    }
    clazz = int(record['class'])

    if not ANNOTATION_EXIST:
        file_directory = 'preprocessed/unlabeled'
    elif clazz == 1:
        file_directory = 'preprocessed/positives'
    else:
        file_directory = 'preprocessed/negatives'

    file_path = f'{OUTPUT_PATH}/{file_directory}/{seriesuid}.npy'

    patch_maker = PatchMaker(seriesuid=seriesuid, coords=parsed['centers'], radii=parsed['radii'],
                             spacing=parsed['spacing'],
                             lungs_bounding_box=parsed['lungs_bounding_box'],
                             file_path=file_path, clazz=clazz)
    return patch_maker.get_augmented_patches_normal()



def save_augmented_data(preprocess_meta):
    """Create the augmented patches for every selected scan and write their metadata CSV.

    Processing order:
      1. every row with ``class == 1``;
      2. rows with ``class == 0``, stopping as soon as the number of negative
         patches exceeds half the number of patches collected in step 1;
      3. every row with ``class == 2`` (their patches are added to the
         positive list after the balancing in step 2).

    The metadata is written to ``augmented_meta.csv`` (or
    ``augmented_meta_unlabeled.csv`` when ``ANNOTATION_EXIST`` is false)
    below ``OUTPUT_PATH``, positives first, then negatives.

    Args:
        preprocess_meta: DataFrame with at least the columns read by
            ``_get_patches`` plus ``class``.
    """
    for subdir in ('positives', 'negatives', 'unlabeled'):
        os.makedirs(f'{OUTPUT_PATH}/augmented/{subdir}', exist_ok=True)

    print("Creating files: ",)

    list_of_positives = []
    list_of_negatives = []
    for _, rec in preprocess_meta.loc[preprocess_meta['class'] == 1].iterrows():
        list_of_positives += _get_patches(rec)
    for _, rec in preprocess_meta.loc[preprocess_meta['class'] == 0].iterrows():
        list_of_negatives += _get_patches(rec)
        # 33 percent of the data will be negative samples
        if len(list_of_negatives) > len(list_of_positives) / 2:
            break
    for _, rec in preprocess_meta.loc[preprocess_meta['class'] == 2].iterrows():
        list_of_positives += _get_patches(rec)

    # Build the frame once from all records instead of appending row by row;
    # the explicit column list fixes the column order of the CSV.
    augmentation_meta = pd.DataFrame(
        list_of_positives + list_of_negatives,
        columns=['seriesuid', 'sub_index', 'centers', 'lungs_bounding_box', 'radii', 'class'])

    if(ANNOTATION_EXIST):
        augmentation_meta.to_csv(f'{OUTPUT_PATH}/augmented_meta.csv')
    else:
        augmentation_meta.to_csv(f'{OUTPUT_PATH}/augmented_meta_unlabeled.csv')

    print("Finished")


if __name__ == '__main__':
    # Choose the metadata produced by the preprocessing step: the unlabeled
    # variant when no annotations exist, the labeled one otherwise.
    if not ANNOTATION_EXIST:
        print("No annotation")
        p_meta = pd.read_csv(f'{OUTPUT_PATH}/preprocessed_meta_unlabeled.csv', index_col=0)
    else:
        print("With annotation")
        p_meta = pd.read_csv(f'{OUTPUT_PATH}/preprocessed_meta.csv', index_col=0)

    # Cut, save and index the patches for every scan listed in the metadata.
    save_augmented_data(p_meta)



Görüntülenen çıkış son 5000 satıra kısaltıldı. (Displayed output was truncated to the last 5000 lines.)
Saving:  {'augmented/positives/1.3.6.1.4.1.14519.5.2.1.6279.6001.172573195301625265149778785969_1_1_2.npy'}
Center: x: 177  y: 152
Origin:  (64, 136, 106)  Image Shape:  (341, 379, 379)
sri [0, 0, 0], eri [128, 128, 128], soi [0, 72, 42], eoi [128, 200, 170]. img shape: (341, 379, 379)
centers:  [(250, 231, 104)]
Saving:  {'augmented/positives/1.3.6.1.4.1.14519.5.2.1.6279.6001.177685820605315926524514718990_0_0_0.npy'}
Origin:  (170, 136, 106)  Image Shape:  (341, 379, 379)
sri [0, 0, 0], eri [128, 128, 128], soi [106, 72, 42], eoi [234, 200, 170]. img shape: (341, 379, 379)
centers:  [(250, 231, 104)]
Saving:  {'augmented/positives/1.3.6.1.4.1.14519.5.2.1.6279.6001.177685820605315926524514718990_0_0_1.npy'}
Origin:  (277, 136, 106)  Image Shape:  (341, 379, 379)
sri [0, 0, 0], eri [128, 128, 128], soi [213, 72, 42], eoi [341, 200, 170]. img shape: (341, 379, 379)
centers:  [(250, 231, 104)]
Saving:  {'augmented/positives/1.3

In [9]:
# Combine two augmentation metadata files into the single augmented_meta.csv.
# NOTE(review): no visible cell writes augmented_meta_org.csv or
# augmented_meta_new.csv, and this cell overwrites the augmented_meta.csv
# written by save_augmented_data — confirm where the two inputs come from.

# Load both metadata files from the shared output directory.
df1 = pd.read_csv(os.path.join(OUTPUT_PATH, 'augmented_meta_org.csv'))
df2 = pd.read_csv(os.path.join(OUTPUT_PATH, 'augmented_meta_new.csv'))

# Stack the rows of the second file below the first (a vertical concatenation,
# not a key-based merge). Both files are expected to have the same columns.
# NOTE(review): if the inputs were saved with their index, that index is read
# here as an ordinary column and written back unchanged — verify that the
# consumers of augmented_meta.csv expect this layout.
merged_df = pd.concat([df1, df2], axis=0)

# Save the combined rows; the DataFrame index is not written.
merged_df.to_csv(os.path.join(OUTPUT_PATH, 'augmented_meta.csv'), index=False)