In [1]:
import shutil
from tqdm.auto import tqdm
import glob
import os

from sklearn.model_selection import train_test_split
import zipfile

[H[2J

In [None]:
import sys
# Don't generate the __pycache__ folder locally.
# Set before the project import below so that it is already in effect for that import.
sys.dont_write_bytecode = True 
# Suppress traceback frames: with a limit of 0 only the exception type and
# message are printed when an error is raised.
sys.tracebacklimit = 0

# NOTE(review): wildcard import. Later cells use names that are not imported
# anywhere else in this notebook (Image, np, augment, relabel_masks_in_folder,
# extract_patches, ...), so they presumably all come from this module — confirm.
from Prepare_Dataset_Modules import *

In [2]:
# --- Patch extraction settings (all three are passed to extract_patches) ---

# NOTE(review): presumably the minimum number of labelled objects a patch must
# contain to be kept — confirm against extract_patches.
minimum_labels = 4

patch_size = 800                  # Patch size handed to extract_patches
overlap = int(patch_size * 0.25)  # A quarter of patch_size, truncated to an integer

# --- Augmentation settings ---

n_augmentations = 2  # Number of augmented copies written per training image

# --- Train / validation split ---

split_percentage = 0.2  # Fraction held out for validation (Train 80%, Validation 20%)

In [3]:
# ### Generate patches from Images and Masks

# Archive with the annotated data; the code below expects it to unpack into ./Annotations
zip_file_path = 'Annotations.zip'

# Start from a clean slate: discard any previously extracted copy
if os.path.exists('Annotations'):
    shutil.rmtree('Annotations')

# Extract the archive into the current working directory
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall()

dataset_root = os.path.join('/home', 'ajinkya', 'Desktop', 'PyOrganoidAnalysis', 'Dataset')
directory = os.path.join(dataset_root, 'Training')

# Replace the Training folder with a fresh copy of the extracted annotations
if os.path.exists(directory):
    shutil.rmtree(directory)
shutil.copytree('Annotations', directory)

# NOTE(review): the Training copy above is taken BEFORE the masks in
# Annotations/Masks are relabelled, so Training/Masks keeps the original labels.
# Confirm this is intended (e.g. extract_patches relabels on its own); otherwise
# the relabelling should run before the copy.
for masks_folder in (os.path.join('Annotations', 'Masks'),
                     os.path.join(dataset_root, 'Test', 'Masks')):
    relabel_masks_in_folder(masks_folder)

print()

Sequentially relabelling Masks in Annotations/Masks:   0%|          | 0/180 [00:00<?, ?it/s]

Sequentially relabelling Masks in /home/ajinkya/Desktop/PyOrganoidAnalysis/Dataset/Test/Masks:   0%|          …




In [4]:
directories = ['Augmented', 'Validation']

# Recreate each output folder from scratch with empty 'Images' and 'Masks' subfolders.
# The loop variable is deliberately not called `directory`, so the Training path
# stored under that name by the previous cell is not overwritten.
for split_name in tqdm(directories, desc = 'Creating Augmented and Validation directories'):

    dir_path = os.path.join('/home', 'ajinkya', 'Desktop', 'PyOrganoidAnalysis', 'Dataset', split_name)

    # Remove the output of any earlier run so stale files cannot leak into this one
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)

    # dir_path was just removed, so the subfolders cannot exist yet;
    # os.makedirs creates dir_path itself along with each subfolder.
    for sub_dir in ['Images', 'Masks']:
        os.makedirs(os.path.join(dir_path, sub_dir))

Creating Augmented and Validation directories:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
# Summarise the Training folder before patch extraction. The recorded output
# lists the number of images per 'human_' / 'mouse_' file-name prefix.
# NOTE(review): judging by its name the helper also verifies masks — confirm.
training_dir = os.path.join('/home', 'ajinkya', 'Desktop', 'PyOrganoidAnalysis', 'Dataset', 'Training')
count_image_names_and_check_masks(training_dir)


Number of Images with 'human_' prefix: 94
Number of Images with 'mouse_' prefix: 86



In [6]:
# Run patch extraction on the Training folder with the settings from the
# configuration cell (patch_size, overlap, minimum_labels).
# NOTE(review): presumably the patches replace the full-size files inside
# Training, since the next cells glob Training/Images directly — confirm.
training_dir = os.path.join('/home', 'ajinkya', 'Desktop', 'PyOrganoidAnalysis', 'Dataset', 'Training')
extract_patches(training_dir, patch_size, overlap, minimum_labels)

Extracting patches:   0%|          | 0/180 [00:00<?, ?it/s]

In [7]:
# ### Split the dataset in Train, Validation

# Fixed seed so that re-running the notebook yields the same train/validation split
split_seed = 42

dataset_root = os.path.join('/home', 'ajinkya', 'Desktop', 'PyOrganoidAnalysis', 'Dataset')
base_path = os.path.join(dataset_root, 'Training')
validation_path = os.path.join(dataset_root, 'Validation')

# Fetch image paths and derive the matching mask path for each one.
# Paths are built from the file name instead of str.replace() on the whole path,
# so a folder or file name that happens to contain 'Images' or '.tif' cannot
# corrupt the result.
images_path = sorted(glob.glob(os.path.join(base_path, 'Images', '*.tif')))
masks_path = [
    os.path.join(base_path, 'Masks', os.path.splitext(os.path.basename(f))[0] + '_mask.tif')
    for f in images_path
]

# Fail before anything is moved: a missing mask would otherwise leave the
# dataset half-moved (image in Validation, mask nowhere).
missing_masks = [m for m in masks_path if not os.path.exists(m)]
if missing_masks:
    raise FileNotFoundError(f"{len(missing_masks)} mask(s) missing, e.g. {missing_masks[0]}")

# Split based on split_percentage
train_images, validation_images, train_masks, validation_masks = train_test_split(
    images_path, masks_path, test_size=split_percentage, random_state=split_seed)

print()

# Display split statistics
print(f"Number of training Images: {len(train_images)} ")
print(f"Number of training Masks: {len(train_masks)}")

print(f"Number of validation Images: {len(validation_images)}")
print(f"Number of validation Masks: {len(validation_masks)}")

# Move the Validation Images and Masks to the Validation folder
for validation_img, validation_mask in zip(validation_images, validation_masks):
    dest_img_path = os.path.join(validation_path, 'Images', os.path.basename(validation_img))
    dest_mask_path = os.path.join(validation_path, 'Masks', os.path.basename(validation_mask))
    shutil.move(validation_img, dest_img_path)
    shutil.move(validation_mask, dest_mask_path)


Number of training Images: 713 
Number of training Masks: 713
Number of validation Images: 179
Number of validation Masks: 179


In [8]:
# ### Augment the training set

# NOTE(review): this guard skips augmentation entirely when n_augmentations == 1,
# although the loop below would then write one augmented copy per image.
# Confirm that 1 is meant to disable augmentation.
if n_augmentations >= 2:

    print()

    dataset_root = os.path.join('/home', 'ajinkya', 'Desktop', 'PyOrganoidAnalysis', 'Dataset')
    base_path = os.path.join(dataset_root, 'Training')
    augmented_path = os.path.join(dataset_root, 'Augmented')

    # Fetch training image paths (only the training set after split) and derive
    # each mask path from the image file name.
    train_images = sorted(glob.glob(os.path.join(base_path, 'Images', '*.tif')))
    train_masks = [
        os.path.join(base_path, 'Masks', os.path.splitext(os.path.basename(f))[0] + '_mask.tif')
        for f in train_images
    ]

    # Augmentation Process
    for img_path, mask_path in tqdm(list(zip(train_images, train_masks)), desc=f'Augmenting Images from Training ({n_augmentations}x)', leave=True):

        # Read each file once per image pair and close the handles right away;
        # the pixel data lives on in the NumPy arrays.
        with Image.open(img_path) as image:
            image_np = np.array(image.convert('L')).astype(np.uint8)
        with Image.open(mask_path) as mask:
            mask_np = np.array(mask).astype(np.uint16)

        # File name without extension, shared by the image and mask outputs
        stem = os.path.splitext(os.path.basename(img_path))[0]

        for aug_idx in range(n_augmentations):
            # Pass copies so every augmentation starts from the untouched
            # source arrays, whether or not augment() modifies its inputs.
            aug_image_np, aug_mask_np = augment(image_np.copy(), mask_np.copy())

            aug_image = Image.fromarray(aug_image_np)
            aug_mask = Image.fromarray(aug_mask_np)

            aug_img_path = os.path.join(augmented_path, 'Images', f'{stem}_aug{aug_idx}.tif')
            aug_mask_path = os.path.join(augmented_path, 'Masks', f'{stem}_aug{aug_idx}_mask.tif')

            aug_image.save(aug_img_path)
            aug_mask.save(aug_mask_path)




Augmenting Images from Training (2x):   0%|          | 0/713 [00:00<?, ?it/s]

In [9]:
# Final verification of the generated dataset folders.
# Per the recorded output, this call reports the image/mask counts for Training,
# Validation and Augmented, followed by a data type / value range summary.
check_data_sanity()

# Blank line between the two reports
print()

# Per the recorded output, this call reports the number of images and masks per
# 'human_' / 'mouse_' file-name prefix in each folder.
validate_and_count_images()


In Training:
Number of Images: 713
Number of Masks: 713
----------------------------
In Validation:
Number of Images: 179
Number of Masks: 179
----------------------------
In Augmented:
Number of Images: 1426
Number of Masks: 1426
----------------------------

All Images and Masks meet the requirements:
Images: Data type - <class 'numpy.uint8'>, Value range - (0, 255)
Masks: Data type - <class 'numpy.uint16'>, Value range - (0, 65535)

In Training:
Number of Images starting with 'human_': 362
Number of Images starting with 'mouse_': 351
Number of Masks starting with 'human_': 362
Number of Masks starting with 'mouse_': 351

In Validation:
Number of Images starting with 'human_': 100
Number of Images starting with 'mouse_': 79
Number of Masks starting with 'human_': 100
Number of Masks starting with 'mouse_': 79

In Augmented:
Number of Images starting with 'human_': 724
Number of Images starting with 'mouse_': 702
Number of Masks starting with 'human_': 724
Number of Masks starting wi

In [10]:
# Organoid statistics for the annotated source masks. The recorded output gives
# the total number of human and mouse organoids and the number of masks of each type.
annotation_masks_dir = os.path.join('Annotations', 'Masks')
count_organoid_number_by_type(annotation_masks_dir)

Calculating statistics on Masks:   0%|          | 0/180 [00:00<?, ?it/s]

Total number of Human Organoids in 'Annotations/Masks' for 94 Masks is: 6075
Total number of Mouse Organoids in 'Annotations/Masks' for 86 Masks is: 17310


In [16]:
# Same organoid statistics as for the annotations, computed on the held-out Test masks
test_masks_dir = os.path.join('/home', 'ajinkya', 'Desktop', 'PyOrganoidAnalysis',
                              'Dataset', 'Test', 'Masks')
count_organoid_number_by_type(test_masks_dir)

Calculating statistics on Masks:   0%|          | 0/10 [00:00<?, ?it/s]

Total number of Human Organoids in '/home/ajinkya/Desktop/PyOrganoidAnalysis/Dataset/Test/Masks' for 5 Masks is: 365
Total number of Mouse Organoids in '/home/ajinkya/Desktop/PyOrganoidAnalysis/Dataset/Test/Masks' for 5 Masks is: 8633
