In [3]:
import os
import shutil
import random
from pathlib import Path

def split_data(source_folder, train_folder, test_folder, split_ratio=0.8, seed=None):
    """Randomly split paired image/mask files into train and test folders.

    ``source_folder`` must contain an ``image`` and a ``mask`` sub-folder.
    Both listings are sorted by file name and paired positionally, so the two
    folders must sort into the same order (e.g. fixed-width numbering such as
    ``0001.jpg`` / ``0001_mask.jpg``). Each pair is then copied into the
    ``image`` / ``mask`` sub-folders of either ``train_folder`` or
    ``test_folder``; those sub-folders are created if they do not exist.

    Args:
        source_folder: Folder holding the ``image`` and ``mask`` sub-folders.
        train_folder: Destination root for the training subset.
        test_folder: Destination root for the test subset.
        split_ratio: Fraction of pairs (between 0 and 1) sent to the training
            subset; the remainder goes to the test subset.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used, as before.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]`` or the number of
            image files differs from the number of mask files.
    """
    if not 0 <= split_ratio <= 1:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    image_dir = os.path.join(source_folder, "image")
    mask_dir = os.path.join(source_folder, "mask")

    # Keep regular files only: os.listdir also returns sub-directories
    # (e.g. ``.ipynb_checkpoints``), which shutil.copy cannot handle.
    image_files = sorted(
        name for name in os.listdir(image_dir)
        if os.path.isfile(os.path.join(image_dir, name))
    )
    mask_files = sorted(
        name for name in os.listdir(mask_dir)
        if os.path.isfile(os.path.join(mask_dir, name))
    )

    # Positional pairing is only meaningful when both sides have the same
    # length; zip() would otherwise silently drop the surplus files.
    if len(image_files) != len(mask_files):
        raise ValueError(
            f"Found {len(image_files)} image files but {len(mask_files)} mask files "
            f"in {source_folder!r}; cannot pair them."
        )

    # Shuffle the pairs (not the two lists separately) so each image stays
    # with its mask.
    pairs = list(zip(image_files, mask_files))
    if seed is None:
        random.shuffle(pairs)
    else:
        random.Random(seed).shuffle(pairs)

    split_index = int(len(pairs) * split_ratio)
    subsets = (
        (train_folder, pairs[:split_index]),
        (test_folder, pairs[split_index:]),
    )

    for dest_root, subset in subsets:
        dest_image_dir = os.path.join(dest_root, "image")
        dest_mask_dir = os.path.join(dest_root, "mask")
        # The copy targets live one level below the destination root, so the
        # sub-folders themselves have to exist before copying.
        Path(dest_image_dir).mkdir(parents=True, exist_ok=True)
        Path(dest_mask_dir).mkdir(parents=True, exist_ok=True)

        for image_file, mask_file in subset:
            shutil.copy(os.path.join(image_dir, image_file), os.path.join(dest_image_dir, image_file))
            shutil.copy(os.path.join(mask_dir, mask_file), os.path.join(dest_mask_dir, mask_file))

if __name__ == "__main__":
    # Source slices and the destination roots for the two subsets.
    source_folder = "/content/drive/MyDrive/Internship 2023/Skull Strip based on deep learning/2-2D Slices/Sample"
    train_folder = "/content/drive/MyDrive/Internship 2023/Skull Strip based on deep learning/Dataset_Train_Test/Train"
    test_folder = "/content/drive/MyDrive/Internship 2023/Skull Strip based on deep learning/Dataset_Train_Test/Test"

    # Fraction of image/mask pairs used for training; the rest is used for testing.
    split_ratio = 0.8

    split_data(
        source_folder=source_folder,
        train_folder=train_folder,
        test_folder=test_folder,
        split_ratio=split_ratio,
    )


# **Sanity Check**

- Check that every image in the train and test folders has its corresponding brain mask.

In [7]:
import os

def check_masks_for_train_images(train_folder, mask_folder):
    """Check that every image in ``train_folder`` has a mask in ``mask_folder``.

    The expected mask name is the image name with its trailing ``.jpg``
    replaced by ``_mask.jpg``; names that do not end in ``.jpg`` are looked up
    unchanged. Sub-directories of ``train_folder`` are ignored.

    Args:
        train_folder: Folder containing the image files.
        mask_folder: Folder expected to contain the matching mask files.

    Returns:
        ``True`` if a mask exists for every image, otherwise ``False``. The
        check stops at the first image without a mask and prints its name.
    """
    image_suffix = ".jpg"
    mask_suffix = "_mask.jpg"

    for image_filename in os.listdir(train_folder):
        # os.listdir also yields directories (e.g. ``.ipynb_checkpoints``);
        # those are not images and must not be reported as missing masks.
        if os.path.isdir(os.path.join(train_folder, image_filename)):
            continue

        # Rewrite only the trailing extension; str.replace would also touch
        # any earlier ".jpg" inside the name.
        if image_filename.endswith(image_suffix):
            mask_filename = image_filename[: -len(image_suffix)] + mask_suffix
        else:
            mask_filename = image_filename

        if not os.path.exists(os.path.join(mask_folder, mask_filename)):
            print(f"Mask not found for image: {image_filename}")
            return False

    print("All train images have corresponding masks.")
    return True

# Image and mask directories of the training subset; adjust these to your own layout.
train_folder_path = "/content/drive/MyDrive/Internship 2023/Skull Strip based on deep learning/Dataset_Train_Test/Train/image"
mask_folder_path = "/content/drive/MyDrive/Internship 2023/Skull Strip based on deep learning/Dataset_Train_Test/Train/mask"

# Left as the final expression so the notebook displays the boolean result.
check_masks_for_train_images(train_folder=train_folder_path, mask_folder=mask_folder_path)


All train images have corresponding masks.


True