Pre_processing

In [None]:
import os
import cv2
import numpy as np
import random
from sklearn.model_selection import train_test_split
from staintools import StainNormalizer

# Source dataset location and the three destination folders used by this script
input_folder = os.path.expanduser("~/Desktop/CD_dataset")
output_folder_pre = os.path.expanduser("~/Desktop/CD_Dataset_Pre")
output_folder_train = os.path.expanduser("~/Desktop/CD_Dataset_Pre_Train")
output_folder_test = os.path.expanduser("~/Desktop/CD_Dataset_Pre_Test")

# Make sure every destination exists before any image is written
# (exist_ok=True makes re-running the script harmless)
for _folder in (output_folder_pre, output_folder_train, output_folder_test):
    os.makedirs(_folder, exist_ok=True)

# Target stain values handed to the stain normalizer (2x2 array)
target_stains = np.array(
    [
        [0.5626, 0.2159],
        [0.7201, 0.8012],
    ]
)

# Function to resize and apply sparse stain normalization
def preprocess_image(image):
    """Resize an image to 512x512 and run it through the stain normalizer.

    Args:
        image: Image array as returned by ``cv2.imread``.

    Returns:
        The output of ``StainNormalizer.transform`` for the resized image.

    Raises:
        ValueError: If ``image`` is ``None``, which is what ``cv2.imread``
            returns when a file is missing or cannot be decoded.
    """
    # Fail with a clear message instead of an opaque OpenCV assertion
    # from cv2.resize when the caller passes a failed read.
    if image is None:
        raise ValueError(
            "preprocess_image received None; the image could not be read"
        )

    # Resize the image to 512x512
    resized_image = cv2.resize(image, (512, 512))

    # Initialize normalizer
    # NOTE(review): the staintools README shows StainNormalizer(method=...)
    # followed by .fit(target_image); confirm that the installed version
    # accepts a ``target_stains`` keyword, otherwise this raises TypeError.
    normalizer = StainNormalizer(target_stains=target_stains)

    # Normalize the image
    # NOTE(review): cv2.imread yields BGR channel order; staintools presumably
    # expects RGB -- verify whether a cv2.cvtColor round-trip is needed here.
    normalized_image = normalizer.transform(resized_image)

    return normalized_image

# Function to apply foveal blur
def apply_foveal_blur(image, kernel_size=(21, 21), sigma=3):
    """Blur ``image`` with a Gaussian kernel to simulate foveal blur.

    The same kernel is applied to the entire image through
    ``cv2.GaussianBlur``.

    Args:
        image: Image array to blur.
        kernel_size: ``(width, height)`` of the Gaussian kernel.
        sigma: Standard deviation passed to OpenCV as ``sigmaX``.

    Returns:
        The blurred image produced by ``cv2.GaussianBlur``.
    """
    return cv2.GaussianBlur(image, kernel_size, sigmaX=sigma)

# Process each image in the input folder
image_files = os.listdir(input_folder)
random.shuffle(image_files)  # Shuffle the list for random selection

# Accepted image extensions; compared case-insensitively so that files such
# as "slide.JPG" are not silently skipped.
IMAGE_EXTENSIONS = (".jpg", ".png", ".jpeg")

# Number of images that receive the blur augmentation
BLUR_TARGET = 402

blurred_count = 0
for filename in image_files:
    if not filename.lower().endswith(IMAGE_EXTENSIONS):
        continue

    image_path = os.path.join(input_folder, filename)

    # Read the image. cv2.imread does not raise on failure; it returns None,
    # so unreadable files are reported and skipped rather than crashing
    # the whole run.
    image = cv2.imread(image_path)
    if image is None:
        print("Skipping unreadable image:", image_path)
        continue

    # Apply preprocessing (resize and stain normalization)
    preprocessed_image = preprocess_image(image)

    # Save the preprocessed image in the CD_Dataset_Pre folder
    preprocessed_image_path = os.path.join(output_folder_pre, filename)
    cv2.imwrite(preprocessed_image_path, preprocessed_image)

    # Apply foveal blur to a random subset of images (402 images).
    # The list was shuffled above, so the first BLUR_TARGET readable images
    # form a uniformly random subset of exactly that size (or every image
    # when fewer are available). A per-image coin flip would only hit 402
    # on average and would be skewed by non-image entries in the folder.
    if blurred_count < BLUR_TARGET:
        # Apply foveal blur
        blurred_image = apply_foveal_blur(preprocessed_image)

        # Save the blurred image in the CD_Dataset_Pre_Train folder
        blurred_image_path = os.path.join(output_folder_train, filename)
        cv2.imwrite(blurred_image_path, blurred_image)
        blurred_count += 1
    elif random.random() < 0.2:
        # Save 20% of non-blurred images in the CD_Dataset_Pre_Test folder
        test_image_path = os.path.join(output_folder_test, filename)
        cv2.imwrite(test_image_path, preprocessed_image)

print("Data preprocessing and augmentation complete.")
