In [3]:
import os
import cv2 as cv
import numpy as np
import random

# The Reinhard stain-normalisation helpers are defined below. Anisotropic diffusion is not defined in this cell; its call in the processing loop is commented out.

# Directories: class sub-folders are read from `input_dir`; processed images
# are written under `output_dir`.
input_dir = './demo_datasets_week8'
output_dir = './Demo_Dataset(reinhard+flip)'

# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)
    
# Reinhard Stain Normalizer
def standardize_brightness(I):
    """Rescale intensities so that the 90th percentile maps to 255.

    Parameters
    ----------
    I : numpy.ndarray
        Image intensities. The percentile is taken over all values of the
        array, regardless of shape.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of the same shape as ``I``: values multiplied by
        ``255 / p`` (``p`` being the 90th percentile of ``I``), clipped to
        ``[0, 255]`` and truncated to integers. When ``p`` is 0 there is no
        usable reference level, so the input is only clipped and cast.
    """
    p = np.percentile(I, 90)
    if p == 0:
        # Dividing by a zero percentile would yield NaN/inf values whose
        # conversion to uint8 is undefined; leave the intensities unscaled.
        return np.clip(I, 0, 255).astype(np.uint8)
    return np.clip(I * 255.0 / p, 0, 255).astype(np.uint8)

def lab_split(I):
    """Convert an RGB image to LAB and return its three channels as float32.

    The image is converted with ``cv.COLOR_RGB2LAB``, cast to ``float32`` and
    split. The first channel is divided by 2.55 and 128 is subtracted from the
    other two -- presumably undoing OpenCV's 8-bit Lab encoding (confirm
    against the OpenCV colour-conversion documentation).

    Parameters
    ----------
    I : numpy.ndarray
        RGB image accepted by ``cv.cvtColor``.

    Returns
    -------
    tuple
        ``(I1, I2, I3)``: the rescaled first channel and the two offset
        channels.
    """
    lab = cv.cvtColor(I, cv.COLOR_RGB2LAB).astype(np.float32)
    first, second, third = cv.split(lab)
    return first / 2.55, second - 128.0, third - 128.0

def merge_back(I1, I2, I3):
    """Recombine three LAB channels into a ``uint8`` RGB image.

    Applies the inverse of the scaling done in ``lab_split`` (first channel
    multiplied by 2.55, 128 added to the other two), merges the channels,
    clips to ``[0, 255]``, casts to ``uint8`` and converts with
    ``cv.COLOR_LAB2RGB``.

    Parameters
    ----------
    I1, I2, I3 : numpy.ndarray
        The three channels, in the order returned by ``lab_split``.
        They are not modified.

    Returns
    -------
    numpy.ndarray
        RGB image produced by ``cv.cvtColor``.
    """
    # Use out-of-place arithmetic: the in-place operators (*=, +=) would
    # rescale the caller's arrays as a hidden side effect.
    lab = cv.merge((I1 * 2.55, I2 + 128.0, I3 + 128.0))
    lab_u8 = np.clip(lab, 0, 255).astype(np.uint8)
    return cv.cvtColor(lab_u8, cv.COLOR_LAB2RGB)

def get_mean_std(I):
    """Return the per-channel mean and standard deviation of an image in LAB.

    The image is split with ``lab_split`` and ``cv.meanStdDev`` is evaluated
    on each channel in order.

    Parameters
    ----------
    I : numpy.ndarray
        RGB image accepted by ``lab_split``.

    Returns
    -------
    tuple
        ``(means, stds)``, each a 3-tuple holding the ``cv.meanStdDev``
        results for the three channels.
    """
    per_channel = [cv.meanStdDev(channel) for channel in lab_split(I)]
    # Transpose [(mean, std), ...] into (means, stds).
    means, stds = zip(*per_channel)
    return means, stds

class Normalizer:
    """Reinhard stain normalizer.

    Matches the per-channel LAB mean and standard deviation of an image to
    those of a target image supplied to ``fit``.
    """

    def __init__(self):
        # Per-channel LAB statistics of the target image; set by fit().
        self.target_means = None
        self.target_stds = None

    def fit(self, target):
        """Store the LAB channel statistics of ``target``.

        Parameters
        ----------
        target : numpy.ndarray
            RGB reference image. It is brightness-standardised before its
            statistics are measured.
        """
        target = standardize_brightness(target)
        self.target_means, self.target_stds = get_mean_std(target)

    @staticmethod
    def _match_channel(channel, mean, std, target_mean, target_std):
        """Shift and scale one channel from (mean, std) to the target statistics.

        A zero ``std`` (perfectly flat channel) is replaced by 1 so the
        division stays finite; since ``channel - mean`` is then all zeros,
        the result is simply ``target_mean`` everywhere.
        """
        safe_std = np.where(std == 0, 1.0, std)
        return (channel - mean) * (target_std / safe_std) + target_mean

    def transform(self, I):
        """Normalise an RGB image towards the fitted target statistics.

        ``fit`` must have been called first; otherwise the target statistics
        are still ``None`` and indexing them fails.

        Parameters
        ----------
        I : numpy.ndarray
            RGB image to normalise.

        Returns
        -------
        numpy.ndarray
            The normalised RGB image returned by ``merge_back``.
        """
        I = standardize_brightness(I)
        matched = []
        # Measure statistics on the channels already split here instead of
        # calling get_mean_std(I), which would repeat the LAB conversion.
        for idx, channel in enumerate(lab_split(I)):
            mean, std = cv.meanStdDev(channel)
            matched.append(
                self._match_channel(
                    channel, mean, std,
                    self.target_means[idx], self.target_stds[idx],
                )
            )
        return merge_back(*matched)
        
# Build the normalizer and fit it on the reference (target) image.
normalizer = Normalizer()

target_image_path = './target_image.jpg'
target_image = cv.imread(target_image_path, cv.IMREAD_COLOR)
if target_image is None:
    # The load produced no image; stop here instead of failing inside fit().
    raise FileNotFoundError(f"Target image not found at path: {target_image_path}")

# The image is converted from BGR to RGB before fitting, matching the
# RGB2LAB conversion used by the normalizer helpers.
target_image_rgb = cv.cvtColor(target_image, cv.COLOR_BGR2RGB)
normalizer.fit(target_image_rgb)

# Augmentation settings. A dedicated, seeded generator (rather than the global
# `random` state) makes the set of flipped images reproducible for a given
# directory listing.
FLIP_PROBABILITY = 0.1
FLIP_SEED = 42
flip_rng = random.Random(FLIP_SEED)

# Process each class folder within the dataset directory.
# sorted() fixes the traversal order, which os.listdir() leaves arbitrary, so
# the seeded flip decisions land on the same files on every run.
for class_folder in sorted(os.listdir(input_dir)):
    class_path = os.path.join(input_dir, class_folder)

    if not os.path.isdir(class_path):
        print(f"Skipping non-directory entry: {class_path}")
        continue

    # Make sure the output directory for the class exists
    output_class_path = os.path.join(output_dir, class_folder)
    os.makedirs(output_class_path, exist_ok=True)

    for image_name in sorted(os.listdir(class_path)):
        image_path = os.path.join(class_path, image_name)
        image = cv.imread(image_path, cv.IMREAD_COLOR)

        if image is None:
            print(f"Failed to load image: {image_path}")
            continue

        image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)

        # Apply Reinhard stain normalization (anisotropic diffusion is
        # currently disabled; the call is kept below for reference).
        #diffused_image = anisodiff_color(image_rgb, niter=10, kappa=100, gamma=0.1, step=(1., 1.), option=1)
        normalized_image = normalizer.transform(image_rgb)

        # Randomly decide to apply a horizontal flip
        if flip_rng.random() < FLIP_PROBABILITY:
            final_image = cv.flip(normalized_image, 1)  # Mirror flip horizontally
        else:
            final_image = normalized_image

        # Save the final image; cv.imwrite reports failure through its return
        # value, so check it instead of assuming the write succeeded.
        output_image_path = os.path.join(output_class_path, image_name)
        saved = cv.imwrite(output_image_path, cv.cvtColor(final_image, cv.COLOR_RGB2BGR))
        if saved:
            print(f"Processed and saved: {output_image_path}")
        else:
            print(f"Failed to save image: {output_image_path}")

Processed and saved: ./Demo_Dataset(reinhard+flip)\ADI\1015_CRC-Prim-HE-10_020.tif_Row_601_Col_601.tif
Processed and saved: ./Demo_Dataset(reinhard+flip)\ADI\1040C_CRC-Prim-HE-05_032.tif_Row_1051_Col_2101.tif
Processed and saved: ./Demo_Dataset(reinhard+flip)\ADI\1050A_CRC-Prim-HE-03_012.tif_Row_301_Col_1351.tif
Processed and saved: ./Demo_Dataset(reinhard+flip)\ADI\109EF_CRC-Prim-HE-03_012.tif_Row_3151_Col_901.tif
Processed and saved: ./Demo_Dataset(reinhard+flip)\ADI\10C21_CRC-Prim-HE-05_032.tif_Row_1201_Col_3901.tif
Processed and saved: ./Demo_Dataset(reinhard+flip)\ADI\110E3_CRC-Prim-HE-07.tif_Row_301_Col_3901.tif
Processed and saved: ./Demo_Dataset(reinhard+flip)\ADI\11472_CRC-Prim-HE-07.tif_Row_1201_Col_2701.tif
Processed and saved: ./Demo_Dataset(reinhard+flip)\ADI\1157_CRC-Prim-HE-05_032.tif_Row_1051_Col_2701.tif
Processed and saved: ./Demo_Dataset(reinhard+flip)\ADI\11744_CRC-Prim-HE-07.tif_Row_1_Col_3301.tif
Processed and saved: ./Demo_Dataset(reinhard+flip)\ADI\1174E_CRC-Pri

In [4]:
import os

# Location of the processed dataset directory
output_dir = './Demo_Dataset(reinhard+flip)'

# File-name suffixes (compared case-insensitively) that count as images
image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif')

# Map each class sub-folder name to the number of image files it contains
class_counts = {}
for entry in os.listdir(output_dir):
    entry_path = os.path.join(output_dir, entry)
    if not os.path.isdir(entry_path):
        # Only directories are treated as classes
        continue
    class_counts[entry] = sum(
        1
        for file_name in os.listdir(entry_path)
        if file_name.lower().endswith(image_suffixes)
    )

# Report the counts for each class
for class_name, count in class_counts.items():
    print(f"Class '{class_name}': {count} images")


Class 'ADI': 1000 images
Class 'DEBRIS_MUCUS': 1000 images
Class 'LYM': 1000 images
Class 'MUSC_STROMA': 1000 images
Class 'NORM': 1000 images
Class 'TUM': 1000 images
