In [None]:
import os
import random
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
import shutil

# --- Locations ---------------------------------------------------------------
# Source images are read from one sub-folder per class under `dataset_path`;
# the balanced copy is written to the same per-class layout under
# `augmented_path`.
dataset_path = r"C:\Users\94718\OneDrive\Desktop\NIBM\AI\CourseWork\code\try2\Original"
augmented_path = r"C:\Users\94718\OneDrive\Desktop\NIBM\AI\CourseWork\code\try2\Augmented"

# --- Dataset settings --------------------------------------------------------
# Class folder names, and the number of images every class should end up with.
classes = ['Benign', 'Early', 'Pre', 'Pro']
target_count = 800

# --- Augmentation ------------------------------------------------------------
# Random transforms used to synthesise extra images for any class that has
# fewer than `target_count` originals. The final step fixes the output size.
_augmentation_steps = [
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.Resize((224,224)),
]
augmentation = transforms.Compose(_augmentation_steps)

# --- Output tree -------------------------------------------------------------
# Make the root folder first, then one sub-folder per class. Existing folders
# are left untouched.
os.makedirs(augmented_path, exist_ok=True)
for class_name in classes:
    class_dir = os.path.join(augmented_path, class_name)
    os.makedirs(class_dir, exist_ok=True)

# Process each class
#
# Every class ends up with `target_count` images of size IMAGE_SIZE:
#   * fewer originals  -> copy all of them, then add augmented variants
#   * more originals   -> keep a reproducible random subset
#   * exactly enough   -> copy all of them
IMAGE_SIZE = (224, 224)

# Fixed seed so the down-sampled subset is the same on every run. Without it a
# re-run would pick different files and the output folder would grow past
# `target_count`. The augmentation transforms are not seeded in this cell, so
# augmented images still vary between runs (their file names do not).
SAMPLING_SEED = 42

# File extensions Pillow has a plugin for; used to ignore stray non-image files.
_PIL_EXTENSIONS = set(Image.registered_extensions())


def _list_images(folder):
    """Return the sorted names of the image files directly inside `folder`.

    Sub-directories and files whose extension Pillow does not recognise are
    skipped. Sorting makes the result independent of the order in which the
    operating system happens to list the directory.
    """
    return sorted(
        name
        for name in os.listdir(folder)
        if os.path.splitext(name)[1].lower() in _PIL_EXTENSIONS
        and os.path.isfile(os.path.join(folder, name))
    )


def _resize_and_save(src_dir, dst_dir, file_names):
    """Write an RGB, IMAGE_SIZE copy of each named file from `src_dir` to `dst_dir`.

    File names are preserved. Each source file is closed as soon as its pixels
    have been read.
    """
    for name in tqdm(file_names):
        with Image.open(os.path.join(src_dir, name)) as src:
            resized = src.convert("RGB").resize(IMAGE_SIZE)
        resized.save(os.path.join(dst_dir, name))


sampler = random.Random(SAMPLING_SEED)

for cls in classes:
    print(f"\nProcessing {cls}...")
    cls_path = os.path.join(dataset_path, cls)
    save_cls_path = os.path.join(augmented_path, cls)
    files = _list_images(cls_path)
    current_count = len(files)

    if current_count == 0:
        # Nothing to copy or augment from; also avoids `i % 0` below.
        print(f"{cls}: no images found in {cls_path}, skipping")
        continue

    if current_count < target_count:
        # Upsample with augmentation
        print(f"{cls}: {current_count} → augmenting to {target_count}")
        _resize_and_save(cls_path, save_cls_path, files)  # keep every original

        augment_needed = target_count - current_count
        for i in tqdm(range(augment_needed)):
            # Cycle through the originals so the extra images are spread evenly.
            img_file = files[i % current_count]
            with Image.open(os.path.join(cls_path, img_file)) as src:
                img_aug = augmentation(src.convert("RGB"))
            save_name = f"{cls.lower()}_aug_{i+1}.jpg"
            img_aug.save(os.path.join(save_cls_path, save_name))

    elif current_count > target_count:
        # Downsample (seeded random pick of target_count files)
        print(f"{cls}: {current_count} → downsampling to {target_count}")
        selected_files = sampler.sample(files, target_count)
        _resize_and_save(cls_path, save_cls_path, selected_files)

    else:
        # Exactly target_count → just resize & copy
        print(f"{cls}: already {target_count}, just resizing")
        _resize_and_save(cls_path, save_cls_path, files)

    # Sanity check: files left over from an earlier run would unbalance the set.
    produced = len(_list_images(save_cls_path))
    if produced != target_count:
        print(
            f"WARNING: {save_cls_path} contains {produced} images, "
            f"expected {target_count} (stale files from a previous run?)"
        )

print(f"\n✅ Balanced dataset created in: {augmented_path}")





Processing Benign...
Benign: 504 → augmenting to 800


100%|██████████| 504/504 [00:02<00:00, 218.44it/s]
100%|██████████| 296/296 [00:01<00:00, 158.06it/s]



Processing Early...
Early: 985 → downsampling to 800


100%|██████████| 800/800 [00:03<00:00, 265.48it/s]



Processing Pre...
Pre: 963 → downsampling to 800


100%|██████████| 800/800 [00:03<00:00, 265.06it/s]



Processing Pro...
Pro: 804 → downsampling to 800


100%|██████████| 800/800 [00:03<00:00, 266.15it/s]


✅ Balanced dataset created in: C:\Users\94718\OneDrive\Desktop\NIBM\AI\CourseWork\code\try2\Augmented





In [6]:

# --- Locations ---------------------------------------------------------------
# This step reads from the folder named "Augmented" and writes its results to
# the folder named "Preprocessed", one sub-folder per class.
dataset_path = r"C:\Users\94718\OneDrive\Desktop\NIBM\AI\CourseWork\code\try2\Augmented"
resized_path = r"C:\Users\94718\OneDrive\Desktop\NIBM\AI\CourseWork\code\try2\Preprocessed"

# --- Class folder names ------------------------------------------------------
classes = ['Benign', 'Early', 'Pre', 'Pro']

# --- Output tree -------------------------------------------------------------
# Make the root folder first, then one sub-folder per class. Existing folders
# are left untouched.
os.makedirs(resized_path, exist_ok=True)
for class_name in classes:
    class_dir = os.path.join(resized_path, class_name)
    os.makedirs(class_dir, exist_ok=True)

# Resize all images
#
# Every file under `dataset_path/<class>` is written to `resized_path/<class>`
# as a 224x224 RGB image with the same file name.
#
# A file that is already 224x224 RGB is copied byte-for-byte instead of being
# decoded and re-saved: re-encoding a JPEG that needs no change would only add
# another round of lossy compression.
for cls in classes:
    print(f"Resizing {cls}...")
    cls_path = os.path.join(dataset_path, cls)
    save_cls_path = os.path.join(resized_path, cls)
    # Sorted so the processing order does not depend on the OS directory listing.
    for img_file in tqdm(sorted(os.listdir(cls_path))):
        src_file = os.path.join(cls_path, img_file)
        dst_file = os.path.join(save_cls_path, img_file)
        if not os.path.isfile(src_file):
            continue  # ignore sub-directories

        # `size` and `mode` are available as soon as the file is opened; the
        # `with` block guarantees the handle is closed afterwards.
        with Image.open(src_file) as src:
            needs_rewrite = src.size != (224, 224) or src.mode != "RGB"
            if needs_rewrite:
                src.convert("RGB").resize((224,224)).save(dst_file)

        if not needs_rewrite:
            shutil.copy2(src_file, dst_file)

print(" All images resized to 224x224 and saved")


Resizing Benign...


100%|██████████| 800/800 [00:11<00:00, 72.69it/s]


Resizing Early...


100%|██████████| 800/800 [00:11<00:00, 70.99it/s]


Resizing Pre...


100%|██████████| 800/800 [00:10<00:00, 74.24it/s]


Resizing Pro...


100%|██████████| 800/800 [00:11<00:00, 72.53it/s]

 All images resized to 224x224 and saved



