In [2]:
import os
import cv2
import numpy as np
from pathlib import Path
from tqdm import tqdm

# Configuration
SOURCE_DIR = "C:\Cancer_8_Types_Balanced"     # Your original grouped dataset
DEST_DIR = "C:\Preprocessed_8_Types"          # Output directory
IMG_SIZE = (512, 512)

# Function to load and preprocess image safely
def preprocess_image(src_path):
    img = cv2.imread(src_path, cv2.IMREAD_UNCHANGED)

    if img is None:
        print(f"Warning: Failed to load {src_path}")
        return None

    # Convert to RGB if needed
    if len(img.shape) == 2:  # grayscale
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 4:  # RGBA
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
    else:  # BGR
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize and normalize
    img = cv2.resize(img, IMG_SIZE)
    img = img.astype(np.float32) / 255.0
    return img

# Process images from each class folder
cancer_types = os.listdir(SOURCE_DIR)

for cancer_type in cancer_types:
    src_folder = Path(SOURCE_DIR) / cancer_type
    dest_folder = Path(DEST_DIR) / cancer_type
    dest_folder.mkdir(parents=True, exist_ok=True)

    for fname in tqdm(os.listdir(src_folder), desc=f"Processing {cancer_type}"):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
            src_path = src_folder / fname
            dest_path = dest_folder / fname

            img = preprocess_image(str(src_path))
            if img is not None:
                # Save as float image (scaled back to 0–255 for saving)
                img_to_save = (img * 255).astype(np.uint8)
                cv2.imwrite(str(dest_path), img_to_save)

print("✅ Preprocessing complete. Check the 'Preprocessed_8_Types' folder.")


  SOURCE_DIR = "C:\Cancer_8_Types_Balanced"     # Your original grouped dataset
  DEST_DIR = "C:\Preprocessed_8_Types"          # Output directory
Processing ALL: 100%|██████████| 10574/10574 [03:35<00:00, 49.02it/s]
Processing Brain: 100%|██████████| 5371/5371 [01:50<00:00, 48.72it/s]
Processing Breast: 100%|██████████| 6859/6859 [02:50<00:00, 40.34it/s]
Processing Cervical: 100%|██████████| 12435/12435 [05:13<00:00, 39.69it/s]
Processing Kidney: 100%|██████████| 6817/6817 [02:41<00:00, 42.32it/s]
Processing Lung_Colon: 100%|██████████| 15619/15619 [06:31<00:00, 39.88it/s]
Processing Lymphoma: 100%|██████████| 5430/5430 [02:30<00:00, 36.06it/s]
Processing Oral: 100%|██████████| 6895/6895 [02:43<00:00, 42.29it/s]

✅ Preprocessing complete. Check the 'Preprocessed_8_Types' folder.





In [None]:
import os
import cv2
import numpy as np
from pathlib import Path
from tqdm import tqdm

# Configuration for EfficientNetB3
SOURCE_DIR = "C:\Cancer_8_Types_Balanced"     # Original grouped dataset
DEST_DIR = "C:\Preprocessed_8_Types_B3"       # Output directory
IMG_SIZE = (300, 300)  # EfficientNetB3 expects 300x300

# Function to load and preprocess image safely
def preprocess_image(src_path):
    img = cv2.imread(src_path, cv2.IMREAD_UNCHANGED)

    if img is None:
        print(f"Warning: Failed to load {src_path}")
        return None

    # Convert to RGB if needed
    if len(img.shape) == 2:  # Grayscale
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 4:  # RGBA
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
    else:  # BGR
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize and normalize
    img = cv2.resize(img, IMG_SIZE)
    img = img.astype(np.float32) / 255.0
    return img

# Process images from each cancer type folder
cancer_types = os.listdir(SOURCE_DIR)

for cancer_type in cancer_types:
    src_folder = Path(SOURCE_DIR) / cancer_type
    dest_folder = Path(DEST_DIR) / cancer_type
    dest_folder.mkdir(parents=True, exist_ok=True)

    for fname in tqdm(os.listdir(src_folder), desc=f"Processing {cancer_type}"):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
            src_path = src_folder / fname
            dest_path = dest_folder / fname

            img = preprocess_image(str(src_path))
            if img is not None:
                img_to_save = (img * 255).astype(np.uint8)
                cv2.imwrite(str(dest_path), img_to_save)

print("✅ Preprocessing complete for EfficientNetB3. Images saved to:", DEST_DIR)
