In [1]:
import os
import random
import shutil
from collections import defaultdict

# Seed the module-level RNG that the shuffle below draws from
SEED = 42
random.seed(SEED)

# Input folder and the two output folders produced by the split
amplified_images_dir = "Amplified_Images"
train_dir = "Train_Set_Images"
test_dir = "Test_Set_Images"

# Make sure both output folders exist (no error if they already do)
for output_dir in (train_dir, test_dir):
    os.makedirs(output_dir, exist_ok=True)

# Per-class tallies of copied images; missing keys start at 0
train_counts, test_counts = defaultdict(int), defaultdict(int)

# Function to split images into train and test sets
def split_images_by_class(
    source_dir=None,
    train_root=None,
    test_root=None,
    train_fraction=0.8,
    train_tally=None,
    test_tally=None,
):
    """Copy each class folder's images into per-class train and test folders.

    Every sub-directory of ``source_dir`` is treated as one class. Its files
    are shuffled with the module-level ``random`` generator, the first
    ``train_fraction`` of them are copied to ``train_root/<class>`` and the
    rest to ``test_root/<class>``.

    Directory listings are sorted before shuffling: ``os.listdir`` returns
    entries in arbitrary order, so without sorting a fixed seed would not
    produce the same split on every machine.

    Args:
        source_dir: Folder containing one sub-folder per class. Defaults to
            the module-level ``amplified_images_dir``.
        train_root: Destination root for the training split. Defaults to the
            module-level ``train_dir``.
        test_root: Destination root for the test split. Defaults to the
            module-level ``test_dir``.
        train_fraction: Share of each class that goes to the training split.
        train_tally: Mapping incremented once per copied training image,
            keyed by class name. Defaults to the module-level
            ``train_counts``.
        test_tally: Same as ``train_tally`` for the test split. Defaults to
            the module-level ``test_counts``.

    Returns:
        None. Results are the copied files and the updated tallies.
    """
    # Fall back to the module-level configuration only when not overridden.
    source_dir = amplified_images_dir if source_dir is None else source_dir
    train_root = train_dir if train_root is None else train_root
    test_root = test_dir if test_root is None else test_root
    train_tally = train_counts if train_tally is None else train_tally
    test_tally = test_counts if test_tally is None else test_tally

    # NOTE: destination folders are not cleared; files left over from an
    # earlier run with a different split remain in place.
    for dx_class in sorted(os.listdir(source_dir)):
        class_dir = os.path.join(source_dir, dx_class)
        if not os.path.isdir(class_dir):
            # Stray top-level files (e.g. .DS_Store) are not classes.
            continue

        # Keep regular files only; a nested directory cannot be copied with
        # shutil.copy. Sorting gives the shuffle a stable starting order.
        images = sorted(
            name
            for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )
        random.shuffle(images)

        split_idx = int(train_fraction * len(images))
        partitions = (
            (images[:split_idx], os.path.join(train_root, dx_class), train_tally),
            (images[split_idx:], os.path.join(test_root, dx_class), test_tally),
        )

        for subset, dest_dir, tally in partitions:
            os.makedirs(dest_dir, exist_ok=True)
            for img in subset:
                shutil.copy(
                    os.path.join(class_dir, img),
                    os.path.join(dest_dir, img),
                )
                # .get() keeps this working for plain dicts and defaultdicts.
                tally[dx_class] = tally.get(dx_class, 0) + 1

# Run the split
split_images_by_class()

# Report the per-class totals for each set, training first
for heading, counts in (
    ("\nImage counts in training set:", train_counts),
    ("\nImage counts in testing set:", test_counts),
):
    print(heading)
    for dx, count in counts.items():
        print(f"{dx}: {count} images")



Image counts in training set:
akiec: 784 images
bcc: 822 images
bkl: 879 images
df: 736 images
mel: 890 images
nv: 804 images
vasc: 908 images

Image counts in testing set:
akiec: 197 images
bcc: 206 images
bkl: 220 images
df: 184 images
mel: 223 images
nv: 201 images
vasc: 228 images
