In [5]:
import os
import shutil
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [2]:
dataset_path = "dataset"
train_path = "dataset/train"
test_path = "dataset/test"

# Ensure train and test directories exist
for folder in [train_path, test_path]:
    os.makedirs(folder, exist_ok=True)

# Iterate through Level_0, Level_1, Level_2
for category in ["Level_0", "Level_1", "Level_2"]:
    category_path = os.path.join(dataset_path, category)

    # Skip if it's not a directory
    if not os.path.isdir(category_path):
        continue  

    # Collect all image files
    images = [f for f in os.listdir(category_path) if f.lower().endswith((".jpg", ".png", ".jpeg"))]

    # Shuffle for randomness
    random.shuffle(images)

    # Split 80% train, 20% test
    split_index = int(0.8 * len(images))
    train_images, test_images = images[:split_index], images[split_index:]

    # Create category subfolders in train/test
    os.makedirs(os.path.join(train_path, category), exist_ok=True)
    os.makedirs(os.path.join(test_path, category), exist_ok=True)

    # Copy images to train/test (Keeping original dataset)
    for img in train_images:
        shutil.copy(os.path.join(category_path, img), os.path.join(train_path, category, img))

    for img in test_images:
        shutil.copy(os.path.join(category_path, img), os.path.join(test_path, category, img))

    # Ensure images are unique in train/test by removing them from the original folder
    for img in images:
        os.remove(os.path.join(category_path, img))

print("✅ Dataset split successfully! Original dataset retained, and train/test sets are unique.")

✅ Dataset split successfully! Original dataset retained, and train/test sets are unique.


In [4]:
# Define paths
train_dir = "dataset/train"
test_dir = "dataset/test"

def preprocess_image(img):
    """ Resize the image to (128,128) without altering other properties """
    img = np.array(img, dtype=np.float32)  # Ensure float32
    img = cv2.resize(img, (128, 128), interpolation=cv2.INTER_CUBIC)  # High-quality resizing
    return img  # Return unchanged, only resized image


# Define ImageDataGenerator (without additional augmentation)
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_image)

test_datagen = ImageDataGenerator(preprocessing_function=preprocess_image)

# Load images from directories
train_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),  # Already resized by preprocessing_function
    batch_size=32,
    class_mode='categorical'
)

test_data = test_datagen.flow_from_directory(
    test_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical'
)

# Get class labels 
class_labels = list(train_data.class_indices.keys())
print("Class labels:", class_labels)


Found 798 images belonging to 3 classes.
Found 201 images belonging to 3 classes.
Class labels: ['Level_0', 'Level_1', 'Level_2']


In [6]:
import numpy as np
from collections import Counter

# Count images per class in training data
train_class_counts = Counter(train_data.labels)
test_class_counts = Counter(test_data.labels)

# Map indices to class names
class_labels = list(train_data.class_indices.keys())

# Print results
print("Train Dataset:")
for i, count in train_class_counts.items():
    print(f"{class_labels[i]}: {count} images")

print("\nTest Dataset:")
for i, count in test_class_counts.items():
    print(f"{class_labels[i]}: {count} images")


Train Dataset:
Level_0: 309 images
Level_1: 378 images
Level_2: 111 images

Test Dataset:
Level_0: 78 images
Level_1: 95 images
Level_2: 28 images
