In [39]:
#Importing the required libraries
import os
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [24]:
# getting the current working directory, and initializing image and label list
base_path = 'Dataset/Animals-10/raw-img'
images = []
labels = []

In [27]:
def load_images_from_subfolders(base_folder, target_size=(128, 128)):
    data = []
    labels = []
    for subdir, _, files in os.walk(base_folder):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')):
                img_path = os.path.join(subdir, file)
                img = Image.open(img_path)
                img = img.resize(target_size)
                img_array = np.array(img)
                if img_array.shape == (target_size[0], target_size[1], 3): 
                    data.append(img_array)
                    label = os.path.basename(subdir)
                    labels.append(label)
    return data, labels

images, labels = load_images_from_subfolders(base_path)
print(f"Loaded {len(images)} images.")

Loaded 26128 images.


In [32]:
images = np.array(images)
labels = np.array(labels)

In [33]:
print(f"Images shape: {images.shape}")
print(f"Labels shape: {labels.shape}")

Images shape: (26128, 128, 128, 3)
Labels shape: (26128,)


In [36]:
images = images.astype('float32') / 255.0
train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=0.2, random_state=42)

In [38]:
encoder = LabelBinarizer()
train_labels = encoder.fit_transform(train_labels)
test_labels = encoder.transform(test_labels)

In [40]:
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

datagen.fit(train_images)