In [2]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical




function to create label

In [4]:
# function to create label(dictionary)
def load_datasets(dataset_categories):
    images = []
    labels = []

    for dataset_id, (dataset_path, category) in enumerate(dataset_categories.items()):
        for object_category in os.listdir(dataset_path):
            category_path = os.path.join(dataset_path, object_category)

            for file in os.listdir(category_path):
                img_path = os.path.join(category_path, file)
                img = cv2.imread(img_path)
                img = cv2.resize(img, (224, 224))  # resize all images
                images.append(img)
                labels.append({
                    'category': category,
                    'dataset_id': dataset_id
                })

    return images, labels

loading datasets

In [5]:
# define dataset paths and corresponding category names
dataset_categories = {
    r'images\bee_image_sample': 'bee',
    r'images\football_image_sample': 'football',
    r'images\keyboard_image_sample': 'keyboard',
    r'images\laptop_image_sample': 'laptop',
    r'images\Letter_M_image_sample': 'letter M',
    r'images\Letter_T_image_sample': 'letter T',
    r'images\monitor_image_sample': 'monitor',
    r'images\mouse_image_sample': 'mouse',
    r'images\trains_image_sample': 'trains',
}

In [6]:
# verifying dataset is loaded correctly 
for dataset_path, category in dataset_categories.items():
    num_images = sum(len(files) for _, _, files in os.walk(dataset_path))
    print(f"dataset: {dataset_path}, category: {category}, no. of images: {num_images}")

dataset: images\bee_image_sample, category: bee, no. of images: 275
dataset: images\football_image_sample, category: football, no. of images: 275
dataset: images\keyboard_image_sample, category: keyboard, no. of images: 295
dataset: images\laptop_image_sample, category: laptop, no. of images: 136
dataset: images\Letter_M_image_sample, category: letter M, no. of images: 295
dataset: images\Letter_T_image_sample, category: letter T, no. of images: 295
dataset: images\monitor_image_sample, category: monitor, no. of images: 250
dataset: images\mouse_image_sample, category: mouse, no. of images: 275
dataset: images\trains_image_sample, category: trains, no. of images: 235


load and preprocess the datasets

In [7]:
images, labels = load_datasets(dataset_categories)
print(images,labels)

# convert labels to numerical format
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform([label['category'] for label in labels])
encoded_labels = to_categorical(encoded_labels)

[array([[[ 3, 32, 23],
        [ 5, 36, 27],
        [ 7, 41, 31],
        ...,
        [30, 67, 93],
        [32, 69, 95],
        [32, 69, 95]],

       [[ 5, 36, 27],
        [ 7, 40, 31],
        [10, 44, 34],
        ...,
        [27, 64, 90],
        [28, 65, 91],
        [29, 66, 92]],

       [[ 8, 40, 32],
        [11, 44, 35],
        [13, 48, 40],
        ...,
        [23, 60, 86],
        [25, 61, 88],
        [25, 62, 88]],

       ...,

       [[ 6, 32, 29],
        [ 6, 33, 30],
        [ 6, 34, 32],
        ...,
        [10, 64, 64],
        [11, 65, 65],
        [10, 67, 66]],

       [[ 4, 34, 31],
        [ 6, 36, 33],
        [ 7, 39, 35],
        ...,
        [ 9, 63, 63],
        [10, 65, 63],
        [11, 66, 64]],

       [[ 7, 38, 35],
        [ 9, 40, 37],
        [10, 43, 39],
        ...,
        [ 9, 63, 63],
        [10, 65, 62],
        [11, 66, 63]]], dtype=uint8), array([[[163, 181, 190],
        [161, 183, 195],
        [160, 180, 197],
        ...,
  

splitting into train / test data

In [8]:
# split the dataset into training, testing sets
X_train, X_test, y_train, y_test = train_test_split(np.array(images), encoded_labels, test_size=0.2, random_state=42)

print("Number of Datasets:", len(dataset_categories))

Number of Datasets: 9
