In [3]:
!unzip /content/sample_data/amazon.zip -d office-caltech-amazon-data


Archive:  /content/sample_data/amazon.zip
   creating: office-caltech-amazon-data/backpack/
  inflating: office-caltech-amazon-data/backpack/frame_0001.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0002.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0003.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0004.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0005.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0006.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0007.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0008.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0009.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0010.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0011.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0012.jpg  
  inflating: office-caltech-amazon-data/backpack/frame_0013.jpg  
  inflating: office-caltech-amazon-data/backpack/f

In [16]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

class DatasetReader:
    def __init__(self, data_dir, train_ratio=0.8):
        self.data_dir = data_dir
        self.train_ratio = train_ratio
        self.image_paths = []
        self.labels = []
        self.x_train = None
        self.y_train = None
        self.x_test = None
        self.y_test = None
        self.num_classes = 0

    def load_dataset(self):
        label_to_index = {}
        index = 0

        for root, dirs, files in os.walk(self.data_dir):
            for file in files:
                if file.endswith(".jpg") or file.endswith(".png"):
                    image_path = os.path.join(root, file)
                    label = os.path.basename(root)

                    if label not in label_to_index:
                        label_to_index[label] = index
                        index += 1

                    self.image_paths.append(image_path)
                    self.labels.append(label_to_index[label])

        self.num_classes = len(label_to_index)

        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
            self.image_paths, self.labels, test_size=1 - self.train_ratio, random_state=42, stratify=self.labels)

    def preprocess_images(self, image_paths):
        images = []

        for path in image_paths:
            image = cv2.imread(path)
            image = cv2.resize(image, (224, 224))
            image = image.astype("float32") / 255.0
            images.append(image)

        return np.array(images)

    def preprocess_labels(self, labels):
        label_binarizer = LabelBinarizer()
        label_binarizer.fit(range(self.num_classes))
        labels = label_binarizer.transform(labels)

        return labels

    def get_train_data(self):
        x_train = self.preprocess_images(self.x_train)
        y_train = self.preprocess_labels(self.y_train)
        return x_train, y_train

    def get_test_data(self):
        x_test = self.preprocess_images(self.x_test)
        y_test = self.preprocess_labels(self.y_test)
        return x_test, y_test


# Load and preprocess the dataset
data_dir = "/content/office-caltech-amazon-data"
dataset = DatasetReader(data_dir)
dataset.load_dataset()
x_train, y_train = dataset.get_train_data()
x_test, y_test = dataset.get_test_data()

# Create the CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation="relu", input_shape=(224, 224, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(dataset.num_classes, activation="softmax"))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss="categorical_crossentropy", metrics=["accuracy"])

# Train the model
model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 1.3774633407592773
Test Accuracy: 0.7239583134651184
