In [1]:
import os
import numpy as np
import xml.etree.ElementTree as ET
from PIL import Image
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import label_binarize, MultiLabelBinarizer
from skimage.transform import resize
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [2]:
class ImageClassifier:
    def __init__(self, dataset_path):
        self.dataset_path = dataset_path
        self.images_path = os.path.join(dataset_path, 'JPEGImages')
        self.annotations_path = os.path.join(dataset_path, 'Annotations')
        self.vgg_model = VGG19(weights='imagenet', include_top=True)
        self.feature_extractor = Model(inputs=self.vgg_model.input, outputs=self.vgg_model.get_layer('fc2').output)
        self.features = []  # List to store extracted features
        self.labels = []  # List to store labels
        self.mlb = MultiLabelBinarizer()  # MultiLabelBinarizer for label binarization
        self.classifiers = {}  # Dictionary to store trained classifiers
        self.val_labels = None
        self.val_features = None
        self.train_labels = None
        self.train_features = None

    def load_images_and_annotations(self):
        """
        Load images and corresponding annotations from the dataset.
        """
        image_dir = os.listdir(self.images_path)

        for image_path in image_dir:
            image_id = os.path.join(self.images_path, image_path)
            img = image.load_img(image_id, target_size=(224, 224))
            img = image.img_to_array(img)
            img = np.expand_dims(img, axis=0)
            img = preprocess_input(img)

            img_features = self.feature_extractor.predict(img)
            self.features.append(img_features)

            annotation_file = image_path.split('.')[0]
            annotation_path = os.path.join(self.annotations_path, annotation_file + '.xml')

            tree = ET.parse(annotation_path)
            root = tree.getroot()

            class_labels = []
            for obj in root.findall('object'):
                class_label = obj.find('name').text
                class_labels.append(class_label)
            self.labels.append(class_labels)

        self.features = np.vstack(self.features)
        self.labels = np.array(self.labels)

    def train_classifiers(self):
        """
        Train SVM classifiers for each class using the extracted features and labels.
        """
        self.labels = self.mlb.fit_transform(self.labels)
        self.train_features, self.val_features, self.train_labels, self.val_labels = train_test_split(self.features, self.labels, test_size=0.3, random_state=7)

        for class_index in range(self.train_labels.shape[1]):
            class_train_labels = self.train_labels[:, class_index]

            svm = SVC()
            svm.fit(self.train_features, class_train_labels)
            self.classifiers[self.mlb.classes_[class_index]] = svm

    def evaluate(self):
        """
        Evaluate the trained classifiers on the validation set.
        """
        accuracies = []
        confusion_matrix = np.zeros((50, 50), dtype=int)

        for class_index in range(self.val_labels.shape[1]):
            class_val_labels = self.val_labels[:, class_index]
            predicted_labels = self.classifiers[self.mlb.classes_[class_index]].predict(self.val_features)
            accuracy = np.mean(predicted_labels == class_val_labels)
            accuracies.append(accuracy)

            for i in range(len(predicted_labels)):
                true_label = class_val_labels[i]
                predicted_label = predicted_labels[i]
                confusion_matrix[true_label, predicted_label] += 1

        for class_label, accuracy in zip(self.mlb.classes_, accuracies):
            print(f'Accuracy for class {class_label}: {accuracy}')

        print(confusion_matrix)

In [None]:
# Set the path to the dataset folder on your local machine
dataset_path = '/content/drive/MyDrive/Colab Notebooks/VOCdevkit/VOC2007/'

# Create an instance of the ImageClassifier class
classifier = ImageClassifier(dataset_path)

# Load images and annotations
classifier.load_images_and_annotations()

# Train classifiers
classifier.train_classifiers()

In [4]:
# Evaluate classifiers
classifier.evaluate()

Accuracy for class aeroplane: 0.9933510638297872
Accuracy for class bicycle: 0.9707446808510638
Accuracy for class bird: 0.9853723404255319
Accuracy for class boat: 0.9873670212765957
Accuracy for class bottle: 0.9554521276595744
Accuracy for class bus: 0.9740691489361702
Accuracy for class car: 0.9481382978723404
Accuracy for class cat: 0.976063829787234
Accuracy for class chair: 0.918218085106383
Accuracy for class cow: 0.9853723404255319
Accuracy for class diningtable: 0.9700797872340425
Accuracy for class dog: 0.964095744680851
Accuracy for class horse: 0.976063829787234
Accuracy for class motorbike: 0.9840425531914894
Accuracy for class person: 0.8949468085106383
Accuracy for class pottedplant: 0.9507978723404256
Accuracy for class sheep: 0.9787234042553191
Accuracy for class sofa: 0.9481382978723404
Accuracy for class train: 0.992686170212766
Accuracy for class tvmonitor: 0.96875
[[27580   129     0 ...     0     0     0]
 [  890  1481     0 ...     0     0     0]
 [    0     0  