In [7]:
import numpy as np
import cv2 as cv
import os
import pandas as pd
import sklearn
import sklearn.cluster
import sklearn.metrics
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
import glob
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

class Recognizer:
    def __init__(self):
        # Initialize the class with the names of the objects to recognize
        self.nameOfClasses = ["toothbrush_4", "shampoo_6", "onion_1", "marker_6", "hand_towel_5", "garlic_4", "food_box_11", "cereal_box_5", "calculator_1", "bell_pepper_2"]
        self.label_encoder = LabelEncoder()  
        self.label_encoder.fit(self.nameOfClasses)  # Fit the label encoder with the class names
        self.datasetDirectory = os.path.join(os.getcwd(), "datasets")  # Path to the datasets directory
        self.outputDirectory = os.path.join(os.getcwd(), "output")  # Path to the output directory for saving keypoint visualizations
        os.makedirs(self.outputDirectory, exist_ok=True)  # Create the output directory if it doesn't exist
        self.trainingSet = {}  # Dictionary to hold training images for each class
        self.testSet = {}  # Dictionary to hold testing images for each class
        self.load_data()

    def load_data(self):
        # Check if the dataset directory exists
        if not os.path.exists(self.datasetDirectory):
            print(f"Dataset directory {self.datasetDirectory} does not exist.")
            return

        # Load images for each class
        for obj in self.nameOfClasses:
            images = self.load_images(obj)
            if len(images) < 2:
                print(f"Skipping {obj} due to insufficient images.")
                continue
            # Split the images into training and testing sets
            train_images, test_images = train_test_split(images, test_size=0.1, random_state=42)
            self.trainingSet[obj] = train_images
            self.testSet[obj] = test_images
        print("Data loading complete.")
        
        # Balance the training data by ensuring each class has the same number of images
        min_images = min(len(self.trainingSet[obj]) for obj in self.nameOfClasses)
        for obj in self.nameOfClasses:
            self.trainingSet[obj] = self.trainingSet[obj][:min_images]
        print("Balanced training data")

    def load_images(self, object_name):
        images = []
        object_directory = os.path.join(self.datasetDirectory, object_name)
        
        # Check if the object directory exists
        if not os.path.exists(object_directory):
            print(f"Object directory {object_directory} does not exist.")
            return images
        
        # Load images from the object directory
        for img_path in glob.glob(os.path.join(object_directory, "*_crop.png")):
            img = cv.imread(img_path)
            if img is not None:
                images.append(img)
        print(f"Loaded {len(images)} images for {object_name}")
        return images

    def detect_and_compute(self, image, method='orb', visualize=False, output_path=None):
        # Convert the image to grayscale
        grayscaleImage = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        # Initialize the feature detector based on the specified method
        if method == 'orb':
            detector = cv.ORB_create()
        elif method == 'brisk':
            detector = cv.BRISK_create()
        elif method == 'akaze':
            detector = cv.AKAZE_create()
        else:
            raise ValueError(f"Unsupported method: {method}")
        # Detect keypoints and compute descriptors
        keypoints, descriptors = detector.detectAndCompute(grayscaleImage, None)
        
        # Visualize and save keypoints if requested
        if visualize and output_path:
            image_with_keypoints = cv.drawKeypoints(image, keypoints, None, color=(0, 255, 0))
            cv.imwrite(output_path, image_with_keypoints)
        
        return keypoints, descriptors

    def extract_features(self, method='orb'):
        descriptors_list = []
        labels = []
        descriptor_lengths = []
        # Extract features for each class in the training set
        for label, images in self.trainingSet.items():
            for img in images:
                # Generate the output path for saving keypoint visualizations
                output_path = os.path.join(self.outputDirectory, f"{label}_{method}.png")
                keypoints, descriptors = self.detect_and_compute(img, method, visualize=True, output_path=output_path)
                if descriptors is not None:
                    descriptors_list.append(descriptors)
                    labels.extend([self.label_encoder.transform([label])[0]] * len(descriptors))
                    descriptor_lengths.append(len(descriptors))
        
        # Display some statistics about the descriptors
        print(f"Method: {method}")
        print(f"Number of images: {len(descriptor_lengths)}")
        print(f"Average number of descriptors per image: {np.mean(descriptor_lengths)}")
        print(f"Descriptor dimensions: {descriptors_list[0].shape[1] if descriptors_list else 'N/A'}")
        
        # Return the combined descriptors and corresponding labels
        if descriptors_list:
            return np.vstack(descriptors_list), np.array(labels)
        else:
            return np.array([]), np.array([])

    def train(self, method='orb', n_clusters=128):
        # Extract features from the training set
        descriptors, labels = self.extract_features(method)
        if descriptors.size == 0:
            return None, None, None
        # Scale the descriptors
        scaler = sklearn.preprocessing.StandardScaler()
        scaled_descriptors = scaler.fit_transform(descriptors)
        # Perform k-means clustering on the scaled descriptors
        kmeans = sklearn.cluster.KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        kmeans.fit(scaled_descriptors)
        
        # Map the cluster labels to class labels
        cluster_to_class = self.map_clusters_to_labels(kmeans.labels_, labels)
        return kmeans, scaler, cluster_to_class

    def map_clusters_to_labels(self, cluster_labels, true_labels):
        # Create a mapping from cluster labels to class labels
        cluster_to_class = {}
        for cluster in np.unique(cluster_labels):
            indices = np.where(cluster_labels == cluster)
            most_common_label = np.bincount(true_labels[indices]).argmax()
            cluster_to_class[cluster] = most_common_label
        return cluster_to_class

    def predict(self, kmeans, scaler, cluster_to_class, method='orb'):
        y_true = []
        y_pred = []
        # Predict the class for each image in the test set
        for label, images in self.testSet.items():
            for img in images:
                keypoints, descriptors = self.detect_and_compute(img, method)
                if descriptors is not None:
                    scaled_descriptors = scaler.transform(descriptors)
                    cluster_labels = kmeans.predict(scaled_descriptors)
                    predicted_labels = [cluster_to_class.get(cluster, -1) for cluster in cluster_labels]
                    predicted_label = self.majority_vote(predicted_labels)
                    y_true.append(self.label_encoder.transform([label])[0])
                    y_pred.append(predicted_label)
        return y_true, y_pred

    def majority_vote(self, labels):
        # Return the most common label in the list
        if len(labels) == 0:
            return -1  # Return an invalid class if no descriptors found
        counts = np.bincount(labels)
        most_common_label = np.argmax(counts)
        return most_common_label

    def evaluate(self, method='orb', n_clusters=128):
        # Train the model and evaluate its performance
        kmeans, scaler, cluster_to_class = self.train(method, n_clusters)
        if kmeans is None or scaler is None or cluster_to_class is None:
            print(f"Training failed for method: {method}")
            return 0.0, np.zeros((len(self.nameOfClasses), len(self.nameOfClasses)))

        y_true, y_pred = self.predict(kmeans, scaler, cluster_to_class, method)
        
        accuracy = sklearn.metrics.accuracy_score(y_true, y_pred)
        confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred, labels=np.arange(len(self.nameOfClasses)))
        return accuracy, confusion_matrix

if __name__ == '__main__':
    recognizer = Recognizer()  # Initialize the recognizer
    methods = ['orb', 'brisk', 'akaze']  # Define the methods to evaluate
    cluster_sizes = [4096]  # Define the number of clusters for k-means

    results = []

    # Evaluate the model for each method and cluster size
    for method in methods:
        for n_clusters in cluster_sizes:
            accuracy, confusion_matrix = recognizer.evaluate(method, n_clusters)
            results.append((method, n_clusters, accuracy, confusion_matrix))

    # Print the summarized results
    for method, n_clusters, accuracy, confusion_matrix in results:
        print(f"Method: {method}, Clusters: {n_clusters}")
        print(f"Accuracy: {accuracy:.4f}")
        print("Confusion Matrix:")
        if confusion_matrix.shape == (len(recognizer.nameOfClasses), len(recognizer.nameOfClasses)):
            print(pd.DataFrame(confusion_matrix, index=recognizer.nameOfClasses, columns=recognizer.nameOfClasses))
        else:
            print("Confusion matrix size does not match the number of classes.")
        print("\n")



Loaded 583 images for toothbrush_4
Loaded 799 images for shampoo_6
Loaded 772 images for onion_1
Loaded 807 images for marker_6
Loaded 786 images for hand_towel_5
Loaded 789 images for garlic_4
Loaded 782 images for food_box_11
Loaded 542 images for cereal_box_5
Loaded 580 images for calculator_1
Loaded 658 images for bell_pepper_2
Data loading complete.
Balanced training data
Method: orb
Number of images: 2475
Average number of descriptors per image: 128.30989898989898
Descriptor dimensions: 32
Method: brisk
Number of images: 3256
Average number of descriptors per image: 89.79791154791155
Descriptor dimensions: 64
Method: akaze
Number of images: 2326
Average number of descriptors per image: 35.013757523645744
Descriptor dimensions: 61
Method: orb, Clusters: 4096
Accuracy: 0.5045
Confusion Matrix:
               toothbrush_4  shampoo_6  onion_1  marker_6  hand_towel_5  \
toothbrush_4              2          2        4        12             0   
shampoo_6                 0         21   