In [207]:
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans

# Path to the dataset
data_path = './assets/train/'

# Feature extractor (SIFT)
sift = cv2.SIFT_create(contrastThreshold=0.01)

## Create Descriptors

In [274]:
def extract_descriptors_from_image(image_path):
    """
    Extract exactly a specified number of descriptors from an image using SIFT.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Image not found or invalid: {image_path}")
    
    keypoints, descriptors = sift.detectAndCompute(image, None)
    
    if descriptors is None or len(descriptors) == 0:
        raise ValueError(f"No descriptors found in image: {image_path}")

    return descriptors


In [275]:
def collect_descriptors_all(data_path):
    """
    Collect descriptors and their corresponding labels.
    """
    all_descriptors = []
    all_labels = []
    
    classes = [d for d in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, d))]
    
    num_des_tot = 0
    num_des_class_dist = []
    for i, class_name in enumerate(classes):
        class_folder = os.path.join(data_path, class_name)
        print(f"Processing class: {class_name}")
        
        num_des_class = 0
        for image_name in os.listdir(class_folder):
            if image_name.lower().endswith(('png', 'jpg', 'jpeg')):
                image_path = os.path.join(class_folder, image_name)
                try:
                    # Extract descriptors for this image
                    descriptors = extract_descriptors_from_image(image_path)
                    
                    # Append descriptors and corresponding labels
                    all_descriptors += descriptors.tolist()
                    all_labels += [i]*len(descriptors)
                    num_des_class += len(descriptors)
                except ValueError as e:
                    print(f"Error processing {image_name}: {e}")
        num_des_tot += num_des_class
        num_des_class_dist.append(num_des_class)
    print(f'total number of descriptors: {num_des_tot}')
    print(f'number of descriptors per class: {num_des_class_dist}')
    
    # Convert to NumPy arrays
    all_descriptors = np.array(all_descriptors)
    all_labels = np.array(all_labels)
    
    return all_descriptors, all_labels

In [289]:
# Extract descriptors and labels
descriptors_pool, labels_pool = collect_descriptors_all(data_path)
print("Descriptor extraction completed.")

# Save the descriptors and labels
np.save('descriptors.npy', descriptors_pool)
np.save('labels.npy', labels_pool)


Processing class: tick
Processing class: trilobite
Processing class: umbrella
Processing class: watch
Processing class: water_lilly
Processing class: wheelchair
Processing class: wild_cat
Processing class: windsor_chair
Processing class: wrench
Processing class: yin_yang
total number of descriptors: 100076
number of descriptors per class: [13040, 13831, 5370, 9933, 13181, 14386, 13445, 7841, 3860, 5189]
Descriptor extraction completed.


## K-means codebook

In [277]:
# Number of clusters (k)
k = 50  # Adjust based on your application

# Initialize and fit KMeans
kmeans = KMeans(n_clusters=k, random_state=42, verbose=1, n_init=10)
kmeans.fit(descriptors_pool)

# Access cluster centers and labels
k_means_codebook = kmeans.cluster_centers_  # Shape: (k, 128)
labels = kmeans.labels_  # Shape: (104008,)


Initialization complete
Iteration 0, inertia 13918302770.0.
Iteration 1, inertia 9473660338.157099.
Iteration 2, inertia 9256490834.782393.
Iteration 3, inertia 9166340549.62132.
Iteration 4, inertia 9116701757.458235.
Iteration 5, inertia 9081762703.546354.
Iteration 6, inertia 9055276841.253765.
Iteration 7, inertia 9035533508.830261.
Iteration 8, inertia 9021246309.880865.
Iteration 9, inertia 9011299035.230618.
Iteration 10, inertia 9003966084.124756.
Iteration 11, inertia 8998336285.687014.
Iteration 12, inertia 8993783126.308779.
Iteration 13, inertia 8989868952.294231.
Iteration 14, inertia 8986488505.460495.
Iteration 15, inertia 8983664629.140762.
Iteration 16, inertia 8981451982.470354.
Iteration 17, inertia 8979794933.314522.
Iteration 18, inertia 8978468835.653322.
Iteration 19, inertia 8977318532.40188.
Iteration 20, inertia 8976301538.80373.
Iteration 21, inertia 8975419340.91672.
Iteration 22, inertia 8974664096.008266.
Iteration 23, inertia 8974033801.511042.
Iteration 

## Train with K-means codebook

In [278]:
def create_bow_histogram(img_descriptors, kmeans):
    """
    Create a Bag of Words histogram for an image.
    descriptors: numpy array of descriptors (num_descriptors, 128)
    kmeans: trained k-means model
    """
    # Predict the closest cluster for each descriptor
    labels = kmeans.predict(img_descriptors)  # This gives the cluster index for each descriptor
    
    # Create a histogram of cluster assignments (BoW)
    hist, _ = np.histogram(labels, bins=np.arange(k+1), range=(0, k))
    
    # Normalize the histogram to get a normalized BoW representation
    hist = hist / np.sum(hist)  # Normalize to make it a probability distribution
    return hist

In [279]:
def collect_descriptors_with_labels(data_path):
    """
    Collect descriptors and their corresponding labels.
    """
    all_descriptors = []
    all_labels = []
    
    classes = [d for d in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, d))]

    num_des_tot = 0
    num_des_class_dist = []
    for i, class_name in enumerate(classes):
        class_folder = os.path.join(data_path, class_name)
        print(f"Processing class: {class_name}")
        
        for image_name in os.listdir(class_folder):
            if image_name.lower().endswith(('png', 'jpg', 'jpeg')):
                image_path = os.path.join(class_folder, image_name)
                try:
                    # Extract descriptors for this image
                    descriptors = extract_descriptors_from_image(image_path)
                    all_descriptors.append(descriptors.tolist())
                    all_labels.append(i)
                except ValueError as e:
                    print(f"Error processing {image_name}: {e}")


    print(f'total number of descriptors: {num_des_tot}')
    print(f'number of descriptors per class: {num_des_class_dist}')
    
    # Convert to NumPy arrays
    # all_descriptors = np.array(all_descriptors)
    all_labels = np.array(all_labels)
    
    return all_descriptors, all_labels

In [280]:
# Extract descriptors and labels
data_path = './assets/train/'

train_descriptors, train_labels = collect_descriptors_with_labels(data_path)
print("Descriptor extraction completed.")

Processing class: tick
Processing class: trilobite
Processing class: umbrella
Processing class: watch
Processing class: water_lilly
Processing class: wheelchair
Processing class: wild_cat
Processing class: windsor_chair
Processing class: wrench
Processing class: yin_yang
total number of descriptors: 0
number of descriptors per class: []
Descriptor extraction completed.


In [281]:
train_bow = []
for descriptors in train_descriptors:
    image_bow = create_bow_histogram(descriptors, kmeans)
    train_bow.append(image_bow)

train_bow = np.array(train_bow)

In [282]:
train_bow.shape

(150, 50)

## Test - bag of words histogram

In [283]:
# Extract descriptors and labels
data_path = './assets/test/'

test_descriptors, test_labels = collect_descriptors_with_labels(data_path)
print("Descriptor extraction completed.")

Processing class: tick
Processing class: trilobite
Processing class: umbrella
Processing class: watch
Processing class: water_lilly
Processing class: wheelchair
Processing class: wild_cat
Processing class: windsor_chair
Processing class: wrench
Processing class: yin_yang
total number of descriptors: 0
number of descriptors per class: []
Descriptor extraction completed.


In [284]:
test_bow = []
for descriptors in test_descriptors:
    image_bow = create_bow_histogram(descriptors, kmeans)
    test_bow.append(image_bow)

test_bow = np.array(test_bow)

# Q2. RF classifier

In [None]:
import matplotlib.pyplot as plt
import time
import tracemalloc
import random
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score , recall_score

In [271]:
x_train = train_bow.T
y_train = train_labels
x_test = test_bow.T
y_test = test_labels

In [239]:
def RF_classification(x_train, y_train, x_test, y_test, n_estimators=30, max_depth=10, bootstrap=True, random_state=None, max_samples=0.7, max_features="sqrt", criterion='entropy'):
    rf_clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, bootstrap=bootstrap, random_state=random_state, max_samples=max_samples, max_features=max_features, criterion=criterion)

    # Train ----------------------------------------------------------
    tracemalloc.start() 
    start_time = time.perf_counter()
    
    rf_clf.fit(x_train.T, y_train.ravel())
    
    train_time = time.perf_counter() - start_time
    current, train_peak_memory = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    # Test ----------------------------------------------------------
    tracemalloc.start() 
    start_time = time.perf_counter()

    y_pred = rf_clf.predict(x_test.T)

    test_time = time.perf_counter()- start_time
    current, test_peak_memory = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    accuracy = accuracy_score(y_test.T, y_pred)

    # Full classification report
    # print(classification_report(y_test.T, y_pred, zero_division=0))

    # Retrieve the maximum depth of each tree in the forest
    tree_depths = [estimator.tree_.max_depth for estimator in rf_clf.estimators_]
    max_tree_depth = max(tree_depths)

    return accuracy, y_pred, train_time, test_time, train_peak_memory, test_peak_memory, max_tree_depth

In [240]:
def create_two_pixel_features(x_train, x_test, n_pairs=None, random_seed=None):
    x_combined = np.concatenate([x_train, x_test], axis=1)

    n_features, n_samples = x_combined.shape

    # Set random seed if provided
    if random_seed is not None:
        random.seed(random_seed)

    # Generate all unique pairs of features where i != j
    feature_pairs = [(i, j) for i in range(n_features) for j in range(i + 1, n_features)]

    # If n_pairs is specified, randomly select a subset of feature pairs
    if n_pairs is not None and n_pairs < len(feature_pairs):
        feature_pairs = random.sample(feature_pairs, n_pairs)
    
    # Initialize a new features matrix for pairwise differences
    new_features = np.zeros((len(feature_pairs), n_samples))

    # Fill in the new features with pairwise differences
    for idx, (i, j) in enumerate(feature_pairs):
        new_features[idx, :] = x_combined[i, :] - x_combined[j, :]

    x_train_2pix = new_features[:, :x_train.shape[1]]
    x_test_2pix = new_features[:, x_train.shape[1]:]

    return x_train_2pix, x_test_2pix

### Weak learner: Axis-aligned test

In [272]:
accuracy, y_pred, train_time, test_time, train_peak_memory, test_peak_memory, max_tree_depth = RF_classification(x_train, y_train, x_test, y_test, n_estimators=30, max_depth=10, bootstrap=True, random_state=None, max_samples=0.7, max_features="sqrt", criterion='entropy')

accuracy = accuracy_score(y_test.T, y_pred)
precision = precision_score(y_test.T, y_pred, average= "macro", zero_division=0)
recall = recall_score(y_test.T, y_pred, average= "macro", zero_division=0)

print(f'train time: {train_time}')
print(f'test time: {test_time}')
print(f'accuracy: {accuracy}')
print(f'precision: {precision}')
print(f'recall: {recall}')

train time: 0.17263449996244162
test time: 0.001920499955303967
accuracy: 0.44666666666666666
precision: 0.46541056166056166
recall: 0.44666666666666666


### Weak learner: Two-pixel test

In [273]:
x_train_2pix, x_test_2pix = create_two_pixel_features(x_train, x_test, n_pairs=60, random_seed=0)
accuracy, y_pred, train_time, test_time, train_peak_memory, test_peak_memory, max_tree_depth = RF_classification(x_train_2pix, y_train, x_test_2pix, y_test, n_estimators=30, max_depth=10, bootstrap=True, random_state=None, max_samples=0.7, max_features="sqrt", criterion='entropy')

accuracy = accuracy_score(y_test.T, y_pred)
precision = precision_score(y_test.T, y_pred, average= "macro", zero_division=0)
recall = recall_score(y_test.T, y_pred, average= "macro", zero_division=0)

print(f'train time: {train_time}')
print(f'test time: {test_time}')
print(f'accuracy: {accuracy}')
print(f'precision: {precision}')
print(f'recall: {recall}')

train time: 0.17412590002641082
test time: 0.001936199958436191
accuracy: 0.43333333333333335
precision: 0.44629120879120887
recall: 0.4333333333333334


# Q3. RF codebook

In [363]:
rf_code = RandomForestClassifier(n_estimators=1, max_depth=5, bootstrap=True, random_state=None, max_samples=0.9, max_features="sqrt", criterion='entropy')
rf_code.fit(descriptors_pool, labels_pool.ravel())

In [373]:
def create_rf_histogram(descriptors, rf_code):
    """
    Create a histogram of leaf node visits for an image's descriptors.
    
    Args:
        descriptors (np.ndarray): Array of descriptor vectors for a single image, shape (n_descriptors, descriptor_dim).
        rf_code (RandomForestClassifier): Trained Random Forest model.
        
    Returns:
        np.ndarray: A 1D histogram of leaf node visits, shape (total_leaf_nodes,).
    """
    # Step 1: Apply descriptors to the forest to get leaf indices
    leaf_indices = rf_code.apply(descriptors)  # Shape: (n_descriptors, n_trees)

    # Step 2: Flatten leaf indices into a single array
    flat_leaf_indices = leaf_indices.flatten()

    # Step 3: Compute the histogram of leaf indices
    # Determine the total number of leaf nodes across all trees
    n_trees = rf_code.n_estimators
    total_leaf_nodes = sum([estimator.tree_.node_count for estimator in rf_code.estimators_])
    
    # Create histogram with bins corresponding to leaf node indices
    histogram, _ = np.histogram(flat_leaf_indices, bins=np.arange(total_leaf_nodes + 1), density=False)

    # Step 4: Normalize the histogram (optional)
    histogram = histogram / np.sum(histogram)  # Normalize to sum to 1

    return histogram


### create train and test dataset

In [374]:
# Extract descriptors and labels
data_path = './assets/train/'

train_descriptors, train_labels = collect_descriptors_with_labels(data_path)
print("Descriptor extraction completed.")

train_bow_rf = []
for descriptors in train_descriptors:
    image_bow = create_rf_histogram(descriptors, rf_code)
    train_bow_rf.append(image_bow)

train_bow_rf = np.array(train_bow_rf)

Processing class: tick
Processing class: trilobite
Processing class: umbrella
Processing class: watch
Processing class: water_lilly
Processing class: wheelchair
Processing class: wild_cat
Processing class: windsor_chair
Processing class: wrench
Processing class: yin_yang
total number of descriptors: 0
number of descriptors per class: []
Descriptor extraction completed.


In [375]:
# Extract descriptors and labels
data_path = './assets/test/'

test_descriptors, test_labels = collect_descriptors_with_labels(data_path)
print("Descriptor extraction completed.")

test_bow_rf = []
for descriptors in train_descriptors:
    image_bow = create_rf_histogram(descriptors, rf_code)
    test_bow_rf.append(image_bow)

test_bow_rf = np.array(test_bow_rf)

Processing class: tick
Processing class: trilobite
Processing class: umbrella
Processing class: watch
Processing class: water_lilly
Processing class: wheelchair
Processing class: wild_cat
Processing class: windsor_chair
Processing class: wrench
Processing class: yin_yang
total number of descriptors: 0
number of descriptors per class: []
Descriptor extraction completed.


In [336]:
x_train = train_bow_rf.T
y_train = train_labels
x_test = test_bow_rf.T
y_test = test_labels

### Weak learner: Axis-aligned test

In [377]:
accuracy, y_pred, train_time, test_time, train_peak_memory, test_peak_memory, max_tree_depth = RF_classification(x_train, y_train, x_test, y_test, n_estimators=10, max_depth=10, bootstrap=True, random_state=None, max_samples=0.7, max_features="sqrt", criterion='entropy')

accuracy = accuracy_score(y_test.T, y_pred)
precision = precision_score(y_test.T, y_pred, average= "macro", zero_division=0)
recall = recall_score(y_test.T, y_pred, average= "macro", zero_division=0)

print(f'train time: {train_time}')
print(f'test time: {test_time}')
print(f'accuracy: {accuracy}')
print(f'precision: {precision}')
print(f'recall: {recall}')

train time: 0.05614300002343953
test time: 0.0009127999655902386
accuracy: 0.9266666666666666
precision: 0.9292682072829133
recall: 0.9266666666666667


### Weak learner: Two-pixel test

In [378]:
x_train_2pix, x_test_2pix = create_two_pixel_features(x_train, x_test, n_pairs=60, random_seed=0)
accuracy, y_pred, train_time, test_time, train_peak_memory, test_peak_memory, max_tree_depth = RF_classification(x_train_2pix, y_train, x_test_2pix, y_test, n_estimators=10, max_depth=10, bootstrap=True, random_state=None, max_samples=0.7, max_features="sqrt", criterion='entropy')

accuracy = accuracy_score(y_test.T, y_pred)
precision = precision_score(y_test.T, y_pred, average= "macro", zero_division=0)
recall = recall_score(y_test.T, y_pred, average= "macro", zero_division=0)

print(f'train time: {train_time}')
print(f'test time: {test_time}')
print(f'accuracy: {accuracy}')
print(f'precision: {precision}')
print(f'recall: {recall}')

train time: 0.056869299965910614
test time: 0.0009121999610215425
accuracy: 0.96
precision: 0.9616666666666667
recall: 0.96
