In [1]:
%pip install --upgrade pip
%pip install opencv-python scikit-learn matplotlib scipy==1.10.1

Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 25.1
    Uninstalling pip-25.1:
      Successfully uninstalled pip-25.1
Successfully installed pip-25.1.1
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from scipy import stats
from pathlib import Path, PureWindowsPath
import random


def _read_split_file(list_file, data_dir):
    """Parse one listing file whose lines look like ``<label> <image path>``.

    Args:
        list_file: Path of the listing file (e.g. ``train.txt``).
        data_dir: ``pathlib.Path`` that the image paths are joined onto.

    Returns:
        ``(labels, img_paths)``: two parallel lists of strings, in file order.

    Raises:
        ValueError: if a non-blank line does not contain both a label and a path.
    """
    labels, img_paths = [], []
    # The context manager closes the handle even if a line fails to parse.
    with open(list_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            # Split on the first whitespace run only, so image paths that
            # contain spaces are kept intact.
            label, img_path = line.split(maxsplit=1)
            # Accept Windows-style separators in the listing; forward slashes
            # are understood by pathlib on every platform.
            img_path = img_path.replace("\\", "/")
            labels.append(label)
            img_paths.append(str(data_dir / img_path))
    return labels, img_paths


def extract_dataset_info(data_path):
    """Read ``train.txt`` and ``test.txt`` from ``data_path``.

    Each non-blank line of those files is ``<label> <image path>``; the image
    path is joined onto ``data_path``.

    Args:
        data_path: Directory that contains ``train.txt`` and ``test.txt``.

    Returns:
        A 5-tuple ``(label_classes, label_train_list, img_train_list,
        label_test_list, img_test_list)`` where

        * ``label_classes`` is the sorted list of distinct label names found
          in either file,
        * ``label_*_list`` are NumPy arrays holding, for every image, the
          index of its label inside ``label_classes``,
        * ``img_*_list`` are lists of image path strings.

    Raises:
        OSError: if a listing file cannot be opened.
        ValueError: if a non-blank line lacks a label or a path.
    """
    data_dir = Path(data_path)
    train_names, img_train_list = _read_split_file(data_dir / "train.txt", data_dir)
    test_names, img_test_list = _read_split_file(data_dir / "test.txt", data_dir)

    label_classes = sorted(set(train_names) | set(test_names))
    # One dict lookup per image instead of a linear list.index() scan.
    class_to_index = {name: idx for idx, name in enumerate(label_classes)}
    label_train_list = np.array([class_to_index[name] for name in train_names])
    label_test_list = np.array([class_to_index[name] for name in test_names])
    return label_classes, label_train_list, img_train_list, label_test_list, img_test_list


def compute_dsift(img, size=16, step=8):
    """Compute SIFT descriptors on a regular grid of keypoints (dense SIFT).

    Args:
        img: Image array. A 3-D array is converted with ``cv2.COLOR_BGR2GRAY``;
            a 2-D array is assumed to be grayscale already and used as is.
        size: Diameter passed to every ``cv2.KeyPoint`` (default 16).
        step: Grid spacing in pixels along both axes (default 8).

    Returns:
        Whatever ``sift.compute`` returns as descriptors for the grid
        keypoints (callers in this file treat ``None`` as "no descriptors").

    Raises:
        ValueError: if ``img`` is ``None``.
    """
    if img is None:
        # Fail with a readable message instead of an OpenCV assertion error.
        raise ValueError("compute_dsift received None instead of an image array")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
    height, width = gray.shape[:2]

    sift = cv2.SIFT_create()
    # Row-major grid starting at the top-left pixel. KeyPoint coordinates and
    # size are floating-point values, so pass them as explicit floats.
    grid = [
        cv2.KeyPoint(float(x), float(y), float(size))
        for y in range(0, height, step)
        for x in range(0, width, step)
    ]
    _, dense_feature = sift.compute(gray, grid)
    return dense_feature


def predict_knn(feature_train, label_train, feature_test, k):
    """Label every test sample by majority vote among its k nearest neighbours.

    Args:
        feature_train: Training feature matrix used to fit the neighbour index.
        label_train: NumPy array of training labels, indexable by the
            neighbour indices.
        feature_test: Feature matrix of the samples to classify.
        k: Number of neighbours that vote.

    Returns:
        A list with one predicted label per row of ``feature_test``.
    """
    knn_index = NearestNeighbors(n_neighbors=k)
    knn_index.fit(feature_train)
    nearest_rows = knn_index.kneighbors(feature_test, return_distance=False)
    # One vote per test sample: the most frequent label among its neighbours.
    return [
        stats.mode(label_train[row], keepdims=False)[0]
        for row in nearest_rows
    ]


def build_visual_dictionary(dense_feature_list, dic_size):
    """Cluster local descriptors into a visual vocabulary with k-means.

    Args:
        dense_feature_list: Descriptor matrix handed to ``KMeans.fit``.
        dic_size: Number of clusters, i.e. visual words.

    Returns:
        The value returned by ``KMeans.fit`` on ``dense_feature_list``.
    """
    # Fixed random_state keeps the vocabulary the same between runs.
    kmeans_settings = {"n_clusters": dic_size, "n_init": "auto", "random_state": 11}
    clusterer = KMeans(**kmeans_settings)
    return clusterer.fit(dense_feature_list)


def compute_bow(feature, vocab):
    """Turn one image's descriptors into an L2-normalised bag-of-words vector.

    Args:
        feature: Descriptors of one image, or ``None`` / an empty sequence.
        vocab: Fitted vocabulary exposing ``n_clusters`` and ``predict``.

    Returns:
        A 1-D array of length ``vocab.n_clusters``. With no descriptors it is
        an all-zero ``float32`` vector; otherwise it is the histogram of
        predicted cluster indices divided by its Euclidean norm (left
        undivided when that norm is zero).
    """
    n_words = vocab.n_clusters
    has_descriptors = feature is not None and len(feature) != 0
    if not has_descriptors:
        return np.zeros((n_words,), dtype=np.float32)

    word_ids = vocab.predict(feature)
    # Integer bin edges 0..n_words give exactly one bin per visual word.
    counts = np.histogram(word_ids, bins=np.arange(n_words + 1))[0]
    length = np.linalg.norm(counts)
    return counts / length if length > 0 else counts


def classify_knn_bow(label_classes, label_train_list, img_train_list, label_test_list, img_test_list,
                     dic_size=400, k=10):
    """Classify the test images with bag-of-words features and kNN voting.

    Dense SIFT descriptors of the training images are clustered into a visual
    vocabulary, every image is encoded as a bag-of-words histogram, and each
    test image is labelled by ``predict_knn``. Progress messages are printed
    and the confusion matrix is saved via ``visualize_confusion_matrix`` with
    ``method_name="bow_knn"``.

    Args:
        label_classes: List of class names; its length fixes the matrix size.
        label_train_list: Class index of every training image.
        img_train_list: Paths of the training images.
        label_test_list: Class index of every test image.
        img_test_list: Paths of the test images.
        dic_size: Number of visual words (default 400).
        k: Number of neighbours used for voting (default 10).

    Returns:
        ``(confusion, accuracy)``. ``confusion[i, j]`` counts test images of
        true class ``i`` predicted as class ``j``; ``accuracy`` is the trace
        of that matrix divided by its sum.

    Raises:
        FileNotFoundError: if an image cannot be read.
    """
    def _dense_sift_from_path(img_path):
        # cv2.imread reports an unreadable file by returning None instead of
        # raising, so name the offending path here.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        return compute_dsift(img)

    print("Compute SIFT features for training images")
    dense_feature_list_train = [_dense_sift_from_path(path) for path in img_train_list]
    # Images without descriptors contribute nothing to the vocabulary.
    all_train_features = np.vstack(
        [f for f in dense_feature_list_train if f is not None]
    )
    print("Build visual dictionary")
    vocab = build_visual_dictionary(all_train_features, dic_size)
    print("Compute Bag of Words features for training images")
    bow_train = np.vstack([compute_bow(f, vocab) for f in dense_feature_list_train])
    print("Compute Bag of Words features for test images")
    dense_feature_list_test = [_dense_sift_from_path(path) for path in img_test_list]
    bow_test = np.vstack([compute_bow(f, vocab) for f in dense_feature_list_test])
    print("Predict with kNN")
    label_test_pred = predict_knn(bow_train, label_train_list, bow_test, k)

    confusion = np.zeros((len(label_classes), len(label_classes)), dtype=np.int32)
    for true_idx, pred_idx in zip(label_test_list, label_test_pred):
        confusion[true_idx, pred_idx] += 1
    accuracy = np.trace(confusion) / np.sum(confusion)
    visualize_confusion_matrix(confusion, accuracy, label_classes, method_name="bow_knn")
    return confusion, accuracy


def predict_svm(feature_train, label_train, feature_test, C=2):
    """Predict labels with one-vs-rest linear SVMs.

    One binary ``SVC`` (linear kernel) is trained per distinct training label;
    each test sample receives the label whose classifier gives the largest
    decision value.

    Args:
        feature_train: Training feature matrix.
        label_train: Training labels (array-like).
        feature_test: Test feature matrix; must expose ``shape``.
        C: Regularisation parameter forwarded to ``SVC`` (default 2).

    Returns:
        A NumPy array with one predicted label per test row. The values are
        taken from ``label_train``, so the result is also correct when the
        training labels are not exactly ``0..n_classes-1``.
    """
    # Accept plain lists too; the element-wise comparison below needs an array.
    label_train = np.asarray(label_train)
    classes = np.unique(label_train)
    n_test = feature_test.shape[0]

    if len(classes) == 1:
        # A binary SVM cannot be fitted on a single class; the answer is trivial.
        return np.full(n_test, classes[0])

    scores = np.zeros((n_test, len(classes)))
    for idx, cls in enumerate(classes):
        # 1 vs all: 1 for current class, 0 for others
        binary_labels = (label_train == cls).astype(int)
        model = SVC(C=C, kernel="linear", probability=False, random_state=0)
        model.fit(feature_train, binary_labels)
        scores[:, idx] = model.decision_function(feature_test)

    # argmax yields a column position; map it back to the actual label value.
    return classes[np.argmax(scores, axis=1)]


def classify_svm_bow(label_classes, label_train_list, img_train_list, label_test_list, img_test_list,
                     dic_size=600):
    """Classify the test images with bag-of-words features and linear SVMs.

    Dense SIFT descriptors of the training images are clustered into a visual
    vocabulary, every image is encoded as a bag-of-words histogram, and each
    test image is labelled by ``predict_svm``. Progress messages are printed
    and the confusion matrix is saved via ``visualize_confusion_matrix`` with
    ``method_name="bow_svm"``.

    Args:
        label_classes: List of class names; its length fixes the matrix size.
        label_train_list: Class index of every training image.
        img_train_list: Paths of the training images.
        label_test_list: Class index of every test image.
        img_test_list: Paths of the test images.
        dic_size: Number of visual words (default 600).

    Returns:
        ``(confusion, accuracy)``. ``confusion[i, j]`` counts test images of
        true class ``i`` predicted as class ``j``; ``accuracy`` is the trace
        of that matrix divided by its sum.

    Raises:
        FileNotFoundError: if an image cannot be read.
    """
    def _dense_sift_from_path(img_path):
        # cv2.imread reports an unreadable file by returning None instead of
        # raising, so name the offending path here.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        return compute_dsift(img)

    print("Compute SIFT features for training images")
    dense_feature_list_train = [_dense_sift_from_path(path) for path in img_train_list]
    # Images without descriptors contribute nothing to the vocabulary.
    all_train_features = np.vstack(
        [f for f in dense_feature_list_train if f is not None]
    )
    print("Build visual dictionary")
    vocab = build_visual_dictionary(all_train_features, dic_size)
    print("Compute Bag of Words features for training images")
    bow_train = np.vstack([compute_bow(f, vocab) for f in dense_feature_list_train])
    print("Compute Bag of Words features for test images")
    dense_feature_list_test = [_dense_sift_from_path(path) for path in img_test_list]
    bow_test = np.vstack([compute_bow(f, vocab) for f in dense_feature_list_test])
    print("Predict with SVM")
    label_test_pred = predict_svm(bow_train, label_train_list, bow_test)

    confusion = np.zeros((len(label_classes), len(label_classes)), dtype=np.int32)
    for true_idx, pred_idx in zip(label_test_list, label_test_pred):
        confusion[true_idx, pred_idx] += 1
    accuracy = np.trace(confusion) / np.sum(confusion)
    visualize_confusion_matrix(confusion, accuracy, label_classes, method_name="bow_svm")
    return confusion, accuracy


def visualize_confusion_matrix(confusion, accuracy, label_classes,  method_name, out_dir="outputs"):
    """Render a confusion matrix as a heatmap and save it as a PNG.

    The image is written to ``<out_dir>/<method_name>_confusion_acc.png``;
    ``out_dir`` is created if it does not exist. The plot uses its own figure,
    which is always closed afterwards, so no other open pyplot figure is drawn
    on or closed.

    Args:
        confusion: Square matrix to display. The callers in this file index it
            as ``confusion[true, predicted]``, which the axis labels reflect.
        accuracy: Value shown in the title with three decimals.
        label_classes: Class names used as tick labels on both axes.
        method_name: Prefix of the output file name.
        out_dir: Output directory (default ``"outputs"``).
    """
    os.makedirs(out_dir, exist_ok=True)
    n_classes = len(label_classes)
    tick_positions = np.arange(n_classes)

    # Explicit figure/axes instead of pyplot's implicit "current figure".
    fig, ax = plt.subplots()
    try:
        ax.set_title("accuracy = {:.3f}".format(accuracy))
        ax.imshow(confusion)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(label_classes)
        ax.set_yticks(tick_positions)
        ax.set_yticklabels(label_classes)
        # imshow puts matrix rows on the y axis and columns on the x axis.
        ax.set_xlabel("Predicted class")
        ax.set_ylabel("True class")
        # set horizontal alignment mode (left, right or center) and rotation mode (anchor or default)
        plt.setp(ax.get_xticklabels(), rotation=-30, ha="center", rotation_mode="default")
        # avoid the top and bottom part of the heatmap being cut off
        ax.set_xticks(np.arange(n_classes + 1) - .5, minor=True)
        ax.set_yticks(np.arange(n_classes + 1) - .5, minor=True)
        ax.tick_params(which="minor", bottom=False, left=False)
        fig.tight_layout()
        fname = os.path.join(out_dir, f"{method_name}_confusion_acc.png")
        fig.savefig(fname, bbox_inches='tight')
    finally:
        # Release the figure even when rendering or saving fails.
        plt.close(fig)


if __name__ == '__main__':
    # Load the train/test listings once; both experiments share them.
    (label_classes, label_train_list, img_train_list,
     label_test_list, img_test_list) = extract_dataset_info("./scene_classification_data")

    # Run the kNN experiment first, then the SVM experiment.
    for run_experiment in (classify_knn_bow, classify_svm_bow):
        run_experiment(label_classes, label_train_list, img_train_list, label_test_list, img_test_list)


Compute SIFT features for training images
Build visual dictionary
Compute Bag of Words features for training images
Compute Bag of Words features for test images
Predict with kNN
Compute SIFT features for training images
Build visual dictionary
Compute Bag of Words features for training images
Compute Bag of Words features for test images
Predict with SVM
