# 🧠 SIFT + SPM + SVM Classifier for Aerial Scene Classification

This notebook builds upon your original SIFT + SVM pipeline and adds **Spatial Pyramid Matching (SPM)** to improve spatial awareness in feature representation.

In [1]:

import cv2
import numpy as np
import os
from glob import glob
from sklearn.cluster import MiniBatchKMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt

N_CLUSTERS = 100
PYRAMID_LEVELS = [1, 2]  # SPM levels: 1x1, 2x2
RANDOM_STATE = 42


In [2]:

def load_dataset(dataset_path):
    images, labels = [], []
    class_names = sorted(os.listdir(dataset_path))
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(dataset_path, class_name)
        for img_path in glob(os.path.join(class_dir, '*.jpg')):
            img = cv2.imread(img_path)
            if img is not None:
                images.append(img)
                labels.append(label)
    return images, labels, class_names

dataset_path = '../Aerial_Landscapes'  # Update this path if needed
images, labels, class_names = load_dataset(dataset_path)


In [3]:

def extract_dense_sift_spm(image, levels=PYRAMID_LEVELS):
    sift = cv2.SIFT_create()
    h, w = image.shape[:2]
    descriptors = []

    for level in levels:
        step_h = h // level
        step_w = w // level
        for i in range(level):
            for j in range(level):
                x1, y1 = j * step_w, i * step_h
                x2, y2 = (j + 1) * step_w, (i + 1) * step_h
                patch = image[y1:y2, x1:x2]
                gray = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
                kp, des = sift.detectAndCompute(gray, None)
                if des is not None:
                    descriptors.append(des)
    return np.vstack(descriptors) if descriptors else np.array([])


In [5]:

# Extract descriptors for all training images
all_descriptors = []
image_descriptors = []

for img in tqdm(images):
    des = extract_dense_sift_spm(img)
    image_descriptors.append(des)
    if des is not None:
        all_descriptors.extend(des)

all_descriptors = np.vstack(all_descriptors)
print("Total descriptors shape:", all_descriptors.shape)


100%|██████████| 12000/12000 [31:05<00:00,  6.43it/s]


Total descriptors shape: (13895675, 128)


In [6]:

# Build visual vocabulary using KMeans
kmeans = MiniBatchKMeans(n_clusters=N_CLUSTERS, random_state=RANDOM_STATE, batch_size=N_CLUSTERS * 20)
kmeans.fit(all_descriptors)
print("Visual vocabulary created.")


[WinError 2] 系统找不到指定的文件。
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


Visual vocabulary created.


In [7]:

def build_histogram(descriptors, kmeans_model):
    histogram = np.zeros(N_CLUSTERS)
    if descriptors is not None and len(descriptors) > 0:
        words = kmeans_model.predict(descriptors)
        for word in words:
            histogram[word] += 1
    return histogram

# Compute histograms for all training images
histograms = [build_histogram(des, kmeans) for des in image_descriptors]
X = np.array(histograms)
y = np.array(labels)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and test
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=RANDOM_STATE)


In [8]:

# Train SVM classifier
svm = SVC(kernel='rbf', C=10, gamma='scale')
svm.fit(X_train, y_train)


In [9]:

# Evaluate the model
y_pred = svm.predict(X_test)

print("——— 评估结果 ———")
print("准确率 (Accuracy):", accuracy_score(y_test, y_pred))
print("精确率 (Precision):", precision_score(y_test, y_pred, average='macro'))
print("召回率 (Recall):", recall_score(y_test, y_pred, average='macro'))
print("F1分数 (F1-Score):", f1_score(y_test, y_pred, average='macro'))
print("\n混淆矩阵:")
print(confusion_matrix(y_test, y_pred))
print("\n分类报告:")
print(classification_report(y_test, y_pred, target_names=class_names, digits=4))


——— 评估结果 ———
准确率 (Accuracy): 0.6904166666666667
精确率 (Precision): 0.6885228189625034
召回率 (Recall): 0.6895414228881784
F1分数 (F1-Score): 0.6878192010015066

混淆矩阵:
[[111   2   3   3  15   1   8   2   5   1   1   3   0   3  10]
 [  8 106   0  11   0   0   0   7   2   1   3  10   5   0   8]
 [  3   1  93   0   6   3  17   3  15   4   0   2   1   2   6]
 [  0   6   1 128   0   0   0   5   3   1   0   9   9   4   4]
 [  1   1   9   0 117   2  24   0   3  11   0   0   0   0   5]
 [  0   0   1   0   4 143  10   1   1   1   0   0   0   1   1]
 [  3   2   3   0  40   6 103   1   4   0   0   0   0   1   2]
 [  3   8   4   2   2   3   3  94   5   1   3   0  12   1   7]
 [ 12   1  15   1   7   2  14  13  55  13   0   4   1   4  15]
 [  2   0   8   0  12   3   0   0   8 116   0   0   1   0   6]
 [  1   0   0   0   0   1   1   1   0   0 138   3   1   3   0]
 [  2   7   6  11   0   0   0   2   5   1   2 115   2   0   2]
 [  2  12   0   6   0   0   0  14   1   0   0   2 110   0   5]
 [  0   1   0   3   2