## The script of split data

In [23]:

import os
import shutil
import random
from glob import glob
from sklearn.model_selection import train_test_split

dataset_path = '../Aerial_Landscapes'
split_root = '../split_data'
os.makedirs(split_root, exist_ok=True)

random.seed(42)

for class_name in sorted(os.listdir(dataset_path)):
    class_dir = os.path.join(dataset_path, class_name)
    if not os.path.isdir(class_dir):
        continue

    images = glob(os.path.join(class_dir, '*.jpg'))
    trainval_imgs, test_imgs = train_test_split(images, test_size=0.2, random_state=42)
    train_imgs, val_imgs = train_test_split(trainval_imgs, test_size=0.125, random_state=42)  # 10% val of full set

    for split, split_imgs in zip(['train', 'val', 'test'], [train_imgs, val_imgs, test_imgs]):
        split_dir = os.path.join(split_root, split, class_name)
        os.makedirs(split_dir, exist_ok=True)
        for img_path in split_imgs:
            shutil.copy(img_path, os.path.join(split_dir, os.path.basename(img_path)))

print("✅ 数据划分完成：Train/Val/Test 分别为 70% / 10% / 20%")


✅ 数据划分完成：Train/Val/Test 分别为 70% / 10% / 20%


# SIFT + SPM + SVM Classifier for Aerial Scene Classification

This notebook builds upon your original SIFT + SVM pipeline and adds **Spatial Pyramid Matching (SPM)** to improve spatial awareness in feature representation.

In [24]:

import cv2
import numpy as np
import os
from glob import glob
from sklearn.cluster import MiniBatchKMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt

N_CLUSTERS = 100
PYRAMID_LEVELS = [1, 2]  # SPM levels: 1x1, 2x2
RANDOM_STATE = 42


In [25]:

def load_dataset_split(split_dir):
    images, labels = [], []
    class_names = sorted(os.listdir(split_dir))
    for label, class_name in enumerate(class_names):
        class_path = os.path.join(split_dir, class_name)
        for img_path in glob(os.path.join(class_path, '*.jpg')):
            img = cv2.imread(img_path)
            if img is not None:
                images.append(img)
                labels.append(label)
    return images, labels, class_names

# Load from pre-split folders
train_images, train_labels, class_names = load_dataset_split('../split_data/train')
val_images, val_labels, _ = load_dataset_split('../split_data/val')
test_images, test_labels, _ = load_dataset_split('../split_data/test')


In [26]:

def extract_dense_sift_spm(image, levels=PYRAMID_LEVELS):
    sift = cv2.SIFT_create()
    h, w = image.shape[:2]
    descriptors = []

    for level in levels:
        step_h = h // level
        step_w = w // level
        for i in range(level):
            for j in range(level):
                x1, y1 = j * step_w, i * step_h
                x2, y2 = (j + 1) * step_w, (i + 1) * step_h
                patch = image[y1:y2, x1:x2]
                gray = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
                kp, des = sift.detectAndCompute(gray, None)
                if des is not None:
                    descriptors.append(des)
    return np.vstack(descriptors) if descriptors else np.array([])


In [28]:

# Extract descriptors for all training images
all_descriptors = []
image_descriptors = []

for img in tqdm(train_images):  # ✅ 用 train_images 而不是 images
    des = extract_dense_sift_spm(img)
    image_descriptors.append(des)
    if des is not None:
        all_descriptors.extend(des)

all_descriptors = np.vstack(all_descriptors)
print("Total descriptors shape:", all_descriptors.shape)



100%|██████████| 8400/8400 [29:46<00:00,  4.70it/s]     


Total descriptors shape: (9710966, 128)


In [29]:

# Build visual vocabulary using KMeans
kmeans = MiniBatchKMeans(n_clusters=N_CLUSTERS, random_state=RANDOM_STATE, batch_size=N_CLUSTERS * 20)
kmeans.fit(all_descriptors)
print("Visual vocabulary created.")


[WinError 2] 系统找不到指定的文件。
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


Visual vocabulary created.


In [33]:

def build_histogram(descriptors, kmeans_model):
    histogram = np.zeros(N_CLUSTERS)
    if descriptors is not None and len(descriptors) > 0:
        words = kmeans_model.predict(descriptors)
        for word in words:
            histogram[word] += 1
    return histogram

# Compute histograms for all training images
histograms = [build_histogram(des, kmeans) for des in image_descriptors]
X = np.array(histograms)
y = np.array(train_labels)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and test
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=RANDOM_STATE)


In [34]:

# Train SVM classifier
svm = SVC(kernel='rbf', C=10, gamma='scale')
svm.fit(X_train, y_train)


In [35]:

# Evaluate the model
y_pred = svm.predict(X_test)

print("——— 评估结果 ———")
print("准确率 (Accuracy):", accuracy_score(y_test, y_pred))
print("精确率 (Precision):", precision_score(y_test, y_pred, average='macro'))
print("召回率 (Recall):", recall_score(y_test, y_pred, average='macro'))
print("F1分数 (F1-Score):", f1_score(y_test, y_pred, average='macro'))
print("\n混淆矩阵:")
print(confusion_matrix(y_test, y_pred))
print("\n分类报告:")
print(classification_report(y_test, y_pred, target_names=class_names, digits=4))


——— 评估结果 ———
准确率 (Accuracy): 0.6553571428571429
精确率 (Precision): 0.6572348146707985
召回率 (Recall): 0.6552787236279554
F1分数 (F1-Score): 0.6532053069085267

混淆矩阵:
[[ 77   2   2   1  10   1   7   5   8   0   1   0   2   0  10]
 [  6  68   2   5   0   0   0   3   1   0   1   5  12   1   5]
 [  3   1  92   0   6   0   9   3   3   2   0   1   0   0   4]
 [  0   3   2  80   0   0   0   0   0   0   0   6   5   2   1]
 [  4   2  12   0  70   2  10   0   5   3   0   0   0   0   0]
 [  0   0   2   0   5  89  12   0   1   2   0   0   0   1   1]
 [  4   1   1   0  21   6  68   5   1   0   0   0   0   0   1]
 [  1  10   2   2   1   4   1  62   4   1   2   1  12   1   7]
 [  9   5  11   1   5   1   7  10  40   8   0   4   1   4  13]
 [  0   0   2   0  10   2   2   2   7  69   0   0   0   2  10]
 [  0   4   0   0   0   0   0   2   1   0 104   2   3   2   0]
 [  0  11   3   7   1   1   0   0   0   0   0  88   2   0   5]
 [  3  11   0  12   0   0   0  10   0   0   0   0  57   0   1]
 [  0   6   1   5   1

In [37]:

# 📦 提取验证集和测试集的 SPM 特征 + 标准化

# 验证集
val_descriptors = [extract_dense_sift_spm(img) for img in tqdm(val_images)]
val_histograms = [build_histogram(des, kmeans) for des in val_descriptors]
X_val = np.array(val_histograms)
y_val = np.array(val_labels)
X_val_scaled = scaler.transform(X_val)

# 测试集
test_descriptors = [extract_dense_sift_spm(img) for img in tqdm(test_images)]
test_histograms = [build_histogram(des, kmeans) for des in test_descriptors]
X_test = np.array(test_histograms)
y_test = np.array(test_labels)
X_test_scaled = scaler.transform(X_test)

print("✅ 验证集和测试集特征提取 + 标准化完成")


100%|██████████| 1200/1200 [02:31<00:00,  7.92it/s]
100%|██████████| 2400/2400 [05:56<00:00,  6.74it/s]


✅ 验证集和测试集特征提取 + 标准化完成


In [15]:

# 🔍 GridSearch on Validation Set & Evaluate on Test Set
from sklearn.svm import SVC

best_score = 0
best_params = {}

print("正在验证集上搜索最优超参数...")

# 尝试不同的 SVM 超参数组合
for C in [0.1, 1, 10, 100]:
    for gamma in ['scale', 0.01, 0.001]:
        model = SVC(kernel='rbf', C=C, gamma=gamma)
        model.fit(X_train, y_train)
        val_pred = model.predict(X_val_scaled)
        acc = accuracy_score(y_val, val_pred)
        print(f"验证集准确率: C={C}, gamma={gamma} => Accuracy={acc:.4f}")
        if acc > best_score:
            best_score = acc
            best_params = {'C': C, 'gamma': gamma}

# 输出最优超参数
print("\n✅ 最优参数:", best_params)
print(f"验证集最高准确率: {best_score:.4f}")


正在验证集上搜索最优超参数...
验证集准确率: C=0.1, gamma=scale => Accuracy=0.5350
验证集准确率: C=0.1, gamma=0.01 => Accuracy=0.5358
验证集准确率: C=0.1, gamma=0.001 => Accuracy=0.4417
验证集准确率: C=1, gamma=scale => Accuracy=0.6458
验证集准确率: C=1, gamma=0.01 => Accuracy=0.6458
验证集准确率: C=1, gamma=0.001 => Accuracy=0.5658
验证集准确率: C=10, gamma=scale => Accuracy=0.6508
验证集准确率: C=10, gamma=0.01 => Accuracy=0.6508
验证集准确率: C=10, gamma=0.001 => Accuracy=0.6350
验证集准确率: C=100, gamma=scale => Accuracy=0.6392
验证集准确率: C=100, gamma=0.01 => Accuracy=0.6400
验证集准确率: C=100, gamma=0.001 => Accuracy=0.6500

✅ 最优参数: {'C': 10, 'gamma': 'scale'}
验证集最高准确率: 0.6508


In [17]:
# 使用最优参数重新训练模型，并在测试集上评估
final_model = SVC(kernel='rbf', **best_params)
final_model.fit(X_train, y_train)
test_pred = final_model.predict(X_test_scaled)

print("\n📊 最终测试集评估结果：")
print("Test Accuracy:", accuracy_score(y_test, test_pred))
print("Test Precision:", precision_score(y_test, test_pred, average='macro'))
print("Test Recall:", recall_score(y_test, test_pred, average='macro'))
print("Test F1 Score:", f1_score(y_test, test_pred, average='macro'))
print("\n分类报告:")
print(classification_report(y_test, test_pred, target_names=class_names, digits=4))


📊 最终测试集评估结果：
Test Accuracy: 0.6854166666666667
Test Precision: 0.6855400488525137
Test Recall: 0.6854166666666667
Test F1 Score: 0.6831028285800875

分类报告:
              precision    recall  f1-score   support

 Agriculture     0.7517    0.7000    0.7249       160
     Airport     0.5682    0.6250    0.5952       160
       Beach     0.6188    0.6188    0.6188       160
        City     0.6702    0.7875    0.7241       160
      Desert     0.5845    0.7562    0.6594       160
      Forest     0.8402    0.8875    0.8632       160
   Grassland     0.5808    0.6062    0.5933       160
     Highway     0.6163    0.6625    0.6386       160
        Lake     0.4320    0.3375    0.3789       160
    Mountain     0.7857    0.7562    0.7707       160
     Parking     0.9655    0.8750    0.9180       160
        Port     0.8095    0.7438    0.7752       160
     Railway     0.7170    0.7125    0.7147       160
 Residential     0.8387    0.8125    0.8254       160
       River     0.5039    0.4000