In [1]:
import cv2
import numpy as np
import os
from glob import glob
from sklearn.cluster import MiniBatchKMeans
from sklearn.svm import SVC
from sklearn.metrics import(
accuracy_score,
precision_score,
recall_score,
f1_score,confusion_matrix,
classification_report)
from sklearn.preprocessing import StandardScaler

In [2]:
# Pipeline configuration
N_CLUSTERS = 80      # size of the visual vocabulary (number of visual words)
MAX_SAMPLES = 5000   # maximum number of samples used to train the vocabulary
TEST_SIZE = 0.2      # fraction of the data held out as the test set
RANDOM_STATE = 42    # random seed

In [3]:
def load_and_split_dataset(dataset_path, test_size=0.2, sample_ratio=1.0):
    """
    Load a directory-per-class image dataset and split it per class.

    Every class is shuffled, sub-sampled and split on its own, so each class
    contributes to the train and test sets in the same proportion.

    Expected layout:
    dataset/
        class1/
            img1.jpg
            img2.jpg
            ...
        class2/
            img1.jpg
            img2.jpg
            ...

    Args:
        dataset_path: root directory containing one sub-directory per class.
        test_size: fraction of each class's sampled images that goes to the
            test set.
        sample_ratio: fraction (0.0-1.0) of each class's images to use; at
            least one image per class is always kept.

    Returns:
        (train_images, train_labels), (test_images, test_labels), classes

        Images are the arrays returned by cv2.imread. Labels are integer
        indices into ``classes``. ``classes`` lists, in sorted order, only the
        sub-directories that contain at least one '*.jpg' file, so stray files
        and empty directories never become phantom classes.
    """
    train_images, train_labels = [], []
    test_images, test_labels = [], []
    classes = []

    for class_name in sorted(os.listdir(dataset_path)):
        class_dir = os.path.join(dataset_path, class_name)
        if not os.path.isdir(class_dir):
            continue  # ignore stray files sitting next to the class folders

        # glob returns paths in filesystem order; sort first so the seeded
        # shuffle below yields the same split on every machine.
        img_paths = sorted(glob(os.path.join(class_dir, '*.jpg')))
        if not img_paths:
            continue  # skip directories without images

        # Register the class only once we know it has images, which keeps the
        # label ids contiguous and aligned with `classes`.
        class_id = len(classes)
        classes.append(class_name)

        # Re-seed per class so each class's shuffle is independent of the others
        np.random.seed(RANDOM_STATE)
        np.random.shuffle(img_paths)

        # Sub-sample first (always keep at least one image) ...
        n_samples = max(int(len(img_paths) * sample_ratio), 1)
        sampled_paths = img_paths[:n_samples]

        # ... then split the sampled paths into train / test parts
        split_idx = int(len(sampled_paths) * (1 - test_size))
        partitions = (
            (sampled_paths[:split_idx], train_images, train_labels),
            (sampled_paths[split_idx:], test_images, test_labels),
        )

        for paths, images, labels in partitions:
            for path in paths:
                img = cv2.imread(path)
                if img is None:
                    # cv2.imread signals an unreadable file by returning None
                    print('图片读取失败！')
                    continue
                images.append(img)
                labels.append(class_id)

    return (train_images, train_labels), (test_images, test_labels), classes


In [4]:
def extract_color_sift_features(images):
    """
    Compute per-channel SIFT descriptors for colour images.

    Each image is converted from BGR to RGB, split into its channels, and SIFT
    is run on every channel separately. The descriptors of all channels are
    stacked vertically into one array per image.

    Returns a list with one entry per input image: the stacked descriptor
    array, or an empty ``np.array([])`` when no channel produced descriptors.
    """
    detector = cv2.SIFT_create()
    per_image = []

    for image in images:
        # Convert to RGB and process each colour plane on its own
        planes = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        found = []
        for plane in planes:
            descriptors = detector.detectAndCompute(plane, None)[1]
            if descriptors is None:
                continue  # no keypoints detected in this channel
            found.append(descriptors)

        # Stack the channel descriptors row-wise (one descriptor per row)
        per_image.append(np.vstack(found) if len(found) > 0 else np.array([]))

    return per_image


In [5]:
def create_visual_vocabulary(descriptors_list, n_clusters=N_CLUSTERS, max_samples=MAX_SAMPLES):
    """
    Build the visual vocabulary by clustering local descriptors.

    Args:
        descriptors_list: one descriptor array per image; empty entries
            (len == 0) are ignored.
        n_clusters: number of visual words (k-means clusters).
        max_samples: upper bound on the number of descriptors used for
            clustering. If fewer descriptors are available, all are used.

    Returns:
        The fitted MiniBatchKMeans model.

    Raises:
        ValueError: if no image contributed any descriptor.
    """
    # Gather every non-empty descriptor array
    non_empty = [d for d in descriptors_list if len(d) > 0]
    if not non_empty:
        raise ValueError("Cannot build a visual vocabulary: no descriptors were provided")
    all_descriptors = np.vstack(non_empty)

    # Randomly subsample at most max_samples descriptors. Sampling without
    # replacement cannot draw more items than exist, so cap the request.
    np.random.seed(RANDOM_STATE)
    sample_size = min(max_samples, len(all_descriptors))
    chosen = np.random.choice(len(all_descriptors), sample_size, replace=False)
    all_descriptors = all_descriptors[chosen]

    # MiniBatchKMeans is used to speed up clustering
    kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=RANDOM_STATE)
    kmeans.fit(all_descriptors)
    return kmeans


In [6]:
def extract_bow_features(descriptors_list, kmeans):
    """
    Turn per-image descriptors into bag-of-visual-words histograms.

    For each image, every descriptor is assigned to a visual word with
    ``kmeans.predict``; the word counts are then normalised to sum to
    (approximately) one. Images without descriptors get an all-zero histogram.

    Returns an array with one float32 histogram of length
    ``kmeans.n_clusters`` per input image.
    """
    vocab_size = kmeans.n_clusters
    histograms = []

    for image_descriptors in descriptors_list:
        if len(image_descriptors) > 0:
            # Map descriptors to visual words and count occurrences;
            # minlength keeps the histogram as long as the vocabulary.
            words = kmeans.predict(image_descriptors)
            counts = np.bincount(words, minlength=vocab_size)
        else:
            counts = np.zeros(vocab_size)

        counts = counts.astype(np.float32)
        # Normalise; the small epsilon avoids division by zero for empty images
        counts /= counts.sum() + 1e-7

        histograms.append(counts)

    return np.array(histograms)


In [7]:
def evaluate_model(y_true, y_pred, classes):
    """
    Print the full set of evaluation metrics for a classifier.

    Args:
        y_true: ground-truth label ids.
        y_pred: predicted label ids.
        classes: class names; assumed to be indexed by label id (as produced
            by load_and_split_dataset).

    Prints accuracy, weighted precision / recall / F1, the confusion matrix
    and the per-class classification report. Returns nothing.
    """
    # Overall metrics (weighted averages across classes)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')

    # Pin the label set to every known class. Otherwise sklearn infers labels
    # from the data, and a class absent from both y_true and y_pred makes the
    # number of labels differ from len(classes), which classification_report
    # rejects when target_names is given.
    label_ids = list(range(len(classes)))

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    # Classification report (per-class metrics)
    report = classification_report(
        y_true, y_pred, labels=label_ids, target_names=classes
    )

    # Print the results
    print("\n——— 评估结果 ———")
    print(f"准确率 (Accuracy): {accuracy:.4f}")
    print(f"精确率 (Precision): {precision:.4f}")
    print(f"召回率 (Recall): {recall:.4f}")
    print(f"F1分数 (F1-Score): {f1:.4f}")

    print(f"\n混淆矩阵:\n{cm}")

    print(f"\n分类报告:\n{report}")


In [None]:
# 1. Load the dataset and split it into train / test sets
dataset_path = "../Aerial_Landscapes"
# Alternative dataset location (Kaggle):
# dataset_path = "/kaggle/input/skyview-an-aerial-landscape-dataset/Aerial_Landscapes"
# Use only 50% of the images of each class
(train_images, train_labels), (test_images, test_labels), classes = (
    load_and_split_dataset(dataset_path, test_size=TEST_SIZE, sample_ratio=0.5)
)

print(f"训练集: {len(train_images)} 张, 测试集: {len(test_images)} 张")

# 2. Extract SIFT descriptors for the training set
train_descriptors = extract_color_sift_features(train_images)
print("训练集特征提取完成")

# 3. Build the visual vocabulary (from training data only, to avoid leakage)
kmeans = create_visual_vocabulary(train_descriptors)
print("视觉词袋创建完成")

# 4. Convert the training and test sets to bag-of-words feature vectors
X_train = extract_bow_features(train_descriptors, kmeans)
X_test = extract_bow_features(extract_color_sift_features(test_images), kmeans)
y_train = np.array(train_labels)
y_test = np.array(test_labels)

# 5. Standardise the features (statistics are fitted on the training set only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 6. Train a linear-kernel SVM classifier
clf = SVC(kernel="linear", random_state=RANDOM_STATE)
clf.fit(X_train, y_train)
print("分类器训练完成")

# 7. Evaluate the model on the held-out test set
y_pred = clf.predict(X_test)
evaluate_model(y_test, y_pred, classes)


训练集: 4800 张, 测试集: 1200 张
训练集特征提取完成


[WinError 2] 系统找不到指定的文件。
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "d:\300-Function\320-Studio\326-Anaconda\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


视觉词袋创建完成
分类器训练完成

——— 评估结果 ———
准确率 (Accuracy): 0.5908
精确率 (Precision): 0.5884
召回率 (Recall): 0.5908
F1分数 (F1-Score): 0.5869

混淆矩阵:
[[52  2  2  1  3  1  3  2  4  1  0  0  2  4  3]
 [ 6 47  1  5  1  2  1  3  3  0  1  2  4  3  1]
 [ 3  2 49  3  3  1  3  3  3  1  3  1  0  1  4]
 [ 0 11  2 50  0  0  0  0  1  0  0  3  4  6  3]
 [ 2  0  2  0 49  4  9  0  4  6  0  0  0  2  2]
 [ 0  0  1  0  3 67  5  0  2  0  0  0  0  0  2]
 [ 3  0  0  0 13  8 41  1  5  2  0  1  1  3  2]
 [ 4  7  2  1  1  1  7 33  7  0  0  1  8  1  7]
 [ 6  2 12  0  5  1  7  5 21  4  2  2  1  1 11]
 [ 0  0  3  1  7  0  0  2  7 56  0  0  0  2  2]
 [ 0  2  1  2  0  0  0  1  1  0 71  0  0  2  0]
 [ 1  3  2 14  0  0  0  2  4  1  2 42  3  1  5]
 [ 0 11  0  8  1  0  0 11  1  0  0  4 44  0  0]
 [ 3  4  1  0  2  0  0  2  3  0  1  0  0 62  2]
 [ 9  2 14  2  3  4  3  1  5  7  0  3  1  1 25]]

分类报告:
              precision    recall  f1-score   support

 Agriculture       0.58      0.65      0.62        80
     Airport       0.51      0.59