### 采用SIFT特征实现图像检索功能，即输入一张图片，在数据集中检索出相似的图片，数据集自选。

In [17]:
import cv2
import numpy as np
import os
from tqdm import tqdm

In [18]:
# 定义数据集路径
dataset_path = './test_data'

# 初始化SIFT特征提取器
sift = cv2.SIFT_create()

# 定义存储特征的列表
features = []

In [19]:
# 读取数据集中的所有图片，并提取SIFT特征
for file_name in tqdm(os.listdir(dataset_path), desc='Extracting features'):
    # 读取图像
    img = cv2.imread(os.path.join(dataset_path, file_name))
    # 转换成灰度图像
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    # 提取SIFT特征，返回关键点和描述符
    keypoints, descriptor = sift.detectAndCompute(gray, None)
    # 将特征添加到列表中
    features.append(descriptor)

Extracting features: 100%|███████████████████████████████████████████████████████████| 114/114 [00:37<00:00,  3.01it/s]


In [20]:
len(features)

114

In [21]:
features[108]

array([[ 8., 18., 92., ..., 17., 13., 15.],
       [ 0.,  0.,  0., ...,  5.,  4., 95.],
       [26., 18.,  3., ..., 11., 19., 60.],
       ...,
       [ 6., 86., 43., ...,  3.,  8., 12.],
       [ 1.,  0.,  9., ...,  0.,  0., 90.],
       [15., 19.,  5., ...,  0.,  0., 12.]], dtype=float32)

In [22]:
# 读取查询图像，并提取SIFT特征
query_img = cv2.imread('./ashmolean_000000.jpg')
query_gray = cv2.cvtColor(query_img, cv2.COLOR_BGR2GRAY)
query_keypoints, query_descriptor = sift.detectAndCompute(query_gray, None)

In [23]:
len(query_keypoints)

3469

In [24]:
# 创建FLANN匹配器
flann = cv2.FlannBasedMatcher({'algorithm': 0, 'trees': 5}, {'checks': 50})

In [25]:
# 计算查询图像与数据集中所有图像的SIFT特征距离
distances = []
for feature in tqdm(features, desc='Calculating distances'):
    matches = flann.knnMatch(query_descriptor, feature, k=2)
    good_matches = []
    for m, n in matches:
        if m.distance < 0.7 * n.distance:
            good_matches.append(m)
    # 如果没有匹配的特征，则距离为1
    distance = 1
    if len(good_matches) != 0:
        distance = 1 - len(good_matches) / len(matches)
    distances.append(distance)

Calculating distances: 100%|█████████████████████████████████████████████████████████| 114/114 [04:29<00:00,  2.37s/it]


In [26]:
distances

[0.4531565292591525,
 0.9645430959930816,
 0.9933698472182185,
 0.996829057365235,
 0.9870279619486884,
 0.9965407898529836,
 0.9910637071202075,
 0.9939463822427212,
 0.9166906889593542,
 0.9899106370712021,
 0.9971173248774863,
 0.9766503315076391,
 0.996829057365235,
 0.9930815797059671,
 0.9873162294609398,
 0.9556068031132892,
 0.9798212741424042,
 0.9962525223407322,
 0.9887575670221966,
 0.9982703949264918,
 0.9971173248774863,
 0.9890458345344479,
 0.9939463822427212,
 0.9965407898529836,
 0.9916402421447102,
 0.9956759873162294,
 0.997693859901989,
 0.9965407898529836,
 0.986739694436437,
 0.994522917267224,
 0.9850100893629288,
 0.9971173248774863,
 0.9919285096569617,
 0.9953877198039781,
 0.9850100893629288,
 0.9982703949264918,
 0.9956759873162294,
 0.9974055923897377,
 0.9927933121937158,
 0.9861631594119342,
 0.9979821274142404,
 0.994522917267224,
 0.9711732487748631,
 0.996829057365235,
 0.9922167771692131,
 0.9925050446814644,
 0.9962525223407322,
 0.9985586624387431,

In [27]:
np.argsort(distances)

array([  0,   8,  15,   1,  42,  11,  69,  16,  96,  30,  34,  39,  28,
         4,  14,  18, 103,  21,  94,   9,   6,  24,  59,  32, 101,  44,
        99,  45,  88, 109,  38,  97,  13,   2, 100,  51,   7,  76,  22,
        50, 111,  56,  29,  41,  70,  73,  95,  89, 107,  93,  75,  58,
        48, 106,  33,  84,  63,  25,  36,  54,  80,  82,  86,  49,  74,
        91,  92,  79,  17,  46,  53,  81,  23,   5,  52,  27,  77,  98,
       104,  57,  43,   3, 110,  87,  72, 102,  83,  12, 108,  10,  65,
       112,  20,  31,  85, 113,  37,  55,  60,  61,  26,  66,  40,  68,
        78,  90, 105,  19,  35,  67,  47,  64,  62,  71], dtype=int64)

In [28]:
indices = np.argsort(distances)[:5]

In [29]:
# 显示结果，将5张图显示在不同的窗口
for index in indices:
    img = cv2.imread(os.path.join(dataset_path, os.listdir(dataset_path)[index]))
    cv2.imshow('result', img)
    cv2.waitKey(0)

cv2.destroyAllWindows()

In [19]:
# 目标图像的大小
target_size = (300, 300)

# 创建一个大画布，用于显示多张图像
canvas = np.zeros((target_size[1], target_size[0] * len(indices), 3), dtype=np.uint8)

# 缩放并拼接图像到大画布上
for i, index in enumerate(indices):
    img = cv2.imread(os.path.join(dataset_path, os.listdir(dataset_path)[index]))
    img = cv2.resize(img, target_size)  # 缩放图像到目标大小
    canvas[:, i * target_size[0] : (i + 1) * target_size[0], :] = img

# 显示拼接后的图像
cv2.imshow('Results', canvas)
cv2.waitKey(0)
cv2.destroyAllWindows()


### 基于词袋模型

In [1]:
import os

import cv2
import numpy as np
from sklearn.cluster import KMeans
from tqdm import tqdm

In [2]:
# 定义数据集路径
dataset_path = './test_data'

# 初始化SIFT特征提取器
sift = cv2.SIFT_create()

# 定义存储特征的列表
features = []

In [3]:
# 读取数据集中的所有图片，并提取SIFT特征
for file_name in tqdm(os.listdir(dataset_path), desc='Extracting features'):
    # 读取图像
    img = cv2.imread(os.path.join(dataset_path, file_name))
    # 转换成灰度图像
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # 直方图均衡化
    gray = cv2.equalizeHist(gray)
    # 提取SIFT特征，返回关键点和描述符
    keypoints, descriptor = sift.detectAndCompute(gray, None)
    # 将特征添加到列表中
    features.append(descriptor)

Extracting features: 100%|███████████████████████████████████████████████████████████| 114/114 [00:39<00:00,  2.91it/s]


In [4]:
# 将特征列表转换为numpy数组
features = np.concatenate(features)

In [5]:
# 使用K均值聚类构建词袋模型
kmeans = KMeans(n_clusters=100, n_init=10)  # 设置聚类中心数量，n_init表示聚类的初始尝试次数
kmeans.fit(features)

In [6]:
# 定义存储词袋特征的列表
bag_of_features = []

In [7]:
# 计算每张图像的词袋特征
for file_name in tqdm(os.listdir(dataset_path), desc='Calculating bag-of-features'):
    # 读取图像
    img = cv2.imread(os.path.join(dataset_path, file_name))
    # 转换成灰度图像
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # 直方图均衡化
    gray = cv2.equalizeHist(gray)
    # 提取SIFT特征，返回关键点和描述符
    keypoints, descriptor = sift.detectAndCompute(gray, None)
    # 使用K均值聚类将描述符映射到词袋中心
    labels = kmeans.predict(descriptor)
    # 统计每个词袋中心的频次
    hist, _ = np.histogram(labels, bins=np.arange(101))
    # 归一化特征向量
    hist = hist.astype(float)
    hist /= np.sum(hist)
    # 将特征向量添加到列表中
    bag_of_features.append(hist)

Calculating bag-of-features: 100%|███████████████████████████████████████████████████| 114/114 [00:40<00:00,  2.85it/s]


In [9]:
# 将词袋特征列表转换为numpy数组
bag_of_features = np.array(bag_of_features)

In [10]:
bag_of_features.shape

(114, 100)

In [11]:
# 读取查询图像，并提取SIFT特征
query_img = cv2.imread('./ashmolean_000000.jpg')
query_gray = cv2.cvtColor(query_img, cv2.COLOR_BGR2GRAY)
query_keypoints, query_descriptor = sift.detectAndCompute(query_gray, None)
# 使用K均值聚类将查询图像的描述符映射到词袋中心
query_labels = kmeans.predict(query_descriptor)
# 统计每个词袋中心的频次
query_hist, _ = np.histogram(query_labels, bins=np.arange(101))
# 归一化特征向量
query_hist = query_hist.astype(float)
query_hist /= np.sum(query_hist)

In [12]:
# 计算查询图像与数据集中每张图像的相似度
scores = np.sum(np.minimum(bag_of_features, query_hist), axis=1)

In [13]:
# 获取相似度最高的前5张图像的索引
top5 = np.argsort(scores)[::-1][:5]

In [14]:
# 显示查询图像
cv2.imshow('Query image', query_img)
cv2.waitKey(0)

-1

In [15]:
# 显示相似度最高的前5张图像
for i in range(5):
    img = cv2.imread(os.path.join(dataset_path, os.listdir(dataset_path)[top5[i]]))
    cv2.imshow('Top {}'.format(i + 1), img)
    cv2.waitKey(0)

In [16]:
cv2.destroyAllWindows()