In [1]:
import faiss
import numpy as np
import torch
from transformers import AutoImageProcessor, AutoModel, AutoProcessor, CLIPModel
from PIL import Image
import os
from tqdm import tqdm
import shutil
from sklearn.cluster import KMeans

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")

# Checkpoint id and local Hugging Face cache folder, named once so the
# processor and the model are always loaded from the same place.
# os.path.join keeps the cache path valid on non-Windows machines as well.
dino_checkpoint = 'facebook/dinov2-base'
hf_cache_dir = os.path.join('.cache', 'huggingface', 'hub')

processor_dino = AutoImageProcessor.from_pretrained(dino_checkpoint, cache_dir=hf_cache_dir)
# .eval() is chained into the assignment so the cell does not echo the model repr;
# it makes the inference-only intent explicit.
model_dino = AutoModel.from_pretrained(dino_checkpoint, cache_dir=hf_cache_dir).to(device).eval()

In [4]:
def add_vector_to_index(embedding, index):
    """L2-normalise an embedding and append it to a FAISS index.

    Args:
        embedding: torch tensor holding one embedding (1-D) or a batch of
            embeddings (2-D, one per row). It may live on any device.
        index: object exposing ``add(array)``; here a FAISS index.

    The caller's tensor is never modified: normalisation happens on a private
    float32 copy.
    """
    # np.array(...) always copies, so the in-place normalisation below cannot
    # write through to a CPU tensor that shares memory with the NumPy view.
    # order="C" + float32 is the layout faiss.normalize_L2 operates on.
    vector = np.array(embedding.detach().cpu().numpy(), dtype=np.float32, order="C")
    if vector.ndim == 1:
        # A single embedding becomes a one-row batch.
        vector = vector.reshape(1, -1)
    faiss.normalize_L2(vector)  # normalises each row in place
    index.add(vector)

def extract_features_dino(image):
    """Return one pooled DINOv2 embedding per input image.

    The image is run through ``processor_dino`` and ``model_dino`` on
    ``device`` with gradient tracking disabled, and ``last_hidden_state`` is
    averaged over dimension 1.
    """
    with torch.no_grad():
        batch = processor_dino(images=image, return_tensors="pt").to(device)
        token_states = model_dino(**batch).last_hidden_state
        pooled = token_states.mean(dim=1)
    return pooled

In [5]:
folder_patch = r'E:\work\spatio_evo_urbanvisenv_svi\sv\degree_retain'
# Root folder of the image library.

accepted_formats = (".png", ".jpg", ".JPG", ".jpeg")


def collect_image_files(folder, formats):
    """Recursively list image files under ``folder`` in a reproducible order.

    Args:
        folder: directory to walk.
        formats: iterable of file extensions (with leading dot) to accept;
            matching is case-insensitive, so ".png" also accepts ".PNG".

    Returns:
        A list of ``(root, file_name, full_path)`` tuples. Directories and
        files are visited in sorted order so that position ``i`` refers to the
        same image on every run, whatever order the filesystem reports.
    """
    suffixes = tuple(ext.lower() for ext in formats)
    found = []
    for root, dirs, files in os.walk(folder):
        dirs.sort()  # in-place sort steers the order os.walk descends in
        for file in sorted(files):
            if file.lower().endswith(suffixes):
                found.append((root, file, os.path.join(root, file)))
    return found


image_files = collect_image_files(folder_patch, accepted_formats)
roots = [root for root, _, _ in image_files]
img_names = [name for _, name, _ in image_files]
img_paths = [path for _, _, path in image_files]
len(img_paths)

9270

In [6]:
# Build a flat L2 index holding one embedding per image, in img_paths order.
# The index width is read from the loaded model instead of a hard-coded 768,
# so switching to another DINOv2 checkpoint size keeps this cell working.
embedding_dim = model_dino.config.hidden_size
index_dino = faiss.IndexFlatL2(embedding_dim)
for image_path in tqdm(img_paths):
    # The context manager releases the file handle as soon as the RGB copy exists.
    with Image.open(image_path) as img_file:
        img = img_file.convert('RGB')
    dino_features = extract_features_dino(img)
    add_vector_to_index(dino_features,index_dino)

100%|██████████| 9270/9270 [21:58<00:00,  7.03it/s]  


In [7]:
# Save the index inside the image folder. The path is built with os.path.join,
# so no backslash has to be written inside a string literal.
index_path = os.path.join(folder_patch, "dino_768.index")
faiss.write_index(index_dino, index_path)
# To reuse a saved index instead of re-embedding every image:
# index_dino = faiss.read_index(index_path)
# (row i of the index must still correspond to img_paths[i]).
index_dino.ntotal

9270

In [16]:
# Pull every stored vector back out of the index in a single call;
# reconstruct_n rebuilds a contiguous range of vectors from the index.
vectors = index_dino.reconstruct_n(0, index_dino.ntotal)

# Run k-means clustering.
num_clusters = 30
kmeans = faiss.Kmeans(
    d=index_dino.d,
    k=num_clusters,
    # Only request GPU training when FAISS actually sees a GPU, mirroring the
    # CPU fallback used for `device`.
    gpu=faiss.get_num_gpus() > 0,
    # Explicit seed so the centroid initialisation is pinned in the notebook.
    seed=1234,
)

kmeans.train(vectors)

2166.773193359375

In [17]:
# Look up the single nearest centroid for every vector:
# D holds the distances, I holds the matching centroid indices.
D, I = kmeans.index.search(vectors, 1)
# One neighbour per row, so flattening I yields the cluster id of each vector.
labels = I.reshape(-1)

In [18]:
# Show the cluster id assigned to each vector (NumPy abbreviates long arrays).
labels

array([ 6, 29,  4, ...,  1, 15, 14], dtype=int64)

In [19]:
# Root folder for the clustering result; the copy loop creates one sub-folder
# per cluster id beneath it.
result_dir = r'E:\work\spatio_evo_urbanvisenv_svi\sv\degree_kmeans_30'

In [20]:
# Copy every image into <result_dir>/<cluster id>/.
for label_id in tqdm(range(num_clusters)):
    idxs = np.where(labels == label_id)[0]  # positions of the images in this cluster
    patch_save_dir = os.path.join(result_dir, str(label_id))  # sub-folder for this cluster
    os.makedirs(patch_save_dir, exist_ok=True)

    # Images were gathered recursively, so two files from different source
    # folders can share a base name. Track the names already used in this
    # cluster (case-insensitively, as Windows does) and disambiguate clashes
    # with the image position instead of silently overwriting the earlier copy.
    used_names = set()
    for i in idxs:
        target_name = os.path.basename(img_paths[i])
        if target_name.lower() in used_names:
            stem, ext = os.path.splitext(target_name)
            target_name = f"{stem}_{i}{ext}"
        used_names.add(target_name.lower())
        shutil.copy(img_paths[i], os.path.join(patch_save_dir, target_name))

100%|██████████| 30/30 [05:41<00:00, 11.37s/it]
