In [2]:
import os
import shutil
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoProcessor, AutoModel

# Image directory setup
IMAGE_DIR = "/Users/aohus/Workspaces/github/image-cluster/1차"

# Accepted image extensions. The leading dot keeps names that merely end in
# the letters (e.g. "notes_png") from being picked up as images.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

# Collect the image paths. os.listdir returns entries in arbitrary order, so
# the names are sorted to keep embedding indices and group numbering stable
# from one run to the next.
image_paths = [
    os.path.join(IMAGE_DIR, fname)
    for fname in sorted(os.listdir(IMAGE_DIR))
    if fname.lower().endswith(IMAGE_EXTENSIONS)
]


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Load the model and its preprocessor
model_id = "facebook/dinov2-giant"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

processor = AutoProcessor.from_pretrained(model_id)

# Load the weights first, then move the model onto the selected device.
model = AutoModel.from_pretrained(model_id)
model = model.to(device)
model.eval()

Dinov2Model(
  (embeddings): Dinov2Embeddings(
    (patch_embeddings): Dinov2PatchEmbeddings(
      (projection): Conv2d(3, 1536, kernel_size=(14, 14), stride=(14, 14))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): Dinov2Encoder(
    (layer): ModuleList(
      (0-39): 40 x Dinov2Layer(
        (norm1): LayerNorm((1536,), eps=1e-06, elementwise_affine=True)
        (attention): Dinov2SdpaAttention(
          (attention): Dinov2SdpaSelfAttention(
            (query): Linear(in_features=1536, out_features=1536, bias=True)
            (key): Linear(in_features=1536, out_features=1536, bias=True)
            (value): Linear(in_features=1536, out_features=1536, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (output): Dinov2SelfOutput(
            (dense): Linear(in_features=1536, out_features=1536, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (layer_scale1): Dinov2LayerScale()
      

In [9]:
# Image embedding extraction
def extract_dino_embedding(img_path):
    """Return the embedding of a single image as a flat NumPy array.

    The image is opened, converted to RGB, preprocessed with the module-level
    ``processor`` and passed through the module-level ``model`` on ``device``.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        The ``last_hidden_state[:, 0, :]`` slice of the model output (the
        [CLS] token), moved to the CPU, converted to NumPy and flattened.
    """
    # Close the underlying file deterministically instead of relying on
    # garbage collection. convert() returns a separate in-memory image, so
    # it remains usable after the file has been closed.
    with Image.open(img_path) as source_image:
        img = source_image.convert("RGB")

    inputs = processor(images=img, return_tensors="pt").to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)
        embedding = outputs.last_hidden_state[:, 0, :]  # [CLS] token
        return embedding.cpu().numpy().flatten()

# Extract an embedding for every image; row i corresponds to image_paths[i]
print("DINOv2 임베딩 추출 중...")
embeddings = np.array(
    [extract_dino_embedding(image_path) for image_path in tqdm(image_paths)]
)

# Similarity-based grouping
def group_similar_embeddings(embeddings, threshold=0.9):
    """Greedily partition embeddings into groups by cosine similarity.

    Items are visited in index order. Each item that has not been assigned
    yet seeds a new group, which takes the seed plus every other unassigned
    item whose cosine similarity to the seed is at least ``threshold``.

    Args:
        embeddings: 2-D array-like with one embedding per row.
        threshold: Minimum cosine similarity to the seed for an item to join
            the seed's group.

    Returns:
        A list of integer index arrays, one per group. Every row index
        appears in exactly one group. An empty input yields an empty list.
    """
    n_items = len(embeddings)
    if n_items == 0:
        # cosine_similarity rejects empty input; there is nothing to group.
        return []

    similarity_matrix = cosine_similarity(embeddings)
    used = np.zeros(n_items, dtype=bool)
    groups = []

    for idx in range(n_items):
        if used[idx]:
            continue
        # Only consider items that no earlier group has claimed; otherwise
        # one item could end up in several groups.
        candidates = (similarity_matrix[idx] >= threshold) & ~used
        # The seed always belongs to its own group, even when its
        # self-similarity falls below the threshold (zero vector, rounding).
        candidates[idx] = True
        members = np.flatnonzero(candidates)
        groups.append(members)
        used[members] = True
    return groups

DINOv2 임베딩 추출 중...


100%|██████████| 232/232 [03:14<00:00,  1.19it/s]


In [11]:
# Results are written next to the input directory.
OUTPUT_DIR = IMAGE_DIR + "_dino_cosine_similarity"

groups = group_similar_embeddings(embeddings, threshold=0.70)

# Copy each group's images into a dedicated "group_<n>" subfolder
print("그룹별 이미지 저장 중...")
for group_number, member_indices in enumerate(groups):
    target_dir = os.path.join(OUTPUT_DIR, "group_{}".format(group_number))
    os.makedirs(target_dir, exist_ok=True)
    for source_path in (image_paths[member] for member in member_indices):
        shutil.copy(source_path, target_dir)

print(f"DINOv2 기반 공간 그룹핑 완료! 결과는 '{OUTPUT_DIR}'에 저장됨.")

그룹별 이미지 저장 중...
DINOv2 기반 공간 그룹핑 완료! 결과는 '/Users/aohus/Workspaces/github/image-cluster/1차_dino_cosine_similarity'에 저장됨.
