In [12]:
import json
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from transformers import SiglipProcessor, SiglipModel
from sklearn.metrics import average_precision_score
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Betöltés
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SiglipModel.from_pretrained("google/siglip-base-patch16-224").to(device)
processor = SiglipProcessor.from_pretrained("google/siglip-base-patch16-224")

# Adatok
embeddings = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_embeddings_siglip.pt")
infos = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_info_siglip.pt")
with open("C:/Users/Adam/Desktop/applied_ml/custom_captions.json", encoding="utf-8") as f:
    class_texts = json.load(f)

# Top 1-ek hasonlósága

In [None]:
# Eredmények tárolása
top1_results = []

# Végigmegyünk mind a 102 osztályon
for class_id in tqdm(range(1, 103), desc="Top-1 similarity számolása"):
    str_id = str(class_id)
    lines = class_texts[str_id]
    full_text = " ".join(lines)

    # Szöveg → embedding
    inputs = processor(text=[full_text], return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        text_feat = model.get_text_features(**inputs)
        text_feat = text_feat / text_feat.norm(p=2, dim=-1, keepdim=True)
        text_feat = text_feat.cpu()

    # Cosine similarity az összes képre
    similarities = torch.matmul(embeddings, text_feat.T).squeeze()

    # Top-1 érték kinyerése
    top1_score = similarities.max().item()

    top1_results.append({
        "class_id": class_id,
        "top1_similarity": round(top1_score, 5)
    })

# DataFrame-be tesszük
df_top1 = pd.DataFrame(top1_results)
print(df_top1.head())

# CSV-be mentés (ha szeretnéd)
df_top1.to_csv("C:/Users/Adam/Desktop/applied_ml/class_top1_similarity.csv", index=False)

  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
  embeddings = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_embeddings_siglip.pt")
  infos = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_info_siglip.pt")
Top-1 similarity számolása: 100%|██████████| 102/102 [00:01<00:00, 53.43it/s]

   class_id  top1_similarity
0         1          0.16427
1         2          0.13237
2         3          0.16147
3         4          0.15085
4         5          0.14088





In [None]:
# Eredmények tárolása
precision_results = []

# Végigmegyünk mind a 102 osztályon
for class_id in tqdm(range(1, 103), desc="Precision@N számolása"):
    str_id = str(class_id)
    lines = class_texts[str_id]
    full_text = " ".join(lines)

    # Szöveg → embedding
    inputs = processor(text=[full_text], return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        text_feat = model.get_text_features(**inputs)
        text_feat = text_feat / text_feat.norm(p=2, dim=-1, keepdim=True)
        text_feat = text_feat.cpu()

    # Cosine similarity az összes képre
    similarities = torch.matmul(embeddings, text_feat.T).squeeze()

    # Az adott osztályhoz tartozó képek száma
    class_size = sum(1 for info in infos if int(info["label"]) == class_id)

    # Top N visszakeresés (N = class_size)
    top_indices = similarities.topk(class_size).indices

    # Hány top kép van valóban a class_id osztályban?
    correct = sum(1 for idx in top_indices if int(infos[idx]["label"]) == class_id)

    precision = correct / class_size if class_size > 0 else 0.0

    precision_results.append({
        "class_id": class_id,
        "class_size": class_size,
        "correct_in_topN": correct,
        "precision_at_class_size": round(precision, 5)
    })

# DataFrame-be tesszük
df_precision = pd.DataFrame(precision_results)
print(df_precision.head())

# CSV mentés
df_precision.to_csv("C:/Users/Adam/Desktop/applied_ml/class_precision_at_class_size.csv", index=False)

  embeddings = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_embeddings_siglip.pt")
  infos = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_info_siglip.pt")
Precision@N számolása: 100%|██████████| 102/102 [00:01<00:00, 69.44it/s]

   class_id  class_size  correct_in_topN  precision_at_class_size
0         1          27               20                  0.74074
1         2          49               40                  0.81633
2         3          36               25                  0.69444
3         4          44               39                  0.88636
4         5          54               39                  0.72222





In [None]:
# Kimenet tárolása
recall_results = []

# Osztályonként végigmegyünk
for class_id in tqdm(range(1, 103), desc="Recall cutoff számolása"):
    str_id = str(class_id)
    lines = class_texts[str_id]
    full_text = " ".join(lines)

    # Text embedding
    inputs = processor(text=[full_text], return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        text_feat = model.get_text_features(**inputs)
        text_feat = text_feat / text_feat.norm(p=2, dim=-1, keepdim=True)
        text_feat = text_feat.cpu()

    # Cosine similarity az összes képre
    similarities = torch.matmul(embeddings, text_feat.T).squeeze()
    sorted_indices = torch.argsort(similarities, descending=True)

    # Bináris vektor: 1, ha helyes osztályba tartozik
    binary_hits = [1 if int(infos[i]["label"]) == class_id else 0 for i in sorted_indices]
    cumsum_hits = np.cumsum(binary_hits) # a visszakeresett listában hány jó találat van eddig összesítve

    class_size = sum(binary_hits)  # az adott osztályhoz tartozó képek száma
    recall_targets = {
        70: int(np.ceil(class_size * 0.70)),
        80: int(np.ceil(class_size * 0.80)),
        90: int(np.ceil(class_size * 0.90)),
        95: int(np.ceil(class_size * 0.95)),
    }

    topN_at = {}
    threshold_at = {}

    for perc, target in recall_targets.items():
        found_index = next((i for i, val in enumerate(cumsum_hits) if val >= target), None) # keressük azt az első olyan indexet, ahol már elértük vagy túlléptük a kívánt számú helyes találatot
        if found_index is not None:
            topN_at[f"topN_{perc}"] = found_index + 1  # index → N
            threshold_at[f"threshold_{perc}"] = round(similarities[sorted_indices[found_index]].item(), 5)
        else:
            topN_at[f"topN_{perc}"] = None
            threshold_at[f"threshold_{perc}"] = None

    result = {
        "class_id": class_id,
        "class_size": class_size,
        **topN_at,
        **threshold_at
    }

    recall_results.append(result)

# Eredmények táblázatba
df_recall = pd.DataFrame(recall_results)
print(df_recall.head())

# Mentés CSV-be
df_recall.to_csv("C:/Users/Adam/Desktop/applied_ml/class_recall_cutoffs.csv", index=False)

  embeddings = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_embeddings_siglip.pt")
  infos = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_info_siglip.pt")
Recall cutoff számolása: 100%|██████████| 102/102 [00:04<00:00, 23.02it/s]

   class_id  class_size  topN_70  topN_80  topN_90  topN_95  threshold_70  \
0         1          27       26       34       50       51       0.13915   
1         2          49       37       45       75       86       0.09562   
2         3          36       39       47       67      227       0.11541   
3         4          44       32       39       45       52       0.12268   
4         5          54       50       80       90      114       0.11872   

   threshold_80  threshold_90  threshold_95  
0       0.13033       0.11447       0.11366  
1       0.08807       0.07893       0.07664  
2       0.11272       0.10759       0.08375  
3       0.11493       0.10914       0.10568  
4       0.11281       0.11144       0.10730  





# bert topic

In [None]:
# Eredmények
ap_results = []

for class_id in tqdm(range(1, 103), desc="AP számolása"):
    str_id = str(class_id)
    lines = class_texts[str_id]
    full_text = " ".join(lines)

    # Szöveg embedding
    inputs = processor(text=[full_text], return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        text_feat = model.get_text_features(**inputs)
        text_feat = text_feat / text_feat.norm(p=2, dim=-1, keepdim=True)
        text_feat = text_feat.cpu()

    # Cosine similarity
    similarities = torch.matmul(embeddings, text_feat.T).squeeze().numpy()

    # Binary ground truth: 1 ha jó osztály, különben 0
    gt_labels = [1 if int(info["label"]) == class_id else 0 for info in infos]

    # Average precision
    if sum(gt_labels) > 0:
        ap = average_precision_score(gt_labels, similarities)
    else:
        ap = 0.0

    ap_results.append({
        "class_id": class_id,
        "average_precision": round(ap, 5)
    })

# DataFrame + mAP
df_ap = pd.DataFrame(ap_results)
mean_ap = df_ap["average_precision"].mean()
print(df_ap.head())
print(f"\nMean Average Precision (mAP): {round(mean_ap, 5)}")

# CSV mentés
df_ap.to_csv("C:/Users/Adam/Desktop/applied_ml/class_average_precision.csv", index=False)

  embeddings = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_embeddings_siglip.pt")
  infos = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_info_siglip.pt")
AP számolása: 100%|██████████| 102/102 [00:02<00:00, 39.19it/s]

   class_id  average_precision
0         1            0.79286
1         2            0.91528
2         3            0.78243
3         4            0.96496
4         5            0.81550

Mean Average Precision (mAP): 0.87911





# Innentől Image rész

In [None]:
# Cosine similarity mátrix (n x n)
print("🧠 Cosine similarity mátrix számolása...")
embedding_matrix = embeddings.numpy()
sim_matrix = cosine_similarity(embedding_matrix)

# Diagonálisan 1-es lenne (önmagával) – ezt kilőjük
np.fill_diagonal(sim_matrix, -1)

# Eredmények tárolása
top1_results = []

for i in tqdm(range(sim_matrix.shape[0]), desc="Top-1 keresés képenként"):
    top1_idx = sim_matrix[i].argmax()
    top1_sim = sim_matrix[i][top1_idx]

    label_i = int(infos[i]["label"])
    label_top1 = int(infos[top1_idx]["label"])
    same_class = (label_i == label_top1)

    top1_results.append({
        "image_index": i,
        "top1_index": top1_idx,
        "top1_similarity": round(top1_sim, 5),
        "label": label_i,
        "top1_label": label_top1,
        "same_class": same_class
    })

# Mentés
df_top1 = pd.DataFrame(top1_results)
df_top1.to_csv("C:/Users/Adam/Desktop/applied_ml/image_top1_similarity.csv", index=False)
print("✅ Kész: image_top1_similarity.csv")

  embeddings = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_embeddings_siglip.pt")
  infos = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_info_siglip.pt")


🧠 Cosine similarity mátrix számolása...


Top-1 keresés képenként: 100%|██████████| 6552/6552 [00:00<00:00, 75146.92it/s]

✅ Kész: image_top1_similarity.csv





In [None]:
# Cosine similarity mátrix (n x n)
print("🧠 Cosine similarity mátrix számolása...")
embedding_matrix = embeddings.numpy()
sim_matrix = cosine_similarity(embedding_matrix)

# Diagonálisan -1, hogy önmagát ne hozza vissza
np.fill_diagonal(sim_matrix, -1)

# Recall@K értékek
K_values = [1, 5, 10, 20]
recall_records = []

print("📊 Recall@K számolása minden képre...")
for i in tqdm(range(sim_matrix.shape[0]), desc="Recall képenként"):
    true_label = int(infos[i]["label"])
    top_indices = np.argsort(-sim_matrix[i])  # descending sorrend

    recalls = {}
    # Az aktuális osztályhoz tartozó képek száma (önmagát kivéve)
    total_same_class = sum(1 for j in range(len(infos)) if j != i and int(infos[j]["label"]) == true_label)

    for k in K_values:
        top_k = top_indices[:k]
        correct = sum(1 for j in top_k if int(infos[j]["label"]) == true_label)
        recall = correct / total_same_class if total_same_class > 0 else 0.0
        recalls[f"recall@{k}"] = round(recall, 5)

    recall_records.append({
        "image_index": i,
        "label": true_label,
        **recalls
    })

# DataFrame mentés
df_recall = pd.DataFrame(recall_records)
df_recall.to_csv("C:/Users/Adam/Desktop/applied_ml/image_recall_at_k.csv", index=False)

# Átlag recall@k kiírás
mean_recalls = df_recall[[f"recall@{k}" for k in K_values]].mean()
print("\n📈 Átlagos Recall@K értékek:")
for k in K_values:
    print(f"Recall@{k}: {round(mean_recalls[f'recall@{k}'], 5)}")

  embeddings = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_embeddings_siglip.pt")
  infos = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_info_siglip.pt")


🧠 Cosine similarity mátrix számolása...
📊 Recall@K számolása minden képre...


Recall képenként: 100%|██████████| 6552/6552 [00:08<00:00, 764.70it/s]


📈 Átlagos Recall@K értékek:
Recall@1: 0.01568
Recall@5: 0.07727
Recall@10: 0.15266
Recall@20: 0.29798





In [None]:
# Cosine similarity mátrix
print("🧠 Cosine similarity mátrix számolása...")
embedding_matrix = embeddings.numpy()
sim_matrix = cosine_similarity(embedding_matrix)

# Saját magát nem hasonlítjuk (AP-ben nincs értelme)
np.fill_diagonal(sim_matrix, -1)

# Eredmények
ap_results = []

print("📊 Average Precision számítása minden képre...")
for i in tqdm(range(sim_matrix.shape[0]), desc="AP per image"):
    label = int(infos[i]["label"])

    # Ground truth: 1 ha ugyanabba az osztályba tartozik, különben 0 (önmagát nem számítjuk)
    gt = np.array([1 if int(infos[j]["label"]) == label else 0 for j in range(len(infos))])
    gt[i] = 0  # önmagát nullázzuk

    scores = sim_matrix[i]

    if gt.sum() > 0:
        ap = average_precision_score(gt, scores)
    else:
        ap = 0.0

    ap_results.append({
        "image_index": i,
        "label": label,
        "average_precision": round(ap, 5)
    })

# DataFrame + mAP
df_ap = pd.DataFrame(ap_results)
mean_ap = df_ap["average_precision"].mean()
print(df_ap.head())
print(f"\n📈 Mean Average Precision (mAP) for images: {round(mean_ap, 5)}")

# Mentés
df_ap.to_csv("C:/Users/Adam/Desktop/applied_ml/image_average_precision.csv", index=False)

  embeddings = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_embeddings_siglip.pt")
  infos = torch.load("C:/Users/Adam/Desktop/applied_ml/dataset/image_info_siglip.pt")


🧠 Cosine similarity mátrix számolása...
📊 Average Precision számítása minden képre...


AP per image: 100%|██████████| 6552/6552 [00:13<00:00, 486.15it/s]

   image_index  label  average_precision
0            0      1            0.80528
1            1      1            0.80784
2            2      1            0.80397
3            3      1            0.79885
4            4      1            0.77852

📈 Mean Average Precision (mAP) for images: 0.8723



