<a href="https://colab.research.google.com/github/elisa-negrini/ML-project/blob/main/ML1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install torch torchvision faiss-cpu transformers datasets tqdm


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from tor

In [None]:
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch
import os
import numpy as np

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the pretrained CLIP checkpoint onto the chosen device, together with
# the processor published under the same checkpoint name.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
model = model.to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def extract_embedding(image_path):
    """Compute the CLIP image embedding for a single image file.

    The image is opened with Pillow, converted to RGB, preprocessed by the
    module-level ``processor`` and passed through
    ``model.get_image_features`` on ``device`` with gradient tracking
    disabled.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        The first (and only) row of the model output for this one-image
        batch, moved to the CPU and converted to a NumPy array.
    """
    # The context manager releases the file handle even if decoding fails;
    # convert() returns an independent in-memory image, so it stays usable
    # after the file has been closed.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        features = model.get_image_features(**inputs)
    return features[0].cpu().numpy()

# Example: embed every ".jpg" file found in "data" and save the stacked matrix.
# os.listdir() returns names in arbitrary order, so they are sorted to keep
# the row order of the saved matrix reproducible between runs.
embeddings = []
for filename in sorted(os.listdir("data")):
    if filename.endswith(".jpg"):
        emb = extract_embedding(os.path.join("data", filename))
        embeddings.append(emb)

# Fail with a clear message instead of the opaque error np.vstack raises
# when it is handed an empty list.
if not embeddings:
    raise ValueError('no ".jpg" files found in "data"; nothing to save')

# np.save does not create missing parent directories.
os.makedirs("embeddings", exist_ok=True)
np.save("embeddings/features.npy", np.vstack(embeddings))


In [None]:
!pip install faiss-cpu # faiss-cpu can be used if you need the CPU version
import faiss
import numpy as np
# %%

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m65.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0


In [None]:


# Load the saved embedding matrix and cast it to float32 before handing it
# to FAISS.
features = np.load("embeddings/features.npy")
features = features.astype('float32')

# Normalise every row to unit length, in place. On unit-length vectors the
# L2 ranking is the same as the cosine-similarity ranking.
faiss.normalize_L2(features)

# Flat L2 index sized to the embedding dimension, filled with all vectors.
index = faiss.IndexFlatL2(features.shape[1])
index.add(features)

# Top-5 retrieval. The queries are the indexed vectors themselves, so each
# image is also a candidate for its own result list.
D, I = index.search(features, k=5)
print(I[0])  # indices of the 5 entries closest to the first image


[0 1 2 4 3]


In [None]:
import torch
import torch.nn as nn
import torchvision

class EmbeddingNet(nn.Module):
    """ResNet-50 backbone followed by a linear projection to unit-length embeddings.

    Args:
        embedding_dim: Size of the output embedding. Defaults to 512, the
            value that used to be hard-coded.
    """

    def __init__(self, embedding_dim=512):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 is the
        # checkpoint that flag used to load, so the weights are unchanged.
        resnet = torchvision.models.resnet50(
            weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1
        )
        # Keep every child module except the last one (the classifier head).
        self.base = nn.Sequential(*list(resnet.children())[:-1])
        self.fc = nn.Linear(resnet.fc.in_features, embedding_dim)

    def forward(self, x):
        """Embed a batch and L2-normalise each row.

        Args:
            x: Input batch; presumably images shaped (N, 3, H, W) as the
                ResNet backbone expects — TODO confirm against the data loader.

        Returns:
            Tensor with one unit-norm row of size ``embedding_dim`` per sample.
        """
        x = self.base(x)
        # Collapse everything after the batch dimension into one feature axis.
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return nn.functional.normalize(x, p=2, dim=1)

# Triplet margin loss on p=2 (Euclidean) distances with a margin of 0.2.
# Hard mining is optional and is not done by this loss object itself: the
# anchor/positive/negative triplets are chosen by the code that calls it.
loss_fn = nn.TripletMarginLoss(p=2, margin=0.2)


In [None]:
# Basic Recall@K example
def recall_at_k(retrieved_ids, true_ids, k):
    """Fraction of queries whose true id appears in their top-k retrieved ids.

    Queries and rankings are paired positionally with ``zip``, so surplus
    entries in the longer of the two sequences are ignored.

    Args:
        retrieved_ids: One ranked sequence of retrieved ids per query.
        true_ids: The expected id for each query, in the same order.
        k: Number of leading entries of each ranking to inspect.

    Returns:
        Number of hits divided by ``len(true_ids)``. Returns ``0.0`` when
        ``true_ids`` is empty instead of raising ``ZeroDivisionError``.
    """
    total = len(true_ids)
    if total == 0:
        return 0.0
    hits = sum(true in retrieved[:k] for true, retrieved in zip(true_ids, retrieved_ids))
    return hits / total
