In [15]:
import torch
from sklearn.metrics import DistanceMetric
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
from scipy.spatial import distance
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import sys
sys.path.append('/workspaces/dbm25/task_1_2')
from tabulate import tabulate


from extract_features import extract_features



In [16]:
# Load the pre-extracted feature spaces from their .pt files.
# `feature_space_train` is the reference set that the search scans;
# `feature_space_test` supplies the query images used for evaluation.
# NOTE(review): torch.load unpickles the file contents - only load trusted files.
train_features_path = "/workspaces/dbm25/data/extracted_features.pt"
test_features_path = "/workspaces/dbm25/data/extracted_features_part_2.pt"

feature_space_train = torch.load(train_features_path)
feature_space_test = torch.load(test_features_path)

In [17]:
def top_k_distance_search(image_path, k, feature_model, measure):
    """
    Finds the k most similar images to an input image based on a feature model and distance metric.

    The query image's features are looked up by ``image_path`` in
    ``feature_space_test`` first and, if not found there, in
    ``feature_space_train``. The query is then compared against every entry of
    ``feature_space_train``. Note that when the query itself comes from
    ``feature_space_train`` it is also one of the candidates, so it will match
    itself.

    Each feature-space element is indexed as a dict with the keys
    ``"file_path"``, ``"class"`` and ``feature_model``; the feature value must
    support ``.reshape``.

    Parameters:
    ----------
    image_path : str
        Path to the input image.
    k : int
        Number of similar images to return. Values <= 0 yield an empty list.
    feature_model : str
        Feature extraction model identifier to use.
    measure : str
        Distance metric name (compatible with sklearn.metrics.DistanceMetric).

    Returns:
    -------
    list of dict
        List of at most k dictionaries, ordered from most to least similar,
        containing similar image information:
        - "image_name": image name
        - "file_path": image path
        - "class": image class
        - "distance_score": distance score (lower = more similar)

    Raises:
    ------
    IndexError
        If ``image_path`` is found in neither feature space. (IndexError is
        kept because that is what the previous implementation raised for a
        missing path.)
    """
    # Function-scope import keeps this cell runnable on its own.
    import heapq

    def _find_surrogate(feature_space):
        # First element whose file_path matches the query, or None.
        return next(
            (element for element in feature_space if element["file_path"] == image_path),
            None,
        )

    # Look the query up in the test set first, then fall back to the train set.
    image_query_surrogate = _find_surrogate(feature_space_test)
    if image_query_surrogate is None:
        image_query_surrogate = _find_surrogate(feature_space_train)
    if image_query_surrogate is None:
        raise IndexError(f"No extracted features found for image path: {image_path!r}")

    if k <= 0:
        return []

    # Get and reshape (linearize) query image features
    image_query_feature = image_query_surrogate[feature_model].reshape(1, -1)

    # Initialize distance metric
    dist = DistanceMetric.get_metric(measure)

    def _scored_candidates():
        # Lazily yield one scored result per database image so that only the
        # k best have to be kept in memory by the selection below.
        for image_surrogate in feature_space_train:
            image_surrogate_feature = image_surrogate[feature_model].reshape(1, -1)
            distance_score = dist.pairwise(image_query_feature, image_surrogate_feature)[0][0]
            yield {
                "image_name": image_surrogate["file_path"].split("/")[-1],
                "file_path": image_surrogate["file_path"],
                "class": image_surrogate["class"],
                "distance_score": distance_score,
            }

    # nsmallest keeps the k lowest distances and returns them sorted ascending;
    # on ties the earlier database entry wins, as in the previous implementation.
    return heapq.nsmallest(
        k, _scored_candidates(), key=lambda result: result["distance_score"]
    )

In [18]:
def knn_search(image_path, k_neighbors, feature_model, measure):
    """
    Classify an image by majority vote among its nearest neighbours.

    The neighbours are obtained from ``top_k_distance_search`` and each one
    casts a vote for its ``"class"``. Only the three classes "brain_glioma",
    "brain_menin" and "brain_tumor" are tallied; a neighbour with any other
    class raises ``KeyError``.

    Parameters:
    ----------
    image_path : str
        Path of the image to classify.
    k_neighbors : int
        Number of nearest neighbours that vote.
    feature_model : str
        Feature model identifier forwarded to ``top_k_distance_search``.
    measure : str
        Distance metric name forwarded to ``top_k_distance_search``.

    Returns:
    -------
    list of tuple
        A one-element list ``[(class_name, vote_count)]`` holding the class
        with the most votes. Ties go to the class listed first in the tally.
    """
    neighbours = top_k_distance_search(
        image_path=image_path,
        k=k_neighbors,
        feature_model=feature_model,
        measure=measure,
    )

    # One counter per known class, in a fixed order that also decides ties.
    tally = dict.fromkeys(("brain_glioma", "brain_menin", "brain_tumor"), 0)
    for neighbour in neighbours:
        tally[neighbour["class"]] += 1

    # max() returns the first entry with the highest count, which is the same
    # entry a stable descending sort would place first.
    winner = max(tally.items(), key=lambda entry: entry[1])
    return [winner]


In [None]:


# Evaluation settings
SEED = 42              # fixed seed so the evaluated sample is reproducible on a fresh run
N_EVAL_SAMPLES = 500   # number of test images to classify

feature_model = "layer3" # best is layer3
measure = "euclidean"
k_neighbors = 50  # Number of neighbors to consider

# Shuffle the test feature space in place so the evaluated slice is a random
# sample; the seeded generator makes that sample repeatable.
np.random.default_rng(SEED).shuffle(feature_space_test)

# Load pre-trained ResNet50 model
# NOTE(review): `model` is not used anywhere in this cell - the evaluation
# below works on the pre-extracted features. Kept in case later cells need it.
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
model.eval()

y_true = []
y_pred = []
for image in feature_space_test[:N_EVAL_SAMPLES]:
    image_path = image["file_path"]
    true_class = image["class"]
    predicted_class = knn_search(image_path=image_path, k_neighbors=k_neighbors, feature_model=feature_model, measure=measure)
    y_true.append(true_class)
    # knn_search returns [(class_name, vote_count)]; keep only the class name.
    y_pred.append(predicted_class[0][0])

# Class labels, in the order the per-class metrics are reported
labels = ["brain_glioma", "brain_menin", "brain_tumor"]

# Compute precision, recall, F1 and accuracy.
# Passing `labels` pins the order of the returned arrays to the list above, so
# index i always refers to labels[i] even if a class is missing from the sample.
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, labels=labels)
accuracy = accuracy_score(y_true, y_pred)

# Print the metrics for each label
for i, label in enumerate(labels):
    print(f"Label: {label}")
    print(f"  Precision: {precision[i]:.4f}")
    print(f"  Recall:    {recall[i]:.4f}")
    print(f"  F1-score:  {f1[i]:.4f}")
    print()

print(f"Overall accuracy: {accuracy:.4f}")



Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


Label: brain_glioma
  Precision: 0.9118
  Recall:    0.9394
  F1-score:  0.9254

Label: brain_menin
  Precision: 0.8889
  Recall:    0.7742
  F1-score:  0.8276

Label: brain_tumor
  Precision: 0.8974
  Recall:    0.9722
  F1-score:  0.9333

Overall accuracy: 0.9000
