In [21]:
import pandas as pd
import h5py
import os
import json
import datetime


# Dataset configuration: maps each supported dataset name to the location of
# its HDF5 file, and `dataset` selects which entry is used.
# NOTE(review): the "mnist" path is rooted differently ("../data/...") from
# the other entries ("python/data/...") — confirm which working directory
# each path is meant to be resolved against.
dataset_dict = dict(
    fashion_mnist="python/data/raw/fashion-mnist-784-euclidean.hdf5",
    mnist="../data/raw/mnist-784-euclidean.hdf5",
    sift="python/data/raw/sift-128-euclidean.hdf5",
    gist="python/data/raw/gist-960-euclidean.hdf5",
)
dataset = "mnist"

def load_ground_truth():
    """Read the "neighbors" dataset of the currently selected HDF5 file.

    The file path is looked up in ``dataset_dict`` under the module-level
    ``dataset`` key. The whole dataset is read into memory before the file
    is closed.
    """
    hdf5_path = dataset_dict[dataset]
    with h5py.File(hdf5_path, "r") as hdf5_file:
        neighbors = hdf5_file["neighbors"]
        # Indexing with an empty tuple reads the full dataset eagerly, so
        # the returned value stays usable after the file is closed.
        return neighbors[()]

def _parse_result_indexes(raw):
    """Convert a ';'-separated string of indexes into a set of ints.

    A missing cell (read by pandas as NaN) and empty or whitespace-only
    tokens (e.g. from a trailing ';') produce no entries.
    """
    if pd.isna(raw):
        return set()
    return {int(token) for token in str(raw).split(";") if token.strip()}


def calculate_recall(results_file, ground_truth_neighbors, k=100):
    """Calculate recall@k for each query in the given CSV results file.

    The CSV must contain the columns ``test_query_index`` and
    ``top_100_result_indexes`` (a ';'-separated list of retrieved indexes).
    For every row the retrieved indexes are compared with the first ``k``
    entries of ``ground_truth_neighbors[test_query_index]``.

    Side effects: prints the intersection size of every row, writes all
    recall records to ``recall_results_<timestamp>.json`` in the current
    working directory, and prints the name of that file.

    Args:
        results_file: Path of the CSV file holding the query results.
        ground_truth_neighbors: Indexable collection where item ``i`` is the
            ordered sequence of ground-truth neighbor indexes of query ``i``.
        k: Number of ground-truth neighbors to compare against; also the
            denominator of the recall value. Defaults to 100.

    Returns:
        A list of ``{"recall": float, "iteration": int}`` dicts, one per CSV
        row, where ``iteration`` is the 1-based row number.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")

    # Force the index list to be read as text: a column whose rows each hold
    # a single index would otherwise be parsed as numbers and break split().
    results = pd.read_csv(results_file, dtype={"top_100_result_indexes": str})

    recalls = []
    rows = zip(results["test_query_index"], results["top_100_result_indexes"])
    for iteration, (query_index, raw_indexes) in enumerate(rows, start=1):
        retrieved_set = _parse_result_indexes(raw_indexes)

        # Plain ints on both sides keep the set intersection independent of
        # the element type stored in the ground-truth container.
        ground_truth_top_k = {
            int(neighbor)
            for neighbor in ground_truth_neighbors[int(query_index)][:k]
        }

        intersection_size = len(ground_truth_top_k & retrieved_set)
        print(intersection_size)
        recalls.append({"recall": intersection_size / k, "iteration": iteration})

    # A timestamped filename keeps runs in different seconds from overwriting
    # each other.
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"recall_results_{timestamp}.json"

    with open(filename, "w", encoding="utf-8") as file:
        json.dump(recalls, file, indent=4)

    print(f"File saved as {filename}")
    return recalls


def evaluate_all_iterations():
    """Load the ground truth and compute recall for the deletion results CSV.

    The ground-truth neighbors are loaded first; if the results file does
    not exist, a message is printed and nothing else happens.
    """
    ground_truth_neighbors = load_ground_truth()

    results_file = "../../embedded-c++/deletion_results.csv"
    if not os.path.exists(results_file):
        print("Results file not found.")
        return

    calculate_recall(results_file, ground_truth_neighbors)


# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    evaluate_all_iterations()  # takes no arguments; the results path is set inside the function


99
99
97
99
96
91
96
98
92
96
91
96
99
98
94
89
96
96
96
99
91
92
92
89
87
93
91
93
90
90
90
85
93
95
96
89
91
84
80
89
87
81
75
89
84
77
92
81
89
81
73
83
82
80
78
83
86
86
83
81
77
74
79
72
79
79
80
80
80
69
75
76
80
76
68
78
80
89
69
78
78
80
71
75
77
78
66
87
81
65
66
63
78
74
65
64
72
66
67
70
File saved as recall_results_20250228_143638.json
