# Setup

In [1]:
import numpy as np
from numpy.linalg import norm

# Combined Evaluation

#### Defining Functions

In [2]:
def load_and_combine_embeddings(fpnet_file, taunet_file):
    """Load two embedding CSVs, join them column-wise and L2-normalize each row.

    Both files are read with ``np.loadtxt`` using ``,`` as the delimiter and
    with the first line skipped. Row ``i`` of one file is concatenated with
    row ``i`` of the other.

    Parameters
    ----------
    fpnet_file : str or path-like
        CSV file whose rows form the left part of each combined embedding.
    taunet_file : str or path-like
        CSV file whose rows form the right part of each combined embedding.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per input row and
        ``fpnet_columns + taunet_columns`` columns. Every row is scaled to
        unit L2 norm; an all-zero row is returned unchanged (all zeros).

    Raises
    ------
    ValueError
        If either file contains no data rows, or if the two files contain a
        different number of rows.
    """
    # ndmin=2 keeps a single-row file as shape (1, d) instead of (d,), so
    # shape[0] is always the row count and axis=1 always exists.
    embeddings_fpnet = np.loadtxt(fpnet_file, delimiter=',', skiprows=1, ndmin=2)
    embeddings_taunet = np.loadtxt(taunet_file, delimiter=',', skiprows=1, ndmin=2)
    # Reject files that hold no embeddings at all
    if embeddings_fpnet.size == 0 or embeddings_taunet.size == 0:
        raise ValueError("At least one of the embeddings files is empty.")
    # Rows are paired by position, so the row counts must agree
    if embeddings_fpnet.shape[0] != embeddings_taunet.shape[0]:
        raise ValueError("The embeddings files have different number of rows.")
    # Concatenate embeddings
    combined_embeddings = np.concatenate((embeddings_fpnet, embeddings_taunet), axis=1)
    # L2 normalize the combined embeddings; a zero norm is replaced by 1 so an
    # all-zero row stays zero instead of turning into NaN via 0/0.
    row_norms = norm(combined_embeddings, axis=1, keepdims=True)
    row_norms[row_norms == 0] = 1.0
    l2_normalized_embeddings = combined_embeddings / row_norms
    return l2_normalized_embeddings

def calculate_rank_n_accuracy(embeddings1, embeddings2, labels1, labels2, n):
    """Compute rank-n identification accuracy of ``embeddings1`` against ``embeddings2``.

    For every row of ``embeddings1`` the Euclidean distance to every row of
    ``embeddings2`` is computed. The row counts as a hit when its label occurs
    among the labels of the ``n`` closest rows of ``embeddings2``.

    Parameters
    ----------
    embeddings1 : array-like, 2-D
        Query embeddings, one per row.
    embeddings2 : array-like, 2-D
        Embeddings to search, one per row, with the same number of columns.
    labels1 : array-like
        ``labels1[i]`` is the label of ``embeddings1[i]``.
    labels2 : array-like
        ``labels2[j]`` is the label of ``embeddings2[j]``.
    n : int
        Number of nearest neighbours to inspect; must be at least 1. Values
        larger than ``len(embeddings2)`` inspect every row.

    Returns
    -------
    float
        Fraction of rows of ``embeddings1`` that were hits.

    Raises
    ------
    ValueError
        If ``n`` is less than 1 or ``embeddings1`` has no rows.
    """
    # Accept plain lists as well as arrays: fancy indexing of labels2 with an
    # index array only works on ndarrays.
    embeddings1 = np.asarray(embeddings1)
    embeddings2 = np.asarray(embeddings2)
    labels1 = np.asarray(labels1)
    labels2 = np.asarray(labels2)

    if n < 1:
        raise ValueError("n must be a positive integer.")
    if len(embeddings1) == 0:
        raise ValueError("embeddings1 is empty; rank-n accuracy is undefined.")

    correct_matches = 0
    for i in range(len(embeddings1)):
        # Compute Euclidean distances from embeddings1[i] to all embeddings2
        distances = np.linalg.norm(embeddings2 - embeddings1[i], axis=1)
        # Indices of the n closest rows; a stable sort makes ties resolve to
        # the lowest index, so the result is deterministic.
        closest_indices = np.argsort(distances, kind="stable")[:n]
        # Check if the correct label is within these top n closest embeddings
        if np.any(labels2[closest_indices] == labels1[i]):
            correct_matches += 1
    # Calculate rank n accuracy
    return correct_matches / len(embeddings1)

#### Embed Features

In [3]:
# First embedding set: FPNet + TauNet CSVs fused into unit-length rows
embeddings1_combined_normalized = load_and_combine_embeddings(
    fpnet_file='embeddings1_fpnet.csv',
    taunet_file='embeddings1_taunet.csv',
)
# Second embedding set, built the same way
embeddings2_combined_normalized = load_and_combine_embeddings(
    fpnet_file='embeddings2_fpnet.csv',
    taunet_file='embeddings2_taunet.csv',
)

#### Create Labels

In [4]:
# The row index doubles as the user id, so each set is labelled 0..len-1;
# rows of the two CSV sets are expected to refer to the same users in the same order.
labels1 = np.arange(len(embeddings1_combined_normalized))
# Sized from the second set (not the first) so that every row of
# embeddings2_combined_normalized has a label of its own.
labels2 = np.arange(len(embeddings2_combined_normalized))

#### Calculate Rank-n Accuracy

In [5]:
# Evaluate identification accuracy at ranks 1, 10 and 100 (in that order)
accuracy, accuracy2, accuracy3 = (
    calculate_rank_n_accuracy(
        embeddings1_combined_normalized,
        embeddings2_combined_normalized,
        labels1,
        labels2,
        rank,
    )
    for rank in (1, 10, 100)
)
# Report the three scores
print(f"Rank-1 Accuracy: {accuracy}")
print(f"Rank-10 Accuracy: {accuracy2}")
print(f"Rank-100 Accuracy: {accuracy3}")

Rank-1 Accuracy: 0.744068409008792
Rank-10 Accuracy: 0.9318318679995182
Rank-100 Accuracy: 0.9927736962543658
