In [43]:
import pandas as pd
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
import numpy as np
import umap
import torch
import warnings
import os
warnings.filterwarnings("ignore", category=FutureWarning, message="'force_all_finite' was renamed to 'ensure_all_finite'")
cwd = os.getcwd()

In [44]:
# Mega Descriptor embeddings
df_query_features = pd.read_parquet(f"{cwd}\\data\\embeddings\\megadescriptor-l-384\\query_embeddings_features.parquet")
df_query_metadata = pd.read_parquet(f"{cwd}\\data\\embeddings\\megadescriptor-l-384\\query_embeddings_metadata.parquet")
df_query_features["image_id"] = df_query_metadata["image_id"]
df_query_predictions = pd.read_csv(f"{cwd}\\data\\predictions\\20250425-megadescriptor-l-384.csv")

# Merge features and predictions
df_query_features_predictions = df_query_features.merge(df_query_predictions, on="image_id", how='left')
features = df_query_features_predictions.drop(columns=['identity'])
features.columns = features.columns.astype(str)
labels = df_query_features_predictions['identity']

# Calculate silhouette score
score = silhouette_score(features, labels)
print("Silhouette Score:", score)

# PCA
reduced = PCA(n_components=2).fit_transform(features)
score = silhouette_score(reduced, labels)
print("PCA Silhouette Score:", score)

# UMAP
umap_model = umap.UMAP(n_neighbors=5, min_dist=0.3, metric="correlation")
reduced = umap_model.fit_transform(features)
score = silhouette_score(reduced, labels)
print("UMAP Silhouette Score:", score)

Silhouette Score: -0.8455504111187095
PCA Silhouette Score: -0.8596465178578592
UMAP Silhouette Score: -0.701258
