In [None]:
import os
import numpy as np
import torch
import trimap
import umap
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from torchvision.datasets import MNIST, FashionMNIST, CIFAR100
from torchvision import transforms
from pathlib import Path
from tqdm import tqdm

# Config: which datasets to embed, which projection methods to run, and the
# directory the resulting embedding files are written to.
DATASETS = ["CIFAR100", "MNIST", "FashionMNIST"]
METHODS_2D = ["TriMap", "UMAP", "t-SNE", "PCA"]
OUTPUT_DIR = "saved_embeddings"
# Largest TriMap embedding dimension: TriMap is run for every dimension from 2
# up to NDIMS inclusive, while all other methods are only run in 2D.
NDIMS = 2

# Dataset Loader 
def load_dataset(name):
    """Load the training split of a torchvision dataset as flat feature rows.

    Args:
        name: One of "MNIST", "FashionMNIST" or "CIFAR100".

    Returns:
        A 2-D NumPy array with one row per image; each row is the flattened
        ``transforms.ToTensor()`` output for that image.

    Raises:
        ValueError: If ``name`` is not one of the supported dataset names.
    """
    # name -> (dataset class, whether each image tensor is squeezed before
    # stacking). Only the two MNIST-style datasets are squeezed.
    known = (
        ("MNIST", MNIST, True),
        ("FashionMNIST", FashionMNIST, True),
        ("CIFAR100", CIFAR100, False),
    )
    selected = [(cls, squeeze) for label, cls, squeeze in known if name == label]
    if not selected:
        raise ValueError("Invalid dataset name")

    dataset_cls, squeeze = selected[0]
    dataset = dataset_cls(
        "./data", train=True, download=True, transform=transforms.ToTensor()
    )
    images = [
        sample[0].squeeze() if squeeze else sample[0] for sample in dataset
    ]
    stacked = torch.stack(images)
    # Collapse everything but the sample axis into a single feature axis.
    return stacked.view(stacked.size(0), -1).numpy()

# Projection Function 
def project(method, X_flat, dim):
    """Embed ``X_flat`` into ``dim`` dimensions with the named method.

    Args:
        method: One of "TriMap", "UMAP", "t-SNE" or "PCA".
        X_flat: Input data, passed unchanged to the reducer's ``fit_transform``.
        dim: Target dimensionality of the embedding.

    Returns:
        Whatever the selected reducer's ``fit_transform`` returns.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Factories are lazy so that only the requested reducer is constructed.
    factories = (
        ("TriMap", lambda: trimap.TRIMAP(n_dims=dim)),
        ("UMAP", lambda: umap.UMAP(n_components=dim)),
        ("t-SNE", lambda: TSNE(n_components=dim)),
        ("PCA", lambda: PCA(n_components=dim)),
    )
    for label, build in factories:
        if method == label:
            return build().fit_transform(X_flat)
    raise ValueError(f"Unknown method: {method}")
# Save: run every configured projection on every dataset and write each
# embedding into OUTPUT_DIR as a .npy file.
os.makedirs(OUTPUT_DIR, exist_ok=True)
TRIMAP_DIMS = list(range(2, NDIMS + 1))

for dataset in DATASETS:
    print(f"\nLoading dataset: {dataset}")
    X_flat = load_dataset(dataset)

    # TriMap sweeps over TRIMAP_DIMS; every other method is projected to 2D only.
    jobs = [
        (method, dim)
        for method in METHODS_2D
        for dim in (TRIMAP_DIMS if method == "TriMap" else [2])
    ]

    for method, dim in jobs:
        print(f"Projecting {dataset} using {method} ({dim}D)")
        file_path = os.path.join(
            OUTPUT_DIR, f"{dataset}_{method}_{dim}D_embeddings.npy"
        )
        np.save(file_path, project(method, X_flat, dim))
        print(f"[Saved] {file_path}")


In [None]:
import os
import numpy as np
import torch
from sklearn.metrics import silhouette_score
from sklearn.manifold import trustworthiness
from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors
from scipy.spatial.distance import pdist, squareform
from scipy.stats import spearmanr
from torchvision.datasets import MNIST, FashionMNIST, CIFAR100
from torchvision import transforms
import json

# Settings
# Dataset whose saved embeddings are evaluated.
TARGET_DATASET = "FashionMNIST"  # or "MNIST", "CIFAR100"
# Metric computed for every method; one of "silhouette", "knn", "pearson".
TARGET_METRIC = "silhouette"  # silhouette, knn, pearson
# Methods whose 2D embeddings are looked up in EMBED_DIR.
METHODS = ["TriMap", "UMAP", "t-SNE", "PCA"]
EMBED_DIR = "saved_embeddings"  # directory the embedding .npy files are read from
K_NEIGHBORS = 10  # n_neighbors of the k-NN classifier used by the "knn" metric

# Dataset Loader 
def load_dataset(name):
    """Load the training split of a torchvision dataset with its labels.

    The dataset is read from the local ``data`` directory (``download=False``),
    so it must already be present there.

    Args:
        name: One of "MNIST", "FashionMNIST" or "CIFAR100".

    Returns:
        A tuple ``(X_flat, y)`` where ``X_flat`` is a 2-D NumPy array holding
        one flattened ``ToTensor`` image per row and ``y`` is a NumPy array of
        the corresponding labels, in dataset order.

    Raises:
        ValueError: If ``name`` is not one of the supported dataset names.
    """
    transform = transforms.ToTensor()
    if name == "MNIST":
        ds = MNIST(root="data", train=True, download=False, transform=transform)
    elif name == "FashionMNIST":
        ds = FashionMNIST(root="data", train=True, download=False, transform=transform)
    elif name == "CIFAR100":
        ds = CIFAR100(root="data", train=True, download=False, transform=transform)
    else:
        raise ValueError("Invalid dataset")

    # Collect images and labels in a single pass: every item access decodes the
    # image and applies the transform, so iterating a second time just to read
    # the labels would double the loading cost.
    images = []
    targets = []
    for image, target in ds:
        images.append(image.squeeze())
        targets.append(target)

    X = torch.stack(images)
    y = np.array(targets)
    # Collapse everything but the sample axis into a single feature axis.
    X_flat = X.view(X.size(0), -1).numpy()
    return X_flat, y


# Convert NumPy types for JSON 
def convert_numpy_types(d):
    """Return a copy of ``d`` with NumPy scalar values made JSON-serializable.

    Any NumPy floating-point scalar becomes a Python ``float``, any NumPy
    integer scalar (signed or unsigned, any width) becomes a Python ``int``,
    and ``np.bool_`` becomes a Python ``bool``. All other values are copied
    through unchanged. Only top-level values are converted; nested containers
    are not traversed.

    Args:
        d: Mapping of keys to values that may include NumPy scalars.

    Returns:
        A new ``dict`` with the same keys and converted values.
    """
    out = {}
    for k, v in d.items():
        # Check against the abstract scalar base classes so every width
        # (float16, int8, uint32, ...) is handled, not just 32/64-bit types.
        if isinstance(v, np.floating):
            out[k] = float(v)
        elif isinstance(v, np.integer):
            out[k] = int(v)
        elif isinstance(v, np.bool_):
            out[k] = bool(v)
        else:
            out[k] = v
    return out

# Main Loop
# Validate the metric once, before any work is done. Raising inside the
# per-method try block would be swallowed for every method and still leave an
# empty results file behind.
SUPPORTED_METRICS = ("silhouette", "knn", "pearson")
PEARSON_SAMPLE_SIZE = 2000
if TARGET_METRIC not in SUPPORTED_METRICS:
    raise ValueError(
        f"Invalid metric {TARGET_METRIC!r}; expected one of {SUPPORTED_METRICS}"
    )

print(f"\nLoading dataset: {TARGET_DATASET}")
X_orig, labels = load_dataset(TARGET_DATASET)

# Draw the random split / subsample once so that every method is scored on
# exactly the same points; otherwise the scores are not directly comparable.
if TARGET_METRIC == "knn":
    shuffled = np.random.permutation(len(labels))
    n_train = int(0.8 * len(shuffled))
    train_idx, test_idx = shuffled[:n_train], shuffled[n_train:]
elif TARGET_METRIC == "pearson":
    # Cap the sample size so datasets smaller than the default still work.
    n_sample = min(PEARSON_SAMPLE_SIZE, len(X_orig))
    sample_idx = np.random.choice(len(X_orig), n_sample, replace=False)
    # Condensed pairwise distances (each unordered pair once, no diagonal).
    # The high-dimensional distances do not depend on the method, so they are
    # computed a single time here.
    flat_high = pdist(X_orig[sample_idx])

results = {}

for method in METHODS:
    key = f"{TARGET_DATASET}_{method}_2D"
    emb_file = os.path.join(EMBED_DIR, f"{key}_embeddings.npy")
    if not os.path.exists(emb_file):
        print(f"[Missing] {emb_file}")
        continue

    print(f"Evaluating {TARGET_METRIC} on {key}")
    X_emb = np.load(emb_file)

    # Best effort per method: a failure for one embedding is reported and the
    # remaining methods are still evaluated.
    try:
        if TARGET_METRIC == "silhouette":
            metric_val = silhouette_score(X_emb, labels)

        elif TARGET_METRIC == "knn":
            knn = KNeighborsClassifier(n_neighbors=K_NEIGHBORS)
            knn.fit(X_emb[train_idx], labels[train_idx])
            metric_val = knn.score(X_emb[test_idx], labels[test_idx])

        else:  # "pearson"
            # Correlate the condensed distance vectors rather than the full
            # square matrices: the square form's zero diagonal would add
            # spurious (0, 0) pairs and bias the correlation.
            flat_low = pdist(X_emb[sample_idx])
            metric_val = np.corrcoef(flat_high, flat_low)[0, 1]
            if np.isnan(metric_val):
                # Correlation is undefined (e.g. constant distances).
                metric_val = 0.0
    except Exception as e:
        print(f"Error computing {TARGET_METRIC} for {key}: {e}")
    else:
        results[key] = convert_numpy_types({TARGET_METRIC: metric_val})

# Save results
out_file = f"{TARGET_DATASET}_{TARGET_METRIC}_results.json"
with open(out_file, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)
print(f"\nSaved {TARGET_METRIC} results to {out_file}")
