In [None]:
from datasets import load_dataset
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
import torch, torch.nn as nn, numpy as np

# Open the ImageNet-1k validation split in streaming mode and show its summary.
ds = load_dataset(
    "ILSVRC/imagenet-1k",
    split="validation",
    streaming=True,
)
print(ds)

  mod = _original_import(name, globals, locals, fromlist, level)


IterableDataset({
    features: ['image', 'label'],
    num_shards: 14
})


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import IterableDataset, DataLoader   # ✅ add IterableDataset here
from torchvision import transforms, models
from datasets import load_dataset
import numpy as np
from tqdm import tqdm

# Preprocessing pipeline applied to every image before it reaches the model:
# resize -> centre crop to 224 -> tensor conversion -> per-channel normalisation.
# NOTE(review): the mean/std constants look like the usual ImageNet statistics
# used with torchvision's pretrained weights - confirm against the weights' docs.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

class StreamingImageNet(IterableDataset):
    """Iterable wrapper that turns streamed records into ``(image, label)`` pairs.

    Each record read from ``hf_ds`` must expose an ``"image"`` entry with a
    ``convert`` method and a ``"label"`` entry.

    Args:
        hf_ds: Iterable of records to wrap.
        transform: Optional callable applied to each RGB-converted image.
        max_samples: Optional cap on the number of pairs produced. ``None``
            means no cap; ``0`` produces nothing.
    """

    def __init__(self, hf_ds, transform=None, max_samples=None):
        self.ds = hf_ds
        self.transform = transform
        self.max_samples = max_samples

    def __iter__(self):
        """Yield at most ``max_samples`` transformed ``(image, label)`` pairs."""
        for i, item in enumerate(self.ds):
            # Check the cap before doing any work so that exactly
            # `max_samples` items are produced (not one extra).
            if self.max_samples is not None and i >= self.max_samples:
                break
            image = item["image"].convert("RGB")
            label = item["label"]
            if self.transform is not None:
                image = self.transform(image)
            yield image, label


# Wrap the streamed split and batch it in the main process (no worker processes).
imagenet_val = StreamingImageNet(hf_ds=ds, transform=transform)
dataloader = DataLoader(
    imagenet_val,
    batch_size=64,
    num_workers=0,
)

# Pretrained ResNet-50 used as a feature extractor: the final fully connected
# layer is swapped for an identity so the forward pass returns its input features.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
resnet = models.resnet50(weights="IMAGENET1K_V1")
resnet.fc = nn.Identity()
resnet.eval()
resnet.to(device)

# Extract embeddings
def extract_embeddings(loader, model, device):
    """Run ``model`` over every batch from ``loader`` and collect the outputs.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches of tensors.
        model: Callable applied to each image batch after it is moved to ``device``.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays, each the concatenation of
        the per-batch results along the first axis.
    """
    feature_chunks = []
    label_chunks = []
    # Gradients are not needed for feature extraction.
    with torch.no_grad():
        for batch_imgs, batch_labels in tqdm(loader):
            batch_feats = model(batch_imgs.to(device))
            feature_chunks.append(batch_feats.cpu().numpy())
            label_chunks.append(batch_labels.numpy())
    stacked_feats = np.concatenate(feature_chunks)
    stacked_labels = np.concatenate(label_chunks)
    return stacked_feats, stacked_labels

# Run the extraction pass over the whole loader and report the result shape.
embeddings, labels = extract_embeddings(dataloader, resnet, device)
print("Embeddings:", embeddings.shape)

# Persist the features: a later cell reads them back with
# np.load("embeddings.npy"), so the file has to be written here for the
# notebook to run top to bottom on a fresh kernel.
np.save("embeddings.npy", embeddings)

782it [08:16,  1.58it/s]


Embeddings: (50000, 2048)


In [3]:
# Sanity check: display the shape of the embedding array.
embeddings.shape

(50000, 2048)

In [None]:
import numpy as np

from PLDiv_sparse import PLDiv_Sparse_MST, fast_PLDiv_approx
import statistics
import time
from magnipy.diversipy import Diversipy
from vendi_score import vendi
import torch
from sklearn.metrics.pairwise import rbf_kernel, laplacian_kernel, cosine_distances, euclidean_distances, manhattan_distances

In [2]:
# Load the embedding array from "embeddings.npy" (path is relative to the working directory).
embeddings = np.load("embeddings.npy")

Cosine Similarity

In [36]:
# Pairwise cosine distances between all rows of `embeddings` (one row/column per sample).
dist_matrix = cosine_distances(embeddings)

In [5]:
def dcscore(sim_product, tau=1.0):
    """Diversity score: trace of the row-wise softmax of a similarity matrix.

    Each row of ``sim_product`` is divided by ``tau`` and turned into a
    probability distribution with a softmax over the last dimension; the
    score is the sum of the diagonal entries of the result.

    Args:
        sim_product: Square pairwise similarity matrix. NumPy arrays, torch
            tensors and nested sequences are accepted.
        tau: Positive softmax temperature. The default ``1.0`` reproduces the
            untempered score; smaller values sharpen each row.

    Returns:
        The score as a Python ``float``.

    Raises:
        ValueError: If ``tau`` is not strictly positive.
    """
    if tau <= 0:
        raise ValueError("tau must be strictly positive")
    # as_tensor accepts tensors and array-likes, and does not copy NumPy input.
    sim_tensor = torch.as_tensor(sim_product)
    # Skip the division at the default temperature to avoid allocating a
    # second full-size matrix.
    scaled = sim_tensor if tau == 1 else sim_tensor / tau
    sim_probs = scaled.softmax(dim=-1)
    diversity = torch.sum(torch.diag(sim_probs))
    return diversity.item()

In [38]:
# Leading 5000 x 5000 block of the distance matrix and the matching
# similarity block (similarity = 1 - distance).
dist_matrix_sub = dist_matrix[0:5000, 0:5000]
sim_matrix = np.subtract(1, dist_matrix_sub)
print(dist_matrix_sub.shape, sim_matrix.shape, sep="\n")

(5000, 5000)
(5000, 5000)


In [42]:
# Benchmark: five timed calls of fast_PLDiv_approx on `dist_matrix_sub`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = fast_PLDiv_approx(dist_matrix_sub)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 5.402 seconds
Run 2: 5.444 seconds
Run 3: 5.440 seconds
Run 4: 5.441 seconds
Run 5: 5.445 seconds

Average runtime: 5.43 ± 0.02 s

Average Diverity: 46.51 ± 0.00 s


In [43]:
# Benchmark: five timed calls of PLDiv_Sparse_MST (sparse=0.95) on
# `dist_matrix_sub`, recording elapsed time and the returned score.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=0.95)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 3.961 seconds
Run 2: 3.955 seconds
Run 3: 3.927 seconds
Run 4: 3.965 seconds
Run 5: 4.030 seconds

Average runtime: 3.97 ± 0.03 s

Average Diverity: 46.52 ± 0.00 s


In [44]:
# Benchmark: five timed calls of PLDiv_Sparse_MST (sparse=10) on
# `dist_matrix_sub`, recording elapsed time and the returned score.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=10)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 2.606 seconds
Run 2: 2.608 seconds
Run 3: 2.614 seconds
Run 4: 2.615 seconds
Run 5: 2.619 seconds

Average runtime: 2.61 ± 0.00 s

Average Diverity: 47.32 ± 0.00 s


In [45]:
# Benchmark: five timed calls of vendi.score_K on `sim_matrix`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    vs = vendi.score_K(sim_matrix)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(vs)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 3.247 seconds
Run 2: 1.130 seconds
Run 3: 1.133 seconds
Run 4: 1.279 seconds
Run 5: 1.207 seconds

Average runtime: 1.60 ± 0.83 s

Average Diverity: 26.50 ± 0.00 s


In [46]:
# Benchmark: five timed calls of dcscore on `sim_matrix`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    dcs = dcscore(sim_matrix)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(dcs)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 0.070 seconds
Run 2: 0.022 seconds
Run 3: 0.022 seconds
Run 4: 0.021 seconds
Run 5: 0.023 seconds

Average runtime: 0.03 ± 0.02 s

Average Diverity: 1.60 ± 0.00 s


In [47]:
# Benchmark: five timed runs of building a Diversipy object from the
# precomputed `dist_matrix_sub` and calling MagAreas(); construction is
# included in the timed region.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    div = Diversipy(Xs = [dist_matrix_sub],  ref_space=0, metric="precomputed")
    mag_areas = div.MagAreas()

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(mag_areas)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 203.488 seconds
Run 2: 132.986 seconds
Run 3: 144.664 seconds
Run 4: 145.673 seconds
Run 5: 197.732 seconds

Average runtime: 164.91 ± 29.55 s

Average Diverity: 1796.71 ± 0.00 s


In [82]:
# Recompute the full cosine-distance matrix, take a copy, and slice the
# leading 10000 x 10000 block from that copy; similarity = 1 - distance.
dist_matrix = cosine_distances(embeddings)
dist_matrix_c = dist_matrix.copy()
dist_matrix_sub = dist_matrix_c[0:10000, 0:10000]
sim_matrix = np.subtract(1, dist_matrix_sub)
print(dist_matrix_sub.shape, sim_matrix.shape, sep="\n")

(10000, 10000)
(10000, 10000)


In [49]:
# Benchmark: five timed calls of fast_PLDiv_approx on `dist_matrix_sub`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = fast_PLDiv_approx(dist_matrix_sub)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 24.350 seconds
Run 2: 24.255 seconds
Run 3: 24.432 seconds
Run 4: 24.208 seconds
Run 5: 24.406 seconds

Average runtime: 24.33 ± 0.09 s

Average Diverity: 78.01 ± 0.00 s


In [50]:
# Benchmark: five timed calls of PLDiv_Sparse_MST (sparse=0.95) on
# `dist_matrix_sub`, recording elapsed time and the returned score.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=0.95)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 17.221 seconds
Run 2: 17.249 seconds
Run 3: 16.673 seconds
Run 4: 16.442 seconds
Run 5: 16.392 seconds

Average runtime: 16.80 ± 0.37 s

Average Diverity: 78.03 ± 0.01 s


In [51]:
# Benchmark: five timed calls of PLDiv_Sparse_MST (sparse=10) on
# `dist_matrix_sub`, recording elapsed time and the returned score.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=10)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 9.851 seconds
Run 2: 9.860 seconds
Run 3: 9.798 seconds
Run 4: 9.870 seconds
Run 5: 9.956 seconds

Average runtime: 9.87 ± 0.05 s

Average Diverity: 79.70 ± 0.00 s


In [55]:
# Benchmark: five timed calls of vendi.score_K on `sim_matrix`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    vs = vendi.score_K(sim_matrix)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(vs)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 7.875 seconds
Run 2: 8.002 seconds
Run 3: 13.901 seconds
Run 4: 14.091 seconds
Run 5: 10.242 seconds

Average runtime: 10.82 ± 2.73 s

Average Diverity: 26.79 ± 0.00 s


In [56]:
# Benchmark: five timed calls of dcscore on `sim_matrix`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    dcs = dcscore(sim_matrix)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(dcs)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 0.069 seconds
Run 2: 0.071 seconds
Run 3: 0.067 seconds
Run 4: 0.061 seconds
Run 5: 0.062 seconds

Average runtime: 0.07 ± 0.00 s

Average Diverity: 1.60 ± 0.00 s


In [83]:
# Benchmark: five timed runs of building a Diversipy object from the
# precomputed `dist_matrix_sub` and calling MagAreas(); construction is
# included in the timed region.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    div = Diversipy(Xs = [dist_matrix_sub],  ref_space=0, metric="precomputed")
    mag_areas = div.MagAreas()

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(mag_areas)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 677.474 seconds
Run 2: 687.035 seconds
Run 3: 717.031 seconds
Run 4: 761.756 seconds
Run 5: 737.409 seconds

Average runtime: 716.14 ± 31.23 s

Average Diverity: 3642.29 ± 0.00 s


In [84]:
# Leading 20000 x 20000 block of the copied distance matrix and the matching
# similarity block (similarity = 1 - distance).
dist_matrix_sub = dist_matrix_c[0:20000, 0:20000]
sim_matrix = np.subtract(1, dist_matrix_sub)
print(dist_matrix_sub.shape, sim_matrix.shape, sep="\n")

(20000, 20000)
(20000, 20000)


In [59]:
# Benchmark: five timed calls of fast_PLDiv_approx on `dist_matrix_sub`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = fast_PLDiv_approx(dist_matrix_sub)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 105.915 seconds
Run 2: 105.826 seconds
Run 3: 104.936 seconds
Run 4: 105.707 seconds
Run 5: 105.700 seconds

Average runtime: 105.62 ± 0.35 s

Average Diverity: 133.55 ± 0.00 s


In [60]:
# Benchmark: five timed calls of PLDiv_Sparse_MST (sparse=0.95) on
# `dist_matrix_sub`, recording elapsed time and the returned score.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=0.95)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 71.818 seconds
Run 2: 70.010 seconds
Run 3: 68.473 seconds
Run 4: 66.892 seconds
Run 5: 65.557 seconds

Average runtime: 68.55 ± 2.21 s

Average Diverity: 133.58 ± 0.03 s


In [61]:
# Benchmark: five timed calls of PLDiv_Sparse_MST (sparse=10) on
# `dist_matrix_sub`, recording elapsed time and the returned score.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=10)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 33.740 seconds
Run 2: 33.759 seconds
Run 3: 33.740 seconds
Run 4: 33.731 seconds
Run 5: 33.727 seconds

Average runtime: 33.74 ± 0.01 s

Average Diverity: 136.86 ± 0.03 s


In [62]:
# Benchmark: five timed calls of vendi.score_K on `sim_matrix`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    vs = vendi.score_K(sim_matrix)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(vs)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 165.765 seconds
Run 2: 190.797 seconds
Run 3: 170.915 seconds
Run 4: 196.993 seconds
Run 5: 194.541 seconds

Average runtime: 183.80 ± 12.88 s

Average Diverity: 26.89 ± 0.00 s


In [63]:
# Benchmark: five timed calls of dcscore on `sim_matrix`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    dcs = dcscore(sim_matrix)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(dcs)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 0.219 seconds
Run 2: 0.228 seconds
Run 3: 0.220 seconds
Run 4: 0.218 seconds
Run 5: 0.228 seconds

Average runtime: 0.22 ± 0.00 s

Average Diverity: 1.60 ± 0.00 s


In [None]:
# Recompute the full cosine-distance matrix and slice its leading
# 30000 x 30000 block; similarity = 1 - distance.
dist_matrix = cosine_distances(embeddings)
dist_matrix_sub = dist_matrix[0:30000, 0:30000]
sim_matrix = np.subtract(1, dist_matrix_sub)
print(dist_matrix_sub.shape, sim_matrix.shape, sep="\n")

(30000, 30000)
(30000, 30000)


In [65]:
# Benchmark: five timed calls of fast_PLDiv_approx on `dist_matrix_sub`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = fast_PLDiv_approx(dist_matrix_sub)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 237.205 seconds
Run 2: 236.378 seconds
Run 3: 234.966 seconds
Run 4: 236.710 seconds
Run 5: 235.884 seconds

Average runtime: 236.23 ± 0.76 s

Average Diverity: 184.93 ± 0.00 s


In [66]:
# Benchmark: five timed calls of PLDiv_Sparse_MST (sparse=0.95) on
# `dist_matrix_sub`, recording elapsed time and the returned score.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=0.95)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 157.103 seconds
Run 2: 151.098 seconds
Run 3: 148.060 seconds
Run 4: 142.793 seconds
Run 5: 138.366 seconds

Average runtime: 147.48 ± 6.50 s

Average Diverity: 184.92 ± 0.02 s


In [67]:
# Benchmark: five timed calls of PLDiv_Sparse_MST (sparse=10) on
# `dist_matrix_sub`, recording elapsed time and the returned score.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=10)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 67.577 seconds
Run 2: 67.867 seconds
Run 3: 67.387 seconds
Run 4: 69.484 seconds
Run 5: 68.442 seconds

Average runtime: 68.15 ± 0.76 s

Average Diverity: 190.23 ± 0.06 s


In [73]:
# Leading 30000 x 30000 block of the distance matrix and the matching
# similarity block (similarity = 1 - distance).
dist_matrix_sub = dist_matrix[0:30000, 0:30000]
sim_matrix = np.subtract(1, dist_matrix_sub)
print(dist_matrix_sub.shape, sim_matrix.shape, sep="\n")

(30000, 30000)
(30000, 30000)


In [74]:
# Benchmark: five timed calls of vendi.score_K on `sim_matrix`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    vs = vendi.score_K(sim_matrix)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(vs)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 775.922 seconds
Run 2: 711.619 seconds
Run 3: 714.985 seconds
Run 4: 743.062 seconds
Run 5: 786.946 seconds

Average runtime: 746.51 ± 30.74 s

Average Diverity: 26.94 ± 0.00 s


In [75]:
# Benchmark: five timed calls of dcscore on `sim_matrix`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    dcs = dcscore(sim_matrix)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(dcs)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 0.489 seconds
Run 2: 0.463 seconds
Run 3: 0.448 seconds
Run 4: 0.465 seconds
Run 5: 0.453 seconds

Average runtime: 0.46 ± 0.01 s

Average Diverity: 1.60 ± 0.00 s


In [None]:
# Benchmark: five timed runs of building a Diversipy object from the
# precomputed `dist_matrix_sub` and calling MagAreas(); construction is
# included in the timed region.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    div = Diversipy(Xs = [dist_matrix_sub],  ref_space=0, metric="precomputed")
    mag_areas = div.MagAreas()

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(mag_areas)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

In [76]:
# Recompute the full cosine-distance matrix and slice its leading
# 40000 x 40000 block; similarity = 1 - distance.
dist_matrix = cosine_distances(embeddings)
dist_matrix_sub = dist_matrix[0:40000, 0:40000]
sim_matrix = np.subtract(1, dist_matrix_sub)
print(dist_matrix_sub.shape, sim_matrix.shape, sep="\n")

(40000, 40000)
(40000, 40000)


In [77]:
# Benchmark: five timed calls of fast_PLDiv_approx on `dist_matrix_sub`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = fast_PLDiv_approx(dist_matrix_sub)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 463.522 seconds
Run 2: 462.247 seconds
Run 3: 462.063 seconds
Run 4: 462.685 seconds
Run 5: 463.241 seconds

Average runtime: 462.75 ± 0.56 s

Average Diverity: 232.89 ± 0.00 s


In [78]:
# Benchmark: five timed calls of PLDiv_Sparse_MST (sparse=0.95) on
# `dist_matrix_sub`, recording elapsed time and the returned score.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=0.95)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 294.518 seconds
Run 2: 284.087 seconds
Run 3: 273.096 seconds
Run 4: 263.410 seconds
Run 5: 254.168 seconds

Average runtime: 273.86 ± 14.35 s

Average Diverity: 232.89 ± 0.06 s


In [79]:
# Benchmark: five timed calls of PLDiv_Sparse_MST (sparse=10) on
# `dist_matrix_sub`, recording elapsed time and the returned score.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=10)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(pldiv)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 115.714 seconds
Run 2: 115.105 seconds
Run 3: 115.512 seconds
Run 4: 115.571 seconds
Run 5: 115.802 seconds

Average runtime: 115.54 ± 0.24 s

Average Diverity: 240.04 ± 0.27 s


In [80]:
# Benchmark: five timed calls of vendi.score_K on `sim_matrix`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    vs = vendi.score_K(sim_matrix)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(vs)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 1705.192 seconds
Run 2: 1464.143 seconds
Run 3: 1890.320 seconds
Run 4: 1984.384 seconds
Run 5: 1886.496 seconds

Average runtime: 1786.11 ± 184.64 s

Average Diverity: 26.96 ± 0.00 s


In [81]:
# Benchmark: five timed calls of dcscore on `sim_matrix`,
# recording elapsed time and the returned score for each run.
runtimes = []
div_runs = []

for i in range(5):
    # perf_counter() is monotonic and high-resolution, which suits interval
    # timing better than the wall-clock time.time().
    start_time = time.perf_counter()

    dcs = dcscore(sim_matrix)

    end_time = time.perf_counter()
    runtime = end_time - start_time
    runtimes.append(runtime)
    div_runs.append(dcs)

    print(f"Run {i+1}: {runtime:.3f} seconds")

mean_runtime = np.mean(runtimes)
std_runtime = np.std(runtimes)
mean_div = np.mean(div_runs)
std_div = np.std(div_runs)

print(f"\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s")
# The score is not a duration, so it is printed without a seconds suffix.
print(f"\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}")

Run 1: 1.001 seconds
Run 2: 0.920 seconds
Run 3: 1.000 seconds
Run 4: 0.907 seconds
Run 5: 0.888 seconds

Average runtime: 0.94 ± 0.05 s

Average Diverity: 1.60 ± 0.00 s
