# Check how `n_init` influences the stability of NEB results (ARI across seeds)

In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
import corc.graph_metrics.neb
import corc.utils
import corc.our_datasets
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np


# Cache directory, relative to this notebook; it is passed to
# corc.utils.load_dataset and is also where the n_init result cache lives.
cache_path="../../cache"

In [None]:
# Values of n_init that are compared against each other.
possible_n_init = [1,5,10,20, 50]

# Number of models fitted for every n_init value.
num_models = 6

# Load the dataset
dataset = "mnist16"
X,y,tsne = corc.utils.load_dataset(dataset, cache_path=cache_path)

# Result cache for the fitted models and their ARI scores. It is derived from
# cache_path so that the cache location is configured in exactly one place.
# NOTE(review): the file name does not encode `dataset`, so switching the
# dataset reuses results cached for a previous one -- delete the file (or
# rename it) when changing `dataset`.
filename = os.path.join(cache_path, "n_init.pkl")

In [4]:
# Try loading previously computed results so that a re-run only fits the
# n_init values that are still missing.
# NOTE(review): pickle.load can execute arbitrary code; only load cache files
# that were written by this notebook.
try:
    with open(filename, 'rb') as f:
        all_tmms, all_aris = pickle.load(f)
except FileNotFoundError:
    # No cache yet: start from scratch.
    all_tmms = dict()
    all_aris = dict()


for n_init in possible_n_init:
    # An entry only counts as cached when BOTH the models and their ARI scores
    # are present and complete; otherwise it is recomputed from scratch.
    is_cached = (
        n_init in all_tmms
        and n_init in all_aris
        and len(all_tmms[n_init]) == num_models
        and len(all_aris[n_init]) == num_models
    )
    if is_cached:
        continue
    print(f"Running NEB with n_init={n_init}")
    # Collect into local lists and publish them only once all models are
    # fitted, so an interrupted run never leaves partial entries in the dicts.
    tmms = []
    aris = []
    for model_idx in range(num_models):
        tmm = corc.graph_metrics.neb.NEB(
            data=X,
            labels=y,
            n_components=25,
            n_init=n_init,
            n_neighbors=10,
            # The seed depends only on model_idx, so every n_init value is
            # evaluated on the same set of seeds.
            seed=42+model_idx * 100,
        )
        tmm.fit(X)
        aris.append(tmm.get_ari(X,y))
        tmms.append(tmm)
    all_tmms[n_init] = tmms
    all_aris[n_init] = aris
    # Checkpoint after every n_init value. Write to a temporary file first and
    # swap it in with os.replace so that an interruption during the dump cannot
    # leave a truncated, unreadable cache behind.
    tmp_filename = f"{filename}.tmp"
    with open(tmp_filename, 'wb') as f:
        pickle.dump((all_tmms, all_aris), f)
    os.replace(tmp_filename, filename)

Running NEB with n_init=1
Mixture model fit took 0.76 seconds.
[[404 129]
 [285  39]
 [447  31]
 [297  36]
 [399 100]
 [346  20]
 [511  48]
 [238  53]
 [604  34]
 [381  27]
 [245  50]
 [333 189]
 [453  26]
 [279  36]
 [381  32]
 [460  38]
 [268  39]
 [371  18]
 [634  18]
 [390  30]
 [478  24]
 [449  43]
 [359  24]
 [459  35]
 [529  25]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:05<00:00,  5.80s/it]


Mixture model fit took 0.53 seconds.
[[527  25]
 [366  26]
 [396 101]
 [643  18]
 [446  42]
 [458  26]
 [350  36]
 [458  37]
 [339 189]
 [235  53]
 [592  32]
 [456  42]
 [500  37]
 [382  18]
 [245  50]
 [375  33]
 [344  20]
 [307  37]
 [363  24]
 [292  39]
 [273  38]
 [468  23]
 [403 127]
 [272  25]
 [510  48]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.09s/it]


Mixture model fit took 0.80 seconds.
[[371  32]
 [440  37]
 [351  27]
 [308 205]
 [346  36]
 [259  41]
 [563  26]
 [216  51]
 [599  35]
 [397 103]
 [437 130]
 [393  32]
 [489  20]
 [496  35]
 [218  56]
 [606  25]
 [508  24]
 [515  18]
 [455  43]
 [472  32]
 [481  47]
 [339  28]
 [285  48]
 [207  43]
 [249  41]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.09s/it]


Mixture model fit took 0.78 seconds.
[[393  30]
 [168  59]
 [458  28]
 [543  19]
 [392 103]
 [490  32]
 [613  24]
 [381  46]
 [337  24]
 [497  36]
 [318 197]
 [233  50]
 [487  23]
 [392  32]
 [310  39]
 [602  35]
 [459  20]
 [526  36]
 [251  41]
 [217  55]
 [433 130]
 [452  43]
 [214  47]
 [261  42]
 [573  25]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.10s/it]


Mixture model fit took 0.52 seconds.
[[432  40]
 [241  41]
 [398 100]
 [350  20]
 [484  25]
 [519  48]
 [337 186]
 [306  35]
 [237  31]
 [550  42]
 [399  28]
 [530  31]
 [444  42]
 [331  24]
 [432  31]
 [376  31]
 [451  34]
 [382  26]
 [213  54]
 [645  17]
 [403 128]
 [298  36]
 [367  33]
 [514  24]
 [361  31]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.13s/it]


Mixture model fit took 0.51 seconds.
[[416  22]
 [398  99]
 [242  51]
 [457  37]
 [526  24]
 [353  23]
 [219  46]
 [234  51]
 [340 183]
 [456  26]
 [536  35]
 [618  33]
 [264  44]
 [291  40]
 [466  31]
 [448  29]
 [640  18]
 [376  17]
 [402 128]
 [635  26]
 [373  31]
 [213  43]
 [486  24]
 [265  42]
 [346  20]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


Running NEB with n_init=5
Mixture model fit took 2.71 seconds.
[[246  50]
 [239  50]
 [405 126]
 [467  43]
 [339 188]
 [523  25]
 [240  52]
 [419  30]
 [512  47]
 [317  22]
 [452  38]
 [505  37]
 [447  36]
 [397 100]
 [378  25]
 [448  42]
 [464  32]
 [435  30]
 [650  17]
 [433  28]
 [288  40]
 [511  24]
 [291  35]
 [329  20]
 [265  27]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


Mixture model fit took 2.54 seconds.
[[527  25]
 [366  26]
 [396 101]
 [643  18]
 [446  42]
 [458  26]
 [350  36]
 [458  37]
 [339 189]
 [235  53]
 [592  32]
 [456  42]
 [500  37]
 [382  18]
 [245  50]
 [375  33]
 [344  20]
 [307  37]
 [363  24]
 [292  39]
 [273  38]
 [468  23]
 [403 127]
 [272  25]
 [510  48]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.15s/it]


Mixture model fit took 2.95 seconds.
[[244  51]
 [400 101]
 [277  49]
 [455  42]
 [520  27]
 [520  27]
 [446  26]
 [655  18]
 [363  38]
 [421  49]
 [513  48]
 [379  34]
 [348  20]
 [408 127]
 [289  35]
 [269  27]
 [509  37]
 [278  40]
 [444  36]
 [372  26]
 [281  41]
 [244  52]
 [329 192]
 [462  37]
 [574  23]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.11s/it]


Mixture model fit took 2.77 seconds.
[[393  30]
 [168  59]
 [458  28]
 [543  19]
 [392 103]
 [490  32]
 [613  24]
 [381  46]
 [337  24]
 [497  36]
 [318 197]
 [233  50]
 [487  23]
 [392  32]
 [310  39]
 [602  35]
 [459  20]
 [526  36]
 [251  41]
 [217  55]
 [433 130]
 [452  43]
 [214  47]
 [261  42]
 [573  25]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.10s/it]


Mixture model fit took 2.57 seconds.
[[210  61]
 [469  31]
 [438  30]
 [505  47]
 [293  29]
 [461  43]
 [398 101]
 [353  26]
 [449  42]
 [376  26]
 [636  18]
 [358  24]
 [286  39]
 [629  34]
 [409  25]
 [405 128]
 [357  33]
 [260  29]
 [336 190]
 [464  37]
 [524  25]
 [507  37]
 [235  54]
 [350  20]
 [292  26]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


Mixture model fit took 2.65 seconds.
[[238  32]
 [339  26]
 [512  24]
 [556  42]
 [423  41]
 [628  34]
 [483  31]
 [339 189]
 [353  20]
 [457  45]
 [646  18]
 [246  41]
 [339  33]
 [262  36]
 [555  25]
 [403 128]
 [360  34]
 [442  30]
 [264  41]
 [344  26]
 [312  36]
 [455  42]
 [210  58]
 [397 100]
 [437  30]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


Running NEB with n_init=10
Mixture model fit took 5.27 seconds.
[[246  50]
 [239  50]
 [405 126]
 [467  43]
 [339 188]
 [523  25]
 [240  52]
 [419  30]
 [512  47]
 [317  22]
 [452  38]
 [505  37]
 [447  36]
 [397 100]
 [378  25]
 [448  42]
 [464  32]
 [435  30]
 [650  17]
 [433  28]
 [288  40]
 [511  24]
 [291  35]
 [329  20]
 [265  27]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.13s/it]


Mixture model fit took 5.09 seconds.
[[248  21]
 [232  42]
 [476  31]
 [399 100]
 [426  43]
 [415  29]
 [330  21]
 [433  30]
 [462  44]
 [256  29]
 [359  34]
 [511  36]
 [514  25]
 [384  21]
 [400  33]
 [382  20]
 [422  40]
 [336 190]
 [308  41]
 [627  34]
 [557  41]
 [238  50]
 [406 127]
 [379  24]
 [500  23]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.13s/it]


Mixture model fit took 5.44 seconds.
[[244  51]
 [400 101]
 [277  49]
 [455  42]
 [520  27]
 [520  27]
 [446  26]
 [655  18]
 [363  38]
 [421  49]
 [513  48]
 [379  34]
 [348  20]
 [408 127]
 [289  35]
 [269  27]
 [509  37]
 [278  40]
 [444  36]
 [372  26]
 [281  41]
 [244  52]
 [329 192]
 [462  37]
 [574  23]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.15s/it]


Mixture model fit took 5.33 seconds.
[[243  50]
 [545  26]
 [277  36]
 [445  43]
 [477  34]
 [402 129]
 [460  38]
 [492  28]
 [468  31]
 [618  34]
 [306  36]
 [384  26]
 [355  20]
 [283  49]
 [282  40]
 [242  41]
 [247  40]
 [399 100]
 [277  42]
 [656  18]
 [367  33]
 [509  24]
 [559  24]
 [336 188]
 [371  32]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.19s/it]


Mixture model fit took 5.13 seconds.
[[281  40]
 [262  42]
 [469  24]
 [398 101]
 [244  41]
 [455  42]
 [380  25]
 [460  44]
 [628  34]
 [350  20]
 [404 128]
 [258  28]
 [524  25]
 [338 190]
 [344  26]
 [356  33]
 [436  30]
 [639  18]
 [510  36]
 [391  20]
 [460  37]
 [241  52]
 [469  31]
 [454  28]
 [249  49]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.13s/it]


Mixture model fit took 5.21 seconds.
[[238  32]
 [339  26]
 [512  24]
 [556  42]
 [423  41]
 [628  34]
 [483  31]
 [339 189]
 [353  20]
 [457  45]
 [646  18]
 [246  41]
 [339  33]
 [262  36]
 [555  25]
 [403 128]
 [360  34]
 [442  30]
 [264  41]
 [344  26]
 [312  36]
 [455  42]
 [210  58]
 [397 100]
 [437  30]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


Running NEB with n_init=20
Mixture model fit took 10.33 seconds.
[[246  50]
 [239  50]
 [405 126]
 [467  43]
 [339 188]
 [523  25]
 [240  52]
 [419  30]
 [512  47]
 [317  22]
 [452  38]
 [505  37]
 [447  36]
 [397 100]
 [378  25]
 [448  42]
 [464  32]
 [435  30]
 [650  17]
 [433  28]
 [288  40]
 [511  24]
 [291  35]
 [329  20]
 [265  27]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


Mixture model fit took 10.25 seconds.
[[317  43]
 [295  36]
 [498  29]
 [406 128]
 [481  32]
 [340 188]
 [459  38]
 [423  31]
 [242  52]
 [412  23]
 [960  19]
 [377  25]
 [281  40]
 [476  44]
 [263  46]
 [417  43]
 [511  36]
 [230  42]
 [249  51]
 [397 101]
 [441  36]
 [265  28]
 [209  30]
 [545  25]
 [506  24]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.12s/it]


Mixture model fit took 10.69 seconds.
[[244  51]
 [400 101]
 [277  49]
 [455  42]
 [520  27]
 [520  27]
 [446  26]
 [655  18]
 [363  38]
 [421  49]
 [513  48]
 [379  34]
 [348  20]
 [408 127]
 [289  35]
 [269  27]
 [509  37]
 [278  40]
 [444  36]
 [372  26]
 [281  41]
 [244  52]
 [329 192]
 [462  37]
 [574  23]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.12s/it]


Mixture model fit took 10.45 seconds.
[[243  50]
 [545  26]
 [277  36]
 [445  43]
 [477  34]
 [402 129]
 [460  38]
 [492  28]
 [468  31]
 [618  34]
 [306  36]
 [384  26]
 [355  20]
 [283  49]
 [282  40]
 [242  41]
 [247  40]
 [399 100]
 [277  42]
 [656  18]
 [367  33]
 [509  24]
 [559  24]
 [336 188]
 [371  32]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


Mixture model fit took 10.39 seconds.
[[459  44]
 [267  41]
 [237  52]
 [398 100]
 [433  30]
 [371  19]
 [352  25]
 [242  42]
 [403 128]
 [451  28]
 [356  33]
 [286  40]
 [628  34]
 [452  43]
 [485  24]
 [510  36]
 [471  31]
 [459  38]
 [338 190]
 [383  24]
 [524  25]
 [249  49]
 [257  29]
 [640  18]
 [349  20]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.13s/it]


Mixture model fit took 10.29 seconds.
[[238  32]
 [339  26]
 [512  24]
 [556  42]
 [423  41]
 [628  34]
 [483  31]
 [339 189]
 [353  20]
 [457  45]
 [646  18]
 [246  41]
 [339  33]
 [262  36]
 [555  25]
 [403 128]
 [360  34]
 [442  30]
 [264  41]
 [344  26]
 [312  36]
 [455  42]
 [210  58]
 [397 100]
 [437  30]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


Running NEB with n_init=50
Mixture model fit took 25.79 seconds.
[[519  48]
 [403 126]
 [344  31]
 [293  34]
 [428  31]
 [449  36]
 [182  30]
 [335  33]
 [340 184]
 [552  42]
 [463  47]
 [506  24]
 [295  27]
 [404  27]
 [445  42]
 [223  52]
 [357  19]
 [641  18]
 [397 101]
 [247  50]
 [430  41]
 [506  24]
 [426  30]
 [325  23]
 [490  31]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.13s/it]


Mixture model fit took 25.53 seconds.
[[317  43]
 [295  36]
 [498  29]
 [406 128]
 [481  32]
 [340 188]
 [459  38]
 [423  31]
 [242  52]
 [412  23]
 [960  19]
 [377  25]
 [281  40]
 [476  44]
 [263  46]
 [417  43]
 [511  36]
 [230  42]
 [249  51]
 [397 101]
 [441  36]
 [265  28]
 [209  30]
 [545  25]
 [506  24]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


Mixture model fit took 25.57 seconds.
[[362  21]
 [465  38]
 [316  36]
 [521  26]
 [272  40]
 [531  25]
 [400  99]
 [641  18]
 [336 187]
 [611  35]
 [622  27]
 [280  40]
 [414  22]
 [246  41]
 [453  43]
 [373  33]
 [405 129]
 [240  52]
 [384  24]
 [309  25]
 [208  64]
 [263  42]
 [472  30]
 [426  34]
 [450  32]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]


Mixture model fit took 25.97 seconds.
[[267  27]
 [339 188]
 [445  36]
 [268  41]
 [467  43]
 [405 127]
 [551  42]
 [245  41]
 [353  20]
 [361  25]
 [211  60]
 [293  36]
 [376  25]
 [529  25]
 [505  37]
 [472  30]
 [316  26]
 [435  30]
 [530  25]
 [429  40]
 [452  42]
 [649  18]
 [247  50]
 [458  32]
 [397 101]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.19s/it]


Mixture model fit took 25.95 seconds.
[[321  34]
 [368 116]
 [555  42]
 [617  34]
 [448  29]
 [524  25]
 [355  26]
 [323  25]
 [449  43]
 [371  32]
 [467  30]
 [361  20]
 [533  25]
 [208  61]
 [386  26]
 [278 220]
 [185 156]
 [429  39]
 [466  31]
 [530  35]
 [637  18]
 [245  41]
 [303 137]
 [368  32]
 [273  41]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.17s/it]


Mixture model fit took 26.08 seconds.
[[277  35]
 [455  45]
 [562  42]
 [635  18]
 [466  30]
 [446  42]
 [397 100]
 [315  25]
 [540  25]
 [251  31]
 [440  29]
 [339 189]
 [460  32]
 [289  37]
 [422  42]
 [404 127]
 [531  25]
 [363  25]
 [260  47]
 [355  29]
 [253  51]
 [211  60]
 [360  20]
 [513  47]
 [456  36]]
After filtering 25 components, we are left with 25 components


t-NEB: 100%|██████████| 1/1 [00:03<00:00,  3.15s/it]


In [5]:
# Display the raw ARI scores: a dict mapping each n_init value to the list of
# scores of its fitted models.
all_aris

{1: [0.8220948832282708,
  0.7724012959576083,
  0.923370267963385,
  0.9172635379532691,
  0.8041751123355706,
  0.818456867087472],
 5: [0.803146991384419,
  0.7724012959576083,
  0.7928545100702789,
  0.9172635379532691,
  0.8049897881065575,
  0.8290625190964208],
 10: [0.803146991384419,
  0.8113183751613736,
  0.7928545100702789,
  0.8071291181070189,
  0.8075988869954716,
  0.8290625190964208],
 20: [0.803146991384419,
  0.8029414254912194,
  0.7928545100702789,
  0.8071291181070189,
  0.8065572504343851,
  0.8290625190964208],
 50: [0.7390889418281847,
  0.8029414254912194,
  0.8211381436491753,
  0.8083355660741832,
  0.8115001180402955,
  0.807432588936563]}

In [6]:
# Summarize the ARI scores per n_init value: mean and sample standard
# deviation (ddof=1) over the fitted models.
for n_init, aris in all_aris.items():
    avg_ari, std_ari = np.mean(aris), np.std(aris, ddof=1)
    print(f"n_init: {n_init}, avg_ari: {avg_ari:.4f}, std_ari: {std_ari:.4f}")

n_init: 1, avg_ari: 0.8430, std_ari: 0.0625
n_init: 5, avg_ari: 0.8200, std_ari: 0.0511
n_init: 10, avg_ari: 0.8085, std_ari: 0.0119
n_init: 20, avg_ari: 0.8069, std_ari: 0.0120
n_init: 50, avg_ari: 0.7984, std_ari: 0.0297


{1: [0.8149319189414314,
  0.8107508775419262,
  0.7375568043612805,
  0.729552049285818,
  0.8050739129542397,
  0.7270526172305195],
 5: [0.7375568043612805,
  0.7375568043612805,
  0.7375568043612805,
  0.8073045470167569,
  0.8101493767361339,
  0.837058201611172],
 10: [0.7375568043612805,
  0.7375568043612805,
  0.7375568043612805,
  0.837058201611172,
  0.837058201611172,
  0.837058201611172],
 20: [0.7375568043612805,
  0.7375568043612805,
  0.7375568043612805,
  0.837058201611172,
  0.837058201611172,
  0.837058201611172],
 50: [0.810330467595892,
  0.810330467595892,
  0.810330467595892,
  0.810330467595892,
  0.810330467595892,
  0.810330467595892]}