# Metis

In [5]:
from scipy.sparse import lil_matrix, csr_matrix
import numpy as np
import metis
from clustering_evaluation import computeEvaluationMeasures, MEASURES
import glob
from tabulate import tabulate
from tqdm import tqdm


def _loadSparseCSRFromDisk(fullPath):
    spD = None
    with open(fullPath, 'rb') as f:
        data = np.load(f)
        indices = np.load(f)
        indptr = np.load(f)
        spD = csr_matrix((data, indices, indptr))
        # convert this nxk matrix into a similarity.
    return spD

def getLabelsFromFile(fullPath):
    """Load integer labels from a comma-delimited text file.

    Parameters
    ----------
    fullPath : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Integer array as produced by ``np.loadtxt`` (1-D when the file has a
        single column).
    """
    # np.int was a plain alias of the builtin int; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    return np.loadtxt(fullPath, delimiter=',', dtype=int)

def detectPostfixesInDir(fullPath, dirName):
    """Collect the postfixes of the label files found in a dataset directory.

    Every file matching ``<fullPath>/<dirName>/*labels_*.out`` contributes
    its base name with the ``labels_NCF_DS_`` marker and the ``.out``
    extension removed.  Entries come back in the order ``glob`` reports them.
    """
    pattern = '{0}/{1}/*labels_*.out'.format(fullPath, dirName)
    postFixes = []
    for matched in glob.glob(pattern):
        # Keep only the part after the last "/" (the file name itself).
        baseName = matched.rsplit("/", 1)[-1]
        stripped = baseName.replace('labels_NCF_DS_', '')
        postFixes.append(stripped.replace('.out', ''))
    return postFixes

def buildBipartiteAdj(A):
    """Build the adjacency list of the bipartite graph described by ``A``.

    The ``n`` rows of ``A`` become nodes ``0 .. n-1`` and its ``d`` columns
    become nodes ``n .. n+d-1``.  Every non-zero entry ``A[i, j]`` produces
    the edge between node ``i`` and node ``n + j``, listed in both
    directions.

    Parameters
    ----------
    A : scipy sparse matrix or 2-D array of shape (n, d)
        Incidence pattern; any non-zero value counts as an edge.  The input
        is not modified.

    Returns
    -------
    adjLst : list of list of int
        ``adjLst[k]`` holds the neighbours of node ``k`` in ascending order.
    nodews : list of tuple
        Two-component node weights: ``(1, 0)`` for row nodes and ``(0, 1)``
        for column nodes.
    """
    # Work on a private, canonical CSR copy.  Reading the index arrays
    # directly avoids materialising an (n+d) x (n+d) matrix, and testing for
    # "stored and non-zero" keeps edges whose weight would not survive a
    # cast to int8 (e.g. 0.5 or 256).
    rowView = csr_matrix(A, copy=True)
    rowView.sum_duplicates()
    rowView.eliminate_zeros()
    rowView.sort_indices()
    n, d = rowView.shape

    # Same pattern, organised by column, for the second half of the node set.
    colView = rowView.transpose().tocsr()
    colView.sort_indices()

    # Column j of A is graph node n + j; widen first so the shift cannot
    # overflow a 32-bit index array.
    shiftedCols = rowView.indices.astype(np.int64) + n
    rowPtr = rowView.indptr
    adjLst = [shiftedCols[rowPtr[i]:rowPtr[i + 1]].tolist() for i in range(n)]

    colPtr = colView.indptr
    rowIdx = colView.indices
    adjLst.extend(rowIdx[colPtr[j]:colPtr[j + 1]].tolist() for j in range(d))

    # Tuples are immutable, so repeating the same object is safe.
    nodews = [(1, 0)] * n + [(0, 1)] * d
    return adjLst, nodews

def perform_evaluate_runs(dsDirPath, nruns=10, threshold=0.0, basePath=None):
    """Partition every view of a dataset with METIS and print averaged scores.

    For each of ``nruns`` runs a seed is drawn, every similarity matrix of
    the dataset is turned into a bipartite graph and partitioned with that
    seed, and the evaluation measures are averaged over the matrices.  One
    table row is printed per run, followed by an ``Average`` row over runs.

    Parameters
    ----------
    dsDirPath : str
        Name of the dataset sub-directory.
    nruns : int
        Number of seeded repetitions.
    threshold : float
        Entries of a similarity matrix strictly greater than this value
        become edges of the graph.
    basePath : str or None
        Directory containing ``dsDirPath``.  ``None`` (the default) falls
        back to the module-level ``path`` variable.

    Raises
    ------
    FileNotFoundError
        If no ``labels_*.out`` file is found for the dataset.
    """
    if basePath is None:
        # Looked up at call time, so ``path`` may be defined after this function.
        basePath = path

    trueLblsFname = "{0}/{1}/{2}".format(basePath, dsDirPath, 'labels.true')
    trueL = getLabelsFromFile(trueLblsFname)
    postFixesDS = detectPostfixesInDir(basePath, dsDirPath)
    if not postFixesDS:
        # Without this guard the per-run mean below is NaN and the failure
        # surfaces as an unrelated TypeError.
        raise FileNotFoundError(
            "no 'labels_*.out' files found in {0}/{1}".format(basePath, dsDirPath))

    n_clusters = np.unique(trueL).shape[0]

    # Loading a matrix and deriving its adjacency list does not depend on the
    # seed, so it is done once per view instead of once per (run, view).
    viewGraphs = []
    for runPostfix in postFixesDS:
        simFname = "{0}/{1}/{2}{3}_csr.npy".format(basePath, dsDirPath, 'simmatrix_DS_', runPostfix)
        D = _loadSparseCSRFromDisk(simFname)
        adjLst, nodews = buildBipartiteAdj(csr_matrix(D > threshold, dtype=np.int8))
        # Only the first D.shape[0] nodes are rows of D; remember that count.
        viewGraphs.append((adjLst, nodews, D.shape[0]))

    dsresults = []
    header = ['seed']
    header.extend(MEASURES)
    np.random.seed(87292)  # fixed so the sequence of run seeds is reproducible

    # The partitioning method has a stochastic part, hence several runs.
    for _ in tqdm(range(nruns)):
        # A single run goes through all the matrices with the same seed.
        runSeed = np.random.randint(0, high=2e8)
        runDsResults = []
        for adjLst, nodews, nObjects in viewGraphs:
            metisG = metis.adjlist_to_metis(adjLst, nodew=nodews)
            _, parts = metis.part_graph(metisG, n_clusters, recursive=True, seed=runSeed)

            # Score only the row nodes against the ground-truth labels.
            perf = computeEvaluationMeasures(trueL, parts[0:nObjects])
            runDsResults.append([perf[m] for m in MEASURES])

        runRow = [runSeed]
        runRow.extend(list(np.mean(runDsResults, axis=0)))  # average over views
        dsresults.append(runRow)

    # Column means over runs; the leading (seed) column is dropped.
    dsresults.append(['Average'] + list(np.mean(dsresults, axis=0)[1:]))
    print(tabulate(dsresults, headers=header, tablefmt='github', floatfmt='.6f', showindex=True))
    
# Root directory that holds one sub-directory per dataset; perform_evaluate_runs
# reads this module-level name when it builds the file paths it opens.
path = '/home/juan/Insync/juan.zamora@pucv.cl/Google Drive/Research - Multiview and Collaborative Clustering/code/handwritten_tezt/similarity_matrices_after_merging'

## BBC-seg4

In [9]:
perform_evaluate_runs('BBC-seg4', threshold=0.00) # no important difference with the default threshold

100%|██████████| 10/10 [00:10<00:00,  1.09s/it]

|    | seed      |        E |        P |       F1 |      ACC |      NMI |     PREC |      REC |      ARI |
|----|-----------|----------|----------|----------|----------|----------|----------|----------|----------|
|  0 | 35233113  | 0.425108 | 0.724380 | 0.049256 | 0.044672 | 0.534074 | 0.058773 | 0.044672 | 0.469580 |
|  1 | 144593887 | 0.439837 | 0.743212 | 0.159990 | 0.173139 | 0.518924 | 0.161097 | 0.173139 | 0.473120 |
|  2 | 23880047  | 0.416521 | 0.727737 | 0.055801 | 0.050073 | 0.542907 | 0.066658 | 0.050073 | 0.475774 |
|  3 | 136596323 | 0.405848 | 0.736204 | 0.047248 | 0.044088 | 0.553889 | 0.055479 | 0.044088 | 0.489632 |
|  4 | 162763079 | 0.412394 | 0.730219 | 0.048935 | 0.045693 | 0.547155 | 0.057596 | 0.045693 | 0.480750 |
|  5 | 96272240  | 0.385958 | 0.745109 | 0.169535 | 0.215182 | 0.574348 | 0.150522 | 0.215182 | 0.506073 |
|  6 | 173435422 | 0.414439 | 0.730657 | 0.054292 | 0.050365 | 0.545049 | 0.063602 | 0.050365 | 0.481829 |
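|  7 | 59971465  | 0.420003 | 0.727591 | 0.058726 | 0.054891 | 0.539325 | 0.068066 | 0.054891 | 0.474210 |
|  8 | 15877837  | 0.437136 | 0.680000 | 0.132085 | 0.147737 | 0.521697 | 0.130822 | 0.147737 | 0.428170 |
|  9 | 84982878  | 0.405610 | 0.735620 | 0.047587 | 0.043942 | 0.554132 | 0.055953 | 0.043942 | 0.489884 |
| 10 | Average   | 0.416285 | 0.728073 | 0.082346 | 0.086978 | 0.543150 | 0.086857 | 0.086978 | 0.476902 |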




|    | seed      |        E |        P |       F1 |      ACC |      NMI |     PREC |      REC |      ARI |
|----|-----------|----------|----------|----------|----------|----------|----------|----------|----------|
|  0 | 35233113  | 0.425108 | 0.724380 | 0.049256 | 0.044672 | 0.534074 | 0.058773 | 0.044672 | 0.469580 |
|  1 | 144593887 | 0.439837 | 0.743212 | 0.159990 | 0.173139 | 0.518924 | 0.161097 | 0.173139 | 0.473120 |
|  2 | 23880047  | 0.416521 | 0.727737 | 0.055801 | 0.050073 | 0.542907 | 0.066658 | 0.050073 | 0.475774 |
|  3 | 136596323 | 0.405848 | 0.736204 | 0.047248 | 0.044088 | 0.553889 | 0.055479 | 0.044088 | 0.489632 |
|  4 | 162763079 | 0.412394 | 0.730219 | 0.048935 | 0.045693 | 0.547155 | 0.057596 | 0.045693 | 0.480750 |
|  5 | 96272240  | 0.385958 | 0.745109 | 0.169535 | 0.215182 | 0.574348 | 0.150522 | 0.215182 | 0.506073 |
|  6 | 173435422 | 0.414439 | 0.730657 | 0.054292 | 0.050365 | 0.545049 | 0.063602 | 0.050365 | 0.481829 |
|  7 | 59971465  | 0.420003 | 0.727591 | 0.058726 | 0.054891 | 0.539325 | 0.068066 | 0.054891 | 0.474210 |
|  8 | 15877837  | 0.437136 | 0.680000 | 0.132085 | 0.147737 | 0.521697 | 0.130822 | 0.147737 | 0.428170 |
|  9 | 84982878  | 0.405610 | 0.735620 | 0.047587 | 0.043942 | 0.554132 | 0.055953 | 0.043942 | 0.489884 |
| 10 | Average   | 0.416285 | 0.728073 | 0.082346 | 0.086978 | 0.543150 | 0.086857 | 0.086978 | 0.476902 |

$\mu : 0.07$

|    | seed      |     E |     P |    F1 |   ACC |   NMI |   PREC |   REC |   ARI |
|----|-----------|-------|-------|-------|-------|-------|--------|-------|-------|
|  0 | 35233113  | 0.346 | 0.757 | 0.130 | 0.112 | 0.616 |  0.157 | 0.112 | 0.525 |
|  1 | 144593887 | 0.356 | 0.753 | 0.106 | 0.133 | 0.605 |  0.099 | 0.133 | 0.521 |
|  2 | 23880047  | 0.409 | 0.747 | 0.038 | 0.035 | 0.551 |  0.046 | 0.035 | 0.466 |
|  3 | 136596323 | 0.404 | 0.746 | 0.086 | 0.094 | 0.556 |  0.092 | 0.094 | 0.463 |
|  4 | 162763079 | 0.389 | 0.753 | 0.097 | 0.111 | 0.571 |  0.098 | 0.111 | 0.481 |
|  5 | 96272240  | 0.401 | 0.748 | 0.148 | 0.185 | 0.559 |  0.133 | 0.185 | 0.470 |
|  6 | 173435422 | 0.430 | 0.745 | 0.147 | 0.149 | 0.529 |  0.155 | 0.149 | 0.459 |
|  7 | 59971465  | 0.423 | 0.742 | 0.043 | 0.041 | 0.536 |  0.048 | 0.041 | 0.457 |
|  8 | 15877837  | 0.379 | 0.743 | 0.182 | 0.185 | 0.581 |  0.201 | 0.185 | 0.497 |
|  9 | 84982878  | 0.389 | 0.752 | 0.093 | 0.111 | 0.571 |  0.090 | 0.111 | 0.477 |
| 10 | Average   | 0.393 | 0.749 | 0.107 | 0.116 | 0.567 |  0.112 | 0.116 | 0.482 |

## Caltech

In [6]:
perform_evaluate_runs('Caltech', threshold=0.00) # no important difference with the default threshold

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 10/10 [00:57<00:00,  5.71s/it]

|    | seed      |        E |        P |       F1 |      ACC |      NMI |     PREC |      REC |      ARI |
|----|-----------|----------|----------|----------|----------|----------|----------|----------|----------|
|  0 | 35233113  | 0.564359 | 0.510645 | 0.088424 | 0.071165 | 0.249134 | 0.216395 | 0.071165 | 0.100262 |
|  1 | 144593887 | 0.582064 | 0.493630 | 0.064772 | 0.051676 | 0.229505 | 0.175967 | 0.051676 | 0.082443 |
|  2 | 23880047  | 0.571298 | 0.482691 | 0.044969 | 0.037008 | 0.242052 | 0.117606 | 0.037008 | 0.087942 |
|  3 | 136596323 | 0.570782 | 0.510226 | 0.082472 | 0.066220 | 0.243440 | 0.205506 | 0.066220 | 0.102905 |
|  4 | 162763079 | 0.579307 | 0.487804 | 0.059938 | 0.047024 | 0.232284 | 0.164922 | 0.047024 | 0.086275 |
|  5 | 96272240  | 0.594239 | 0.485960 | 0.043958 | 0.038307 | 0.216719 | 0.114041 | 0.038307 | 0.091565 |
|  6 | 173435422 | 0.549333 | 0.519447 | 0.035305 | 0.031727 | 0.266507 | 0.085940 | 0.031727 | 0.111451 |
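|  7 | 59971465  | 0.572588 | 0.496815 | 0.063541 | 0.053856 | 0.241163 | 0.143191 | 0.053856 | 0.099256 |
|  8 | 15877837  | 0.547125 | 0.527661 | 0.076548 | 0.062909 | 0.269141 | 0.206320 | 0.062909 | 0.108325 |
|  9 | 84982878  | 0.577209 | 0.500126 | 0.065779 | 0.053101 | 0.235055 | 0.162879 | 0.053101 | 0.100162 |
| 10 | Average   | 0.570830 | 0.501500 | 0.062571 | 0.051299 | 0.242500 | 0.159277 | 0.051299 | 0.097059 |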




|    | seed      |        E |        P |       F1 |      ACC |      NMI |     PREC |      REC |      ARI |
|----|-----------|----------|----------|----------|----------|----------|----------|----------|----------|
|  0 | 35233113  | 0.564359 | 0.510645 | 0.088424 | 0.071165 | 0.249134 | 0.216395 | 0.071165 | 0.100262 |
|  1 | 144593887 | 0.582064 | 0.493630 | 0.064772 | 0.051676 | 0.229505 | 0.175967 | 0.051676 | 0.082443 |
|  2 | 23880047  | 0.571298 | 0.482691 | 0.044969 | 0.037008 | 0.242052 | 0.117606 | 0.037008 | 0.087942 |
|  3 | 136596323 | 0.570782 | 0.510226 | 0.082472 | 0.066220 | 0.243440 | 0.205506 | 0.066220 | 0.102905 |
|  4 | 162763079 | 0.579307 | 0.487804 | 0.059938 | 0.047024 | 0.232284 | 0.164922 | 0.047024 | 0.086275 |
|  5 | 96272240  | 0.594239 | 0.485960 | 0.043958 | 0.038307 | 0.216719 | 0.114041 | 0.038307 | 0.091565 |
|  6 | 173435422 | 0.549333 | 0.519447 | 0.035305 | 0.031727 | 0.266507 | 0.085940 | 0.031727 | 0.111451 |
|  7 | 59971465  | 0.572588 | 0.496815 | 0.063541 | 0.053856 | 0.241163 | 0.143191 | 0.053856 | 0.099256 |
|  8 | 15877837  | 0.547125 | 0.527661 | 0.076548 | 0.062909 | 0.269141 | 0.206320 | 0.062909 | 0.108325 |
|  9 | 84982878  | 0.577209 | 0.500126 | 0.065779 | 0.053101 | 0.235055 | 0.162879 | 0.053101 | 0.100162 |
| 10 | Average   | 0.570830 | 0.501500 | 0.062571 | 0.051299 | 0.242500 | 0.159277 | 0.051299 | 0.097059 |

## Handwritten

In [7]:
perform_evaluate_runs('handwritten', threshold=0.00) # no important difference with the default threshold

100%|██████████| 10/10 [00:45<00:00,  4.59s/it]

|    | seed      |        E |        P |       F1 |      ACC |      NMI |     PREC |      REC |      ARI |
|----|-----------|----------|----------|----------|----------|----------|----------|----------|----------|
|  0 | 35233113  | 0.561767 | 0.549050 | 0.022343 | 0.022600 | 0.439859 | 0.022251 | 0.022600 | 0.317991 |
|  1 | 144593887 | 0.533175 | 0.583100 | 0.057012 | 0.051500 | 0.468221 | 0.065787 | 0.051500 | 0.357029 |
|  2 | 23880047  | 0.596028 | 0.521450 | 0.034019 | 0.034100 | 0.405377 | 0.034858 | 0.034100 | 0.291221 |
|  3 | 136596323 | 0.499963 | 0.631050 | 0.034365 | 0.035500 | 0.500378 | 0.033420 | 0.035500 | 0.409154 |
|  4 | 162763079 | 0.501799 | 0.610500 | 0.071189 | 0.064750 | 0.499600 | 0.081255 | 0.064750 | 0.393042 |
|  5 | 96272240  | 0.562241 | 0.537700 | 0.144070 | 0.143800 | 0.439558 | 0.145670 | 0.143800 | 0.314904 |
|  6 | 173435422 | 0.549996 | 0.566700 | 0.089039 | 0.078600 | 0.451412 | 0.105417 | 0.078600 | 0.347889 |
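|  7 | 59971465  | 0.554929 | 0.564000 | 0.083922 | 0.086100 | 0.446720 | 0.082608 | 0.086100 | 0.330560 |
|  8 | 15877837  | 0.556490 | 0.521000 | 0.074403 | 0.072850 | 0.444787 | 0.078433 | 0.072850 | 0.317447 |
|  9 | 84982878  | 0.536880 | 0.581900 | 0.152623 | 0.161200 | 0.464939 | 0.146547 | 0.161200 | 0.354900 |
| 10 | Average   | 0.545327 | 0.566645 | 0.076299 | 0.075100 | 0.456085 | 0.079624 | 0.075100 | 0.343414 |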




|    | seed      |        E |        P |       F1 |      ACC |      NMI |     PREC |      REC |      ARI |
|----|-----------|----------|----------|----------|----------|----------|----------|----------|----------|
|  0 | 35233113  | 0.561767 | 0.549050 | 0.022343 | 0.022600 | 0.439859 | 0.022251 | 0.022600 | 0.317991 |
|  1 | 144593887 | 0.533175 | 0.583100 | 0.057012 | 0.051500 | 0.468221 | 0.065787 | 0.051500 | 0.357029 |
|  2 | 23880047  | 0.596028 | 0.521450 | 0.034019 | 0.034100 | 0.405377 | 0.034858 | 0.034100 | 0.291221 |
|  3 | 136596323 | 0.499963 | 0.631050 | 0.034365 | 0.035500 | 0.500378 | 0.033420 | 0.035500 | 0.409154 |
|  4 | 162763079 | 0.501799 | 0.610500 | 0.071189 | 0.064750 | 0.499600 | 0.081255 | 0.064750 | 0.393042 |
|  5 | 96272240  | 0.562241 | 0.537700 | 0.144070 | 0.143800 | 0.439558 | 0.145670 | 0.143800 | 0.314904 |
|  6 | 173435422 | 0.549996 | 0.566700 | 0.089039 | 0.078600 | 0.451412 | 0.105417 | 0.078600 | 0.347889 |
|  7 | 59971465  | 0.554929 | 0.564000 | 0.083922 | 0.086100 | 0.446720 | 0.082608 | 0.086100 | 0.330560 |
|  8 | 15877837  | 0.556490 | 0.521000 | 0.074403 | 0.072850 | 0.444787 | 0.078433 | 0.072850 | 0.317447 |
|  9 | 84982878  | 0.536880 | 0.581900 | 0.152623 | 0.161200 | 0.464939 | 0.146547 | 0.161200 | 0.354900 |
| 10 | Average   | 0.545327 | 0.566645 | 0.076299 | 0.075100 | 0.456085 | 0.079624 | 0.075100 | 0.343414 |

## NusWide

In [8]:
perform_evaluate_runs('NusWide', threshold=0.00) # no important difference with the default threshold

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

|    | seed      |        E |        P |       F1 |      ACC |      NMI |     PREC |      REC |      ARI |
|----|-----------|----------|----------|----------|----------|----------|----------|----------|----------|
|  0 | 35233113  | 0.856827 | 0.170870 | 0.035455 | 0.030637 | 0.038918 | 0.056949 | 0.030637 | 0.010543 |
|  1 | 144593887 | 0.860202 | 0.170973 | 0.037205 | 0.031823 | 0.035317 | 0.057615 | 0.031823 | 0.009699 |
|  2 | 23880047  | 0.858370 | 0.170537 | 0.037083 | 0.031937 | 0.037268 | 0.058001 | 0.031937 | 0.010540 |
|  3 | 136596323 | 0.860105 | 0.162510 | 0.041210 | 0.035827 | 0.035432 | 0.063918 | 0.035827 | 0.009254 |
|  4 | 162763079 | 0.855250 | 0.175817 | 0.047052 | 0.038860 | 0.040569 | 0.079678 | 0.038860 | 0.011349 |
|  5 | 96272240  | 0.859282 | 0.172133 | 0.034361 | 0.029537 | 0.036305 | 0.054475 | 0.029537 | 0.010040 |
|  6 | 173435422 | 0.856555 | 0.173293 | 0.035343 | 0.030553 | 0.039071 | 0.056364 | 0.030553 | 0.010959 |
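|  7 | 59971465  | 0.859421 | 0.171633 | 0.035151 | 0.030240 | 0.036142 | 0.055494 | 0.030240 | 0.010125 |
|  8 | 15877837  | 0.854602 | 0.177797 | 0.044551 | 0.036750 | 0.041262 | 0.075255 | 0.036750 | 0.011483 |
|  9 | 84982878  | 0.857968 | 0.172430 | 0.038118 | 0.032727 | 0.037700 | 0.059992 | 0.032727 | 0.010859 |
| 10 | Average   | 0.857858 | 0.171799 | 0.038553 | 0.032889 | 0.037798 | 0.061774 | 0.032889 | 0.010485 |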




|    | seed      |        E |        P |       F1 |      ACC |      NMI |     PREC |      REC |      ARI |
|----|-----------|----------|----------|----------|----------|----------|----------|----------|----------|
|  0 | 35233113  | 0.856827 | 0.170870 | 0.035455 | 0.030637 | 0.038918 | 0.056949 | 0.030637 | 0.010543 |
|  1 | 144593887 | 0.860202 | 0.170973 | 0.037205 | 0.031823 | 0.035317 | 0.057615 | 0.031823 | 0.009699 |
|  2 | 23880047  | 0.858370 | 0.170537 | 0.037083 | 0.031937 | 0.037268 | 0.058001 | 0.031937 | 0.010540 |
|  3 | 136596323 | 0.860105 | 0.162510 | 0.041210 | 0.035827 | 0.035432 | 0.063918 | 0.035827 | 0.009254 |
|  4 | 162763079 | 0.855250 | 0.175817 | 0.047052 | 0.038860 | 0.040569 | 0.079678 | 0.038860 | 0.011349 |
|  5 | 96272240  | 0.859282 | 0.172133 | 0.034361 | 0.029537 | 0.036305 | 0.054475 | 0.029537 | 0.010040 |
|  6 | 173435422 | 0.856555 | 0.173293 | 0.035343 | 0.030553 | 0.039071 | 0.056364 | 0.030553 | 0.010959 |
|  7 | 59971465  | 0.859421 | 0.171633 | 0.035151 | 0.030240 | 0.036142 | 0.055494 | 0.030240 | 0.010125 |
|  8 | 15877837  | 0.854602 | 0.177797 | 0.044551 | 0.036750 | 0.041262 | 0.075255 | 0.036750 | 0.011483 |
|  9 | 84982878  | 0.857968 | 0.172430 | 0.038118 | 0.032727 | 0.037700 | 0.059992 | 0.032727 | 0.010859 |
| 10 | Average   | 0.857858 | 0.171799 | 0.038553 | 0.032889 | 0.037798 | 0.061774 | 0.032889 | 0.010485 |