In [None]:
import numpy as np

import pandas as pd
from numpy import savetxt

import scipy.io

from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score

def run_Kmeans(x, y, k=3, time=100, return_NMI=False, random_state=None):
    """Cluster ``x`` with k-means and score the result against labels ``y``.

    Parameters
    ----------
    x : array-like
        Sample matrix. Input with more than two dimensions has its singleton
        axes squeezed away (e.g. ``(n, 1, d)`` -> ``(n, d)``); 2-D input is
        passed to KMeans unchanged.
    y : array-like
        Ground-truth labels: a 1-D vector of class ids, an ``(n, 1)`` column of
        class ids, or a 2-D per-class matrix (e.g. one-hot) that is reduced
        with ``argmax`` over axis 1.
    k : int, default 3
        Number of clusters.
    time : int, default 100
        Number of independent k-means runs whose NMI / ARI are averaged.
        A falsy value (``0`` / ``None``) performs a single run.
    return_NMI : bool, default False
        When true, return ``(NMI, ARI)``; otherwise return ``None``.
    random_state : int, RandomState or None, default None
        Seed for reproducible runs. An integer seed is offset by the run index
        so repeated runs stay distinct; ``None`` keeps KMeans' unseeded
        behaviour.

    Returns
    -------
    tuple of (float, float) or None
        ``(NMI, ARI)`` (averaged over the runs) when ``return_NMI`` is true.

    Raises
    ------
    ValueError
        If ``time`` is negative.

    Side effects
    ------------
    Prints the score(s). Writes the predicted labels of the last run to
    ``y_pred.txt`` in the working directory whenever ``time`` is truthy or
    ``return_NMI`` is true.
    """
    x = np.asarray(x)
    # Squeeze only >2-D input: squeezing an (n, 1) matrix would yield a 1-D
    # vector, which KMeans rejects.
    if x.ndim > 2:
        x = np.squeeze(x)

    y = np.asarray(y)
    if y.ndim > 1:
        if y.shape[1] == 1:
            # Column of class ids: argmax over axis 1 would map every row to 0.
            y = y.ravel()
        else:
            y = np.argmax(y, axis=1)

    if time is not None and time < 0:
        raise ValueError("time must be a non-negative number of runs, got {!r}".format(time))

    n_runs = time if time else 1
    NMI_list = []
    ARI_list = []
    y_pred = None
    for run in range(n_runs):
        if random_state is None:
            seed = None
        elif isinstance(random_state, (int, np.integer)):
            # A fixed seed reused for every run would make all runs identical.
            seed = random_state + run
        else:
            # A shared RandomState object advances between runs on its own.
            seed = random_state
        estimator = KMeans(n_clusters=k, random_state=seed)
        # KMeans is unsupervised: the labels are used for scoring only.
        estimator.fit(x)
        y_pred = estimator.predict(x)
        NMI_list.append(normalized_mutual_info_score(y, y_pred))
        ARI_list.append(adjusted_rand_score(y, y_pred))

    score = sum(NMI_list) / len(NMI_list)
    s2 = sum(ARI_list) / len(ARI_list)

    if time:
        # Report the real number of runs instead of a hard-coded "100".
        print('NMI ({0} avg): {1:.4f} , ARI ({0} avg): {2:.4f}'.format(
            len(NMI_list), score, s2))
    else:
        print("NMI on all label data: {:.5f}".format(score))

    # Same conditions under which the labels were persisted before, but the
    # file is now written once instead of twice.
    if time or return_NMI:
        np.savetxt('y_pred.txt', y_pred, fmt='%i')

    if return_NMI:
        return score, s2

In [None]:
# Dataset whose embedding is evaluated, and where its .mat file lives.
dataset = "acm"
path = "embeddings/"
filename = 'objCoord-{}-9D-100e15.mat'.format(dataset)

# The matrix to cluster is stored under the 'objCoord' key of the .mat file.
embedding_file = '{}{}'.format(path, filename)
X = scipy.io.loadmat(embedding_file)['objCoord']

# Cluster count per dataset:
#   flickr => 7; acm => 3; dblp => 3; imdb => 3; brainA / brainH => 6
clusters = 3

# Ground-truth labels for the same dataset.
path2 = 'data/{}/'.format(dataset)
Y = np.loadtxt('{}ground_truth.txt'.format(path2))

In [None]:
# Cluster the loaded embedding and score it against the ground truth,
# using run_Kmeans' default number of repetitions.
run_Kmeans(x=X, y=Y, k=clusters)