In [1]:
import tensorflow as tf
import numpy as np
import os

In [2]:
def load_data(path):
    """Load every view stored in ``path`` and return their Gram matrices as one tensor.

    Each regular file in ``path`` is read with ``np.load``, wrapped in a
    leading axis of length 1, cast to float64 and multiplied by its own
    transpose. The per-file products are then concatenated along axis 0, so
    2-D inputs of shape ``(n, d)`` yield a result of shape ``(t, n, n)`` for
    ``t`` files.

    Args:
        path: Directory containing one ``np.load``-readable file per view.

    Returns:
        A float64 tensor with one ``X @ X^T`` matrix per file, ordered by
        sorted filename.

    Raises:
        ValueError: If ``path`` contains no regular files.
    """
    grams = []
    # os.listdir returns entries in arbitrary order; sorting keeps the view
    # order identical on every run and platform.
    for filename in sorted(os.listdir(path)):
        full_path = os.path.join(path, filename)
        if not os.path.isfile(full_path):
            # Skip sub-directories (e.g. .ipynb_checkpoints) instead of
            # failing inside open().
            continue
        with open(full_path, 'rb') as f:
            view = tf.cast([np.load(f)], dtype=tf.float64)
        grams.append(tf.linalg.matmul(view, view, transpose_b=True))

    if not grams:
        raise ValueError('No view files found in {!r}'.format(path))

    # Concatenate once rather than re-copying the accumulated tensor per file.
    return tf.concat(grams, axis=0)

In [3]:
# Maximum number of alternating-update iterations fusion() runs per (alpha, beta) pair.
steps = 200

# Candidate values fusion() iterates over in its outer (alpha) and inner (beta) loops.
alphas = [1.0, 0.1]
betas = [1000.0, 2000.0, 100.0]

# Numerator of the regulariser fusion() computes as gamma / (4 * beta).
gamma = 0.01

In [4]:
def _nonneg_symmetric(m):
    """Zero out the non-positive entries of ``m`` and average it with its transpose."""
    m = tf.where(m > 0, m, tf.zeros_like(m))
    return tf.math.divide(tf.math.add(m, tf.transpose(m)), 2.0)


def _pairwise_sq_dists(f):
    """Return the (n, n) matrix whose (i, j) entry is ``||f[i] - f[j]||^2``."""
    # (n, 1, k) - (1, n, k) broadcasts to every ordered pair of rows.
    diff = tf.math.subtract(tf.expand_dims(f, 1), tf.expand_dims(f, 0))
    return tf.math.reduce_sum(tf.math.square(diff), axis=-1)


def fusion(x, k):
    """Fuse several per-view matrices into one graph and return a spectral embedding.

    For each ``(alpha, beta)`` pair taken from the module-level ``alphas`` and
    ``betas`` lists, the routine alternates for at most ``steps`` iterations
    between:

    1. solving a linear system per view to obtain a view-specific graph ``z``,
       made non-negative and symmetric;
    2. re-weighting each view by ``0.5 / ||z - s||``;
    3. taking the first ``k`` eigenvectors ``f`` of the Laplacian of the
       current consensus graph ``s`` (``tf.linalg.eigh`` order);
    4. updating ``s`` from the weighted sum of the ``z`` minus
       ``gamma / (4 * beta)`` times the pairwise squared distances between
       rows of ``f``.

    Args:
        x: Tensor indexed as ``x[i]`` per view; ``x.shape[0]`` is the number of
            views and ``x.shape[1]`` the number of samples ``n``. Each ``x[i]``
            is combined with ``n x n`` identity matrices, so it is expected to
            be ``n x n`` float64.
        k: Number of leading eigenvectors to keep.

    Returns:
        The ``(n, k)`` eigenvector matrix ``f`` from the iteration at which the
        relative change of ``s`` first dropped below ``1e-3`` (checked only for
        ``step > 5``), or the last ``f`` computed if no run converged.
        ``None`` if no iteration was executed.

    NOTE(review): the function returns from inside the (alpha, beta) grid as
    soon as one run converges, so later pairs are only tried when earlier ones
    fail to converge. Confirm this is the intended search strategy.
    """
    f = None
    t, n = x.shape[0], x.shape[1]
    eye = tf.eye(n, dtype=tf.float64)
    for alpha in alphas:
        for beta in betas:
            reg = gamma / (4.0 * beta)
            # Uniform initial view weights, built directly in float64.
            wv = tf.constant([1.0 / t] * t, dtype=tf.float64)
            i_alpha = tf.math.multiply(eye, alpha)
            i_beta = tf.math.multiply(eye, beta)
            s = eye
            for step in range(steps):
                s = _nonneg_symmetric(s)

                # Per-view graphs and their new weights.
                z_sum = tf.zeros_like(s)
                new_weights = []
                for i in range(t):
                    z = tf.linalg.solve(
                        tf.math.add(tf.math.add(x[i], i_alpha), tf.math.multiply(i_beta, wv[i])),
                        tf.math.add(tf.math.multiply(s, tf.math.multiply(wv[i], beta)), x[i]))
                    z = _nonneg_symmetric(z)
                    w = tf.math.divide(0.5, tf.norm(tf.math.subtract(z, s)))
                    new_weights.append(w)
                    z_sum = tf.math.add(z_sum, tf.math.multiply(z, w))

                # Spectral embedding of the current consensus graph.
                laplacian = tf.math.subtract(tf.linalg.diag(tf.math.reduce_sum(s, 0)), s)
                _, eigvecs = tf.linalg.eigh(laplacian)
                f = tf.slice(eigvecs, [0, 0], [eigvecs.shape[0], k])

                # Entry (i, j) must be ||f[i] - f[j]||^2: each column uses its
                # own reference row. Measuring every row against f[0] alone
                # would make all columns identical and drop the pairwise term.
                p = tf.math.multiply(_pairwise_sq_dists(f), reg)

                wv = tf.stack(new_weights)
                s_old = s
                s = tf.math.divide(tf.math.subtract(z_sum, p), tf.math.reduce_sum(wv, 0))

                rel_change = tf.math.divide(tf.norm(tf.math.subtract(s, s_old)), tf.norm(s_old))
                if step > 5 and tf.math.less(rel_change, 1.e-3):
                    print('Criterion found after', step, 'steps')
                    return f
    return f

In [5]:
from sklearn.cluster import KMeans

def run_k_means(views, ks=(2,), random_state=0):
    """Load the views in a directory, fuse them, and print k-means cluster sizes.

    Args:
        views: Directory path passed to ``load_data``.
        ks: Iterable of cluster counts to try. Each ``k`` is used both as the
            embedding width passed to ``fusion`` and as ``n_clusters``.
            Defaults to ``(2,)``.
        random_state: Seed forwarded to ``KMeans`` so the printed cluster
            sizes are repeatable between runs. Pass ``None`` for unseeded
            initialisation.

    Returns:
        None. The shape of the loaded tensor and, for each ``k``, a
        ``{label: size}`` dict are printed.
    """
    x = load_data(views)
    print(x.shape)
    for k in ks:
        embedding = fusion(x, k).numpy()
        k_means = KMeans(n_clusters=k, n_init=10, random_state=random_state).fit(embedding)
        sizes = {}
        for label in k_means.labels_:
            # Plain ints keep the printed dict independent of NumPy's scalar repr.
            label = int(label)
            sizes[label] = sizes.get(label, 0) + 1
        print('k:', k, '\n', 'Clusters:', sizes)

In [6]:
# Fuse and cluster the views under views/nodes/both/ (the recorded run loaded 2 matrices of 733 x 733).
run_k_means('views/nodes/both/')

(2, 733, 733)
Criterion found after 6 steps
k: 2 
 Clusters: {0: 604, 1: 129}


In [7]:
# Cluster the single view under views/nodes/words/ (the recorded run loaded 1 matrix of 733 x 733).
run_k_means('views/nodes/words/')

(1, 733, 733)
Criterion found after 6 steps
k: 2 
 Clusters: {0: 731, 1: 2}


In [8]:
# Cluster the single view under views/nodes/files/ (the recorded run loaded 1 matrix of 733 x 733).
run_k_means('views/nodes/files/')

(1, 733, 733)
Criterion found after 6 steps
k: 2 
 Clusters: {0: 516, 1: 217}


In [9]:
# Fuse and cluster the views under views/two_views/ (the recorded run loaded 2 matrices of 69 x 69).
run_k_means('views/two_views/')

(2, 69, 69)
Criterion found after 6 steps
k: 2 
 Clusters: {0: 62, 1: 7}


In [10]:
# Cluster the single view under views/single_words/ (the recorded run loaded 1 matrix of 69 x 69).
run_k_means('views/single_words/')

(1, 69, 69)
Criterion found after 6 steps
k: 2 
 Clusters: {0: 68, 1: 1}


In [11]:
# Cluster the single view under views/single_files/ (the recorded run loaded 1 matrix of 69 x 69).
run_k_means('views/single_files/')

(1, 69, 69)
Criterion found after 6 steps
k: 2 
 Clusters: {0: 65, 1: 4}


In [14]:
# Fuse and cluster every view under views/all/ (the recorded run loaded 32 matrices of 69 x 69).
# NOTE(review): this cell's execution count jumps from 11 to 14; re-run from a fresh kernel to confirm the output.
run_k_means('views/all/')

(32, 69, 69)
Criterion found after 6 steps
k: 2 
 Clusters: {0: 68, 1: 1}
