In [1]:
import tensorflow as tf
import numpy as np
import os

In [2]:
def load_data(path):
    x = None
    for filename in os.listdir(path):
        with open(os.path.join(path, filename), 'rb') as f:
            tmp = tf.cast([np.load(f)], dtype=tf.float64)
            if x is None:
                x = tf.linalg.matmul(tmp, tmp, transpose_b=True)
            else:
                x = tf.concat([x, tf.linalg.matmul(tmp, tmp, transpose_b=True)], axis=0)
    return x

In [3]:
steps = 200
alphas, betas, gammas = [1.0, 0.1], [1000.0, 2000.0, 100.0], [0.01, 0.1]

In [4]:
def fusion(x, k, alpha, beta, gamma):
    f = None
    n = x.shape[1]
    t = x.shape[0]
    reg = gamma / (4.0 * beta)
    wv, wv_u = tf.cast([1.0 / t for _ in range(t)], dtype=tf.float64), None
    i_alpha = tf.math.multiply(tf.eye(n, dtype=tf.float64), alpha)
    i_beta = tf.math.multiply(tf.eye(n, dtype=tf.float64), beta)
    s = tf.eye(n, dtype=tf.float64)
    for step in range(steps):
        s = tf.where(s > 0, s, tf.zeros(s.shape, dtype=tf.float64))
        s = tf.math.divide(tf.math.add(s, tf.transpose(s)), 2.0)
        z_sum = tf.zeros(s.shape, dtype=tf.float64)
        for i in range(t):
            z = tf.linalg.solve(
                tf.math.add(tf.math.add(x[i], i_alpha), tf.math.multiply(i_beta, wv[i])),
                tf.math.add(tf.math.multiply(s, tf.math.multiply(wv[i], beta)), x[i]))
            z = tf.where(z > 0, z, tf.zeros(z.shape, dtype=tf.float64))
            z = tf.math.divide(tf.math.add(z, tf.transpose(z)), 2.0)
            w = tf.math.divide(0.5, tf.norm(tf.math.subtract(z, s)))
            wv_u = tf.reshape(w, [1]) if i == 0 else tf.concat([wv_u, [w]], axis=0)
            z = tf.math.multiply(z, w)
            z_sum = tf.math.add(z_sum, z)

        l = tf.math.subtract(tf.linalg.diag(tf.math.reduce_sum(s, 0)), s)
        (_, f) = tf.linalg.eigh(l)
        f = tf.slice(f, [0, 0], [f.shape[0], k])
        p = tf.norm(tf.math.subtract(f[0][:], f), axis=1, keepdims=True)
        for i in range(1, n):
             p = tf.concat([p, tf.norm(tf.math.subtract(f[0][:], f), axis=1, keepdims=True)], 1)
        p = tf.math.multiply(tf.math.square(p), reg)
        wv = wv_u
        s_old = s
        s = tf.math.divide((tf.math.subtract(z_sum, p)), tf.math.reduce_sum(wv, 0))
        if step > 5 and tf.math.less(tf.math.divide(tf.norm(tf.math.subtract(s, s_old)), tf.norm(s_old)), 1.e-3):
            print('Criterion found after', step, 'steps')
            return f
    return f

In [5]:
from sklearn.cluster import KMeans

def run_k_means(k, f):
        k_means = KMeans(n_clusters=k, n_init=10).fit(f)
        clusters = k_means.labels_
        d = {}
        for i in clusters:
            if i in d:
                d[i] += 1
            else:
                d[i] = 1
        print('k:', k, '\n', 'Clusters:', d)

In [6]:
def run_experiments(ks, views):
    x = load_data(views)
    print(x.shape)
    for k in ks:
        for alpha in alphas:
            for beta in betas:
                for gamma in gammas:
                    print('Run with parameters', alpha, beta, gamma)
                    run_k_means(k, fusion(x, k, alpha, beta, gamma).numpy())

In [7]:
run_experiments([2, 7], 'views/nodes/both/')
# Run with parameters 0.1 1000.0 0.01
# k: 2
#  Clusters: {1: 275, 0: 458}
# Run with parameters 0.1 1000.0 0.01
# k: 7
#  Clusters: {6: 11, 4: 25, 2: 17, 0: 427, 1: 234, 5: 12, 3: 7}
# Run with parameters 0.1 1000.0 0.1
# k: 7
#  Clusters: {3: 11, 5: 27, 2: 17, 6: 215, 4: 10, 1: 446, 0: 7}
# Run with parameters 0.1 2000.0 0.01
# k: 7
#  Clusters: {3: 23, 0: 88, 2: 18, 1: 481, 5: 27, 6: 62, 4: 34}

(2, 733, 733)
Run with parameters 1.0 1000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 604, 1: 129}
Run with parameters 1.0 1000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 606, 1: 127}
Run with parameters 1.0 2000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 643, 1: 90}
Run with parameters 1.0 2000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 650, 1: 83}
Run with parameters 1.0 100.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 726, 1: 7}
Run with parameters 1.0 100.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 726, 1: 7}
Run with parameters 0.1 1000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {1: 275, 0: 458}
Run with parameters 0.1 1000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 245, 1: 488}
Run with parameters 0.1 2000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 577, 1: 156}
Run with parameters 0.1 2000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {1: 577, 0: 156}

In [8]:
run_experiments([2, 7], 'views/nodes/words/')

(1, 733, 733)
Run with parameters 1.0 1000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 731, 1: 2}
Run with parameters 1.0 1000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 730, 1: 3}
Run with parameters 1.0 2000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 732, 1: 1}
Run with parameters 1.0 2000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 732, 1: 1}
Run with parameters 1.0 100.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 731, 1: 2}
Run with parameters 1.0 100.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 732, 1: 1}
Run with parameters 0.1 1000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 732, 1: 1}
Run with parameters 0.1 1000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 730, 1: 3}
Run with parameters 0.1 2000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 732, 1: 1}
Run with parameters 0.1 2000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 732, 1: 1}
Run with para

In [9]:
run_experiments([2, 7], 'views/nodes/files/')
# Run with parameters 0.1 1000.0 0.01
# k: 2
#  Clusters: {1: 254, 0: 479}
# Run with parameters 0.1 1000.0 0.1
# k: 2
#  Clusters: {0: 419, 1: 314}
# Run with parameters 0.1 1000.0 0.01
# k: 7
#  Clusters: {3: 11, 4: 19, 5: 20, 1: 436, 0: 226, 6: 13, 2: 8}
# Run with parameters 0.1 1000.0 0.1
# k: 7
#  Clusters: {5: 14, 0: 29, 4: 24, 1: 370, 3: 11, 6: 9, 2: 276}
# Run with parameters 1.0 1000.0 0.01
# k: 7
#  Clusters: {3: 11, 0: 20, 5: 20, 1: 449, 2: 115, 4: 16, 6: 102}
# Run with parameters 0.1 2000.0 0.01
# k: 7
#  Clusters: {4: 65, 2: 45, 3: 18, 0: 492, 1: 42, 5: 42, 6: 29}
# Run with parameters 0.1 2000.0 0.1
# k: 7
#  Clusters: {0: 67, 5: 8, 2: 19, 4: 1, 1: 139, 6: 13, 3: 486}

(1, 733, 733)
Run with parameters 1.0 1000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 516, 1: 217}
Run with parameters 1.0 1000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 608, 1: 125}
Run with parameters 1.0 2000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 615, 1: 118}
Run with parameters 1.0 2000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {1: 612, 0: 121}
Run with parameters 1.0 100.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 726, 1: 7}
Run with parameters 1.0 100.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 726, 1: 7}
Run with parameters 0.1 1000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {1: 254, 0: 479}
Run with parameters 0.1 1000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 419, 1: 314}
Run with parameters 0.1 2000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 586, 1: 147}
Run with parameters 0.1 2000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 544, 1: 18

In [16]:
#run_experiments([2, 7], 'views/two_views/')

In [17]:
#run_experiments([2, 7], 'views/single_words/')

In [18]:
#run_experiments([2, 7], 'views/single_files/')

In [19]:
# run_experiments([2, 7], 'views/all/')

In [14]:
run_experiments([2, 7], 'GFSC/reuters_views/')

(5, 1200, 1200)
Run with parameters 1.0 1000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 1185, 1: 15}
Run with parameters 1.0 1000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 1185, 1: 15}
Run with parameters 1.0 2000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 1185, 1: 15}
Run with parameters 1.0 2000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 1185, 1: 15}
Run with parameters 1.0 100.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 1158, 1: 42}
Run with parameters 1.0 100.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 1158, 1: 42}
Run with parameters 0.1 1000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {1: 1185, 0: 15}
Run with parameters 0.1 1000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {1: 1185, 0: 15}
Run with parameters 0.1 2000.0 0.01
Criterion found after 6 steps
k: 2 
 Clusters: {0: 1185, 1: 15}
Run with parameters 0.1 2000.0 0.1
Criterion found after 6 steps
k: 2 
 Clusters: {0: 1185