In [1]:
import tensorflow as tf
import numpy as np
import os

In [2]:
def load_data(path):
    x = None
    for filename in os.listdir(path):
        with open(os.path.join(path, filename), 'rb') as f:
            tmp = tf.cast([np.load(f)], dtype=tf.float64)
            if x is None:
                x = tf.linalg.matmul(tmp, tmp, transpose_b=True)
            else:
                x = tf.concat([x, tf.linalg.matmul(tmp, tmp, transpose_b=True)], axis=0)
    return x

In [3]:
steps = 200
# alphas, betas, gammas = [1.0, 0.1], [1000.0, 2000.0, 100.0], [0.01, 0.1]
alphas, betas, gammas = [0.1], [1000.0], [0.01]

In [4]:
def fusion(x, k, alpha, beta, gamma):
    f = None
    n = x.shape[1]
    t = x.shape[0]
    reg = gamma / (4.0 * beta)
    wv, wv_u = tf.cast([1.0 / t for _ in range(t)], dtype=tf.float64), None
    i_alpha = tf.math.multiply(tf.eye(n, dtype=tf.float64), alpha)
    i_beta = tf.math.multiply(tf.eye(n, dtype=tf.float64), beta)
    s = tf.eye(n, dtype=tf.float64)
    for step in range(steps):
        s = tf.where(s > 0, s, tf.zeros(s.shape, dtype=tf.float64))
        s = tf.math.divide(tf.math.add(s, tf.transpose(s)), 2.0)
        z_sum = tf.zeros(s.shape, dtype=tf.float64)
        for i in range(t):
            z = tf.linalg.solve(
                tf.math.add(tf.math.add(x[i], i_alpha), tf.math.multiply(i_beta, wv[i])),
                tf.math.add(tf.math.multiply(s, tf.math.multiply(wv[i], beta)), x[i]))
            z = tf.where(z > 0, z, tf.zeros(z.shape, dtype=tf.float64))
            z = tf.math.divide(tf.math.add(z, tf.transpose(z)), 2.0)
            w = tf.math.divide(0.5, tf.norm(tf.math.subtract(z, s)))
            wv_u = tf.reshape(w, [1]) if i == 0 else tf.concat([wv_u, [w]], axis=0)
            z = tf.math.multiply(z, w)
            z_sum = tf.math.add(z_sum, z)

        l = tf.math.subtract(tf.linalg.diag(tf.math.reduce_sum(s, 0)), s)
        (_, f) = tf.linalg.eigh(l)
        f = tf.slice(f, [0, 0], [f.shape[0], k])
        p = tf.norm(tf.math.subtract(f[0][:], f), axis=1, keepdims=True)
        for i in range(1, n):
             p = tf.concat([p, tf.norm(tf.math.subtract(f[0][:], f), axis=1, keepdims=True)], 1)
        p = tf.math.multiply(tf.math.square(p), reg)
        wv = wv_u
        s_old = s
        s = tf.math.divide((tf.math.subtract(z_sum, p)), tf.math.reduce_sum(wv, 0))
        if step > 5 and tf.math.less(tf.math.divide(tf.norm(tf.math.subtract(s, s_old)), tf.norm(s_old)), 1.e-4):
            print('Criterion found after', step, 'steps')
            return f
    return f

In [5]:
from sklearn.cluster import KMeans

def run_k_means(k, f):
        k_means = KMeans(n_clusters=k, n_init=10).fit(f)
        clusters = k_means.labels_
        d = {}
        for i in clusters:
            if i in d:
                d[i] += 1
            else:
                d[i] = 1
        max_k = 0
        for i in d:
            max_k = max(max_k, d[i])
        print('Biggest cluster:', max_k)
        print('k:', k, '\n', 'Clusters:', d)
        return clusters

In [6]:
def run_experiments(ks, views):
    d = {}
    x = load_data(views)
    print(x.shape)
    for k in ks:
        d[k] = {}
        for alpha in alphas:
            d[k][alpha] = {}
            for beta in betas:
                d[k][alpha][beta] = {}
                for gamma in gammas:
                    print('Run with parameters', alpha, beta, gamma)
                    d[k][alpha][beta][gamma] = run_k_means(k, fusion(x, k, alpha, beta, gamma).numpy())
    return d

In [7]:
def save_np(name, matrix):
    with open(name + ".npy", 'wb') as f:
        np.save(f, matrix)

def save_results(results, c_id):
    for k in results:
        for alpha in results[k]:
            for beta in results[k][alpha]:
                for gamma in results[k][alpha][beta]:
                    tmp = 'clusters/'+c_id+'_'+ str(k)+'_'+str(alpha)+'_'+str(beta)+'_'+str(gamma)
                    save_np(tmp, results[k][alpha][beta][gamma])

In [9]:
save_results(run_experiments([70], 'views/profiles/'), 'profiles')

(2, 391, 391)
Run with parameters 0.1 1000.0 0.01
Criterion found after 6 steps
Biggest cluster: 29
k: 70 
 Clusters: {15: 5, 1: 6, 38: 3, 43: 4, 18: 2, 7: 6, 26: 4, 34: 8, 66: 2, 37: 3, 9: 3, 33: 3, 17: 4, 57: 2, 4: 4, 42: 4, 58: 3, 65: 2, 27: 4, 16: 15, 30: 20, 67: 29, 47: 21, 35: 1, 11: 3, 2: 4, 56: 2, 60: 4, 39: 2, 22: 2, 24: 17, 6: 2, 55: 2, 12: 5, 31: 16, 49: 5, 20: 3, 53: 4, 45: 28, 54: 2, 29: 4, 62: 4, 40: 9, 25: 2, 0: 2, 32: 5, 68: 3, 13: 3, 23: 2, 3: 18, 64: 3, 50: 7, 10: 2, 28: 2, 61: 4, 48: 3, 41: 2, 5: 24, 63: 2, 8: 12, 44: 1, 51: 2, 19: 6, 59: 1, 21: 1, 46: 3, 69: 1, 52: 1, 14: 1, 36: 2}
