In [None]:
import os
import numpy as np
import scipy
import time
import matplotlib.pyplot as plt
import gaussian_mix_module as gmm
import TensorFox as tfx

In [None]:
def generate_problem(d=20, k=5, m=10000, display=True):
    """Create a random mixture problem and store it in 'gaussian_mix_data.mat'.

    Parameters
    ----------
    d, k : int
        Forwarded to ``gmm.gen_parameters`` (presumably the dimension and the
        number of clusters -- confirm against gaussian_mix_module).
    m : int
        Forwarded to ``gmm.gen_samples`` (presumably the number of samples).
    display : bool
        Forwarded unchanged to ``gmm.empirical``.

    Returns
    -------
    tuple
        ``(u, w, sigma, M3_approx)``: the generated parameters and the value
        returned by ``gmm.empirical``.

    Side effects
    ------------
    Writes ``u, w, sigma, M3_approx, d, k, m`` to 'gaussian_mix_data.mat' in
    the current working directory.
    """
    u, w, sigma = gmm.gen_parameters(d, k)

    # Only the samples are used here; the per-cluster counts are discarded.
    _, data = gmm.gen_samples(u, w, sigma, m)
    M3_approx = gmm.empirical(u, w, sigma, data, k, display)

    # Persist the problem to a .mat file in the working directory.
    saved = {
        'u': u,
        'w': w,
        'sigma': sigma,
        'M3_approx': M3_approx,
        'd': d,
        'k': k,
        'm': m,
    }
    scipy.io.savemat('gaussian_mix_data.mat', saved)

    return u, w, sigma, M3_approx

In [None]:
def estimate_variables(w, u, sigma, M3_approx, k, trials=100, display=False):
    """Run ``gmm.learn`` with Tensor Fox settings and report the mean time per trial.

    Parameters
    ----------
    w, u, sigma, M3_approx, k
        Forwarded unchanged to ``gmm.learn``.
    trials : int
        Forwarded to ``gmm.learn``; also the divisor of the total elapsed time.
    display : bool
        Forwarded unchanged to ``gmm.learn``.

    Returns
    -------
    tuple
        ``(w_tfx, u_tfx, w_qlt_tfx, u_qlt_tfx, avg_time)``: the first four
        values returned by ``gmm.learn`` plus the average wall-clock seconds
        per trial. The fifth value returned by ``gmm.learn`` is discarded.
    """
    class options:
        # Settings handed to gmm.learn.
        cg_tol = 1e-16
        cg_factor = 30
        symm = True

    # Column header; the per-trial rows are presumably printed by gmm.learn.
    print('trial #  Rel error w  Rel error u')

    t0 = time.time()
    w_tfx, u_tfx, w_qlt_tfx, u_qlt_tfx, _cpd_info = gmm.learn(
        w, u, sigma, M3_approx, k, options, trials, display
    )
    elapsed = time.time() - t0

    avg_time = elapsed/trials
    print('Average time =', np.round(avg_time, 3), 'seconds')

    return w_tfx, u_tfx, w_qlt_tfx, u_qlt_tfx, avg_time

In [None]:
def matlab_results(k, u, w, sigma, M3_approx, trials, alg):
    """Load the MATLAB runs of one algorithm and return the best trial.

    For every trial ``i`` in ``1..trials`` the files ``<alg>_<i>_1.mat``,
    ``<alg>_<i>_2.mat`` and ``<alg>_<i>_3.mat`` are read from the current
    working directory (variables ``W1``, ``W2``, ``W3``), then normalized with
    ``tfx.cnv.normalize``, adjusted with ``gmm.fix_parameters`` and scored with
    ``gmm.test_quality``. The average run time is read from ``<alg>_time.mat``
    (variable ``avg_time``).

    Parameters
    ----------
    k
        Forwarded to ``gmm.fix_parameters``.
    u, w
        Reference values forwarded to ``gmm.test_quality``.
    sigma, M3_approx
        Unused; kept so existing callers keep working.
    trials : int
        Number of MATLAB trials to load.
    alg : str
        File-name prefix of the algorithm (the notebook uses 'nls' and 'minf').

    Returns
    -------
    tuple
        ``(best_w, best_u, best_w_quality, best_u_quality, avg_time)``.
        ``best_w`` and ``best_u`` are ``None`` and both qualities are
        ``np.inf`` when no trial was selected (for example ``trials == 0``).

    Notes
    -----
    Every file is deleted right after it is read, so the MATLAB script must be
    run again before this function can be called a second time.
    """
    # Start with "no result" so the return statement is always well defined.
    best_w, best_u = None, None
    best_w_quality = np.inf
    best_u_quality = np.inf

    for i in range(1, trials+1):
        factors = []
        for l in (1, 2, 3):
            filename = alg + '_' + str(i) + '_' + str(l) + '.mat'
            data = scipy.io.loadmat(filename)
            factors.append(data['W' + str(l)])
            os.remove(filename)
        Lambda, factors = tfx.cnv.normalize(factors)
        # Only the first factor is used for the estimates.
        X = factors[0]

        Lambda, X = gmm.fix_parameters(Lambda, X, k)
        w_quality, u_quality = gmm.test_quality(Lambda, X, u, w)

        # A trial replaces the current best only if it improves both errors.
        # The thresholds must be updated here; otherwise they stay at inf and
        # the last trial always wins.
        if w_quality < best_w_quality and u_quality < best_u_quality:
            best_w_quality = w_quality
            best_u_quality = u_quality
            best_w, best_u = Lambda.copy(), X.copy()

    filename = alg + '_time' + '.mat'
    data = scipy.io.loadmat(filename)
    avg_time = data['avg_time'][0, 0]
    os.remove(filename)

    return best_w, best_u, best_w_quality, best_u_quality, avg_time

In [None]:
def _plot_error_vs_time(errors, timings, xlabel):
    """Draw one time-versus-error figure for TFX, NLS and MINF and show it."""
    plt.ylabel('seconds')
    plt.xlabel(xlabel)
    styles = (('ko', 'TFX'), ('rs', 'NLS'), ('b^', 'MINF'))
    for error, seconds, (fmt, label) in zip(errors, timings, styles):
        plt.plot(error, seconds, fmt, markersize=10, label=label)
    plt.xscale('log')
    plt.grid()
    plt.legend()
    plt.show()


def plot_results(w_qlt_tfx, u_qlt_tfx, time_tfx, w_qlt_nls, u_qlt_nls, time_nls, w_qlt_minf, u_qlt_minf, time_minf):
    """Show two figures comparing the three solvers: time against error.

    The first figure plots the error of w on a logarithmic x axis against the
    run time in seconds; the second does the same for the error of u.

    Parameters
    ----------
    w_qlt_tfx, w_qlt_nls, w_qlt_minf
        Relative error of w for Tensor Fox, NLS and MINF.
    u_qlt_tfx, u_qlt_nls, u_qlt_minf
        Relative error of u for Tensor Fox, NLS and MINF.
    time_tfx, time_nls, time_minf
        Run time in seconds for each solver.

    Returns
    -------
    None
    """
    timings = [time_tfx, time_nls, time_minf]

    # Raw strings keep the LaTeX backslashes literal; '\h' in a normal string
    # is an invalid escape sequence.
    _plot_error_vs_time([w_qlt_tfx, w_qlt_nls, w_qlt_minf], timings, r'$| \hat{w} - w| / |w|$')
    _plot_error_vs_time([u_qlt_tfx, u_qlt_nls, u_qlt_minf], timings, r'$| \hat{u} - u| / |u|$')

In [None]:
# Simple case as example: a small 2-D problem with two clusters, so the
# samples and the learned means can be drawn in a plane.
d = 2
k = 2
m = 1000
display = True
trials = 10

# Settings handed to gmm.learn for this example.
class options:
    display = 0
    symm = True

print('GENERATING DATA')
print('---------------')
u, w, sigma = gmm.gen_parameters(d, k)
samples_per_cluster, data = gmm.gen_samples(u, w, sigma, m)
M3_approx = gmm.empirical(u, w, sigma, data, k, display)
print('sigma^2 =', sigma**2)
print('w =', w)
print('u1 =', u[:,0])
print('u2 =', u[:,1])
print('samples in first cluster =', samples_per_cluster[0])
# Index 1 is the second cluster (the original printed index 0 twice).
print('samples in second cluster =', samples_per_cluster[1])

print()
print('LEARNING')
print('--------')
w_approx, u_approx, w_quality, u_quality, CPD_info = gmm.learn(w, u, sigma, M3_approx, k, options, trials, display)
print('w_approx =', w_approx)
print('u1_approx =', u_approx[:,0])
print('u2_approx =', u_approx[:,1])

print()
print('PLOTTING GENERATED DATA (IN COLOR) AND LEARNED MEANS (BLACK DOTS)')
print('-----------------------------------------------------------------')
# NOTE(review): the split below assumes the columns of `data` are ordered by
# cluster, with the first samples_per_cluster[0] columns belonging to cluster 1
# -- confirm against gmm.gen_samples.
plt.plot(data[0,:samples_per_cluster[0]], data[1,:samples_per_cluster[0]], 'r.')
plt.plot(data[0,samples_per_cluster[0]:], data[1,samples_per_cluster[0]:], '.')
plt.plot(u_approx[0,0], u_approx[1,0], 'ko', markersize=8)
plt.plot(u_approx[0,1], u_approx[1,1], 'ko', markersize=8)
plt.show()

In [None]:
# Real test: problem sizes used for the benchmark.
d, k, m = 20, 5, 10000
trials = 100

# Build the problem (also written to 'gaussian_mix_data.mat'), then estimate
# the variables with Tensor Fox and record the average time per trial.
u, w, sigma, M3_approx = generate_problem(d=d, k=k, m=m)
w_tfx, u_tfx, w_qlt_tfx, u_qlt_tfx, avg_time = estimate_variables(
    w, u, sigma, M3_approx, k, trials=trials
)

In [None]:
# Before running this cell you must run the script matlab_mix_benchs.m in Matlab, in the same folder.
# Use the same values for k and trials. Run the script once with alg = "nls" and once with alg = "minf".
# matlab_results reads the files <alg>_<trial>_<1|2|3>.mat and <alg>_time.mat and deletes them after
# reading, so the Matlab script has to be run again before this cell can be re-executed.
w_nls, u_nls, w_qlt_nls, u_qlt_nls, avg_time_nls = matlab_results(k, u, w, sigma, M3_approx, trials, 'nls')
w_minf, u_minf, w_qlt_minf, u_qlt_minf, avg_time_minf = matlab_results(k, u, w, sigma, M3_approx, trials, 'minf')

In [None]:
# Show final results: for Tensor Fox, NLS and MINF, plot the average time per trial
# against the error of w (first figure) and the error of u (second figure).
plot_results(w_qlt_tfx, u_qlt_tfx, avg_time, w_qlt_nls, u_qlt_nls, avg_time_nls, w_qlt_minf, u_qlt_minf, avg_time_minf)