In [None]:
import os
import sys
import inspect
# Put the parent and grandparent of the working directory on sys.path,
# presumably so the project-local imports that follow can be resolved.
#
# The working directory is used as the anchor instead of
# inspect.getfile(inspect.currentframe()): inside a notebook kernel the current
# frame has no real source file, and on recent kernels that call resolves to a
# kernel-generated temporary path, which makes the derived parent directories
# wrong. Assumes the kernel was started in the notebook's own folder (the
# Jupyter default) -- TODO confirm for other launch setups.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
grandparentdir = os.path.dirname(parentdir)

# Parent is inserted first and grandparent second, so on a fresh kernel the
# grandparent ends up at the very front. Entries that are already present are
# skipped so that re-running the cell does not stack duplicates.
for _extra_path in (parentdir, grandparentdir):
    if _extra_path not in sys.path:
        sys.path.insert(0, _extra_path)

from bmvc import *
from simulations.simulations import *
from sklearn.preprocessing import scale
from pickle import load
from runners import *
import autograd.numpy as np

In [None]:
# ---------------------------------------------------------------------------
# Simulation parameters (consumed by the data-generation cell further down)
# ---------------------------------------------------------------------------

# Random seed handed to the data generator.
sim_rs = 1

# Binomial randomness parameter and size of the shift.
p, shift = 0.5, 0.5

# Number of samples per cluster, and how many of them get shifted.
n, n_shift = 100, 50

# Spread of the Gaussians. The original note calls this the variance;
# TODO confirm whether the simulators treat it as a variance or a std. dev.
sigma = 0.3

# Number of clusters to infer.
K = 4

In [None]:
# ---------------------------------------------------------------------------
# Clustering parameters (forwarded to the model-fitting runner below)
# ---------------------------------------------------------------------------

# Random seed for the clustering run (distinct from the simulation seed).
rs = 2

# Initialisation scheme and inference mode, passed through as strings.
init = 'kmeans'
inference = 'alternating'

# Iteration count. NOTE: this name shadows the builtin iter(); it is kept
# because the fitting cells pass it as `iter=iter`.
iter = 10

# Whether to use the pseudolikelihood.
pseudo = False

# phi=None means phi is inferred (per the original note); the two values below
# are the mean / scale handed to the runner as its phi prior settings.
phi = None
phi_mean_prior, phi_scale_prior = 0, 1

In [None]:
# Cluster layout for the simulation. `two_cluster_sim(n, sigma)` was used as an
# alternative here and is unpacked into the same six names.
clusters, n1, n2, sigma1, sigma2, R = four_cluster_baseline(n, sigma)

# Per-view settings are passed to the generator as two-element lists.
view_sizes = [n1, n2]
view_sigmas = [sigma1, sigma2]

# Draw the two views and their ground-truth labels.
# `generate_data_p_mod_gt` was used as an alternative generator with the same
# argument list.
samples1, samples2, labels1, labels2 = generate_data_baseline_mod(
    clusters,
    view_sizes,
    view_sigmas,
    p,
    shift,
    n_shift,
    rs=sim_rs,
)

In [None]:
# Plot the simulated samples of both views with their ground-truth labels;
# the same cluster layout is supplied for each view.
visualize_2d(
    clusters, clusters,
    samples1, samples2,
    labels1, labels2,
)

In [None]:
# Standardise each view independently with sklearn's `scale`; the next cell
# prints per-column variance and mean as a sanity check.
X1, X2 = map(scale, (samples1, samples2))

In [None]:
# Sanity check on the scaled views: print column-wise variances first (X1, X2),
# then column-wise means (X1, X2).
print("Confirming data is scaled...")
for column_stat in (np.var, np.mean):
    for scaled_view in (X1, X2):
        print(column_stat(scaled_view, axis=0))

In [None]:
# Per-parameter step sizes for the first run.
# Layout: the first 4*K entries use `pi_step`, the trailing 2 entries use
# `phi_step` (presumably the pi-related and phi-related parameters
# respectively, going by the variable names -- TODO confirm against the runner).
pi_step, phi_step = 0.1, 1
n_pi_entries = 4 * K

step_size = np.full(n_pi_entries + 2, pi_step, dtype=float)
step_size[n_pi_entries:] = phi_step

In [None]:
# Fit the two-view model through the `gen_vi_mvc_results` runner, forwarding
# the clustering configuration defined in the earlier cells unchanged.
vi_settings = dict(
    phi=phi,
    phi_mean_prior=phi_mean_prior,
    phi_scale_prior=phi_scale_prior,
    init=init,
    random_state=rs,
    iter=iter,
    inference=inference,
    step_size=step_size,
    pseudo=pseudo,
)
model, parameters = gen_vi_mvc_results(X1, X2, R, K, **vi_settings)

In [None]:
# Fitted per-view means and covariances, shared by both plots.
fitted_params = (model.muA, model.muB, model.sigmaA, model.sigmaB)

# First plot: points labelled with the ground truth.
visualize_2d_results(X1, X2, labels1, labels2, *fitted_params)
# Second plot: points labelled with the model's inferred assignments.
visualize_2d_results(X1, X2, model.zA, model.zB, *fitted_params)

In [None]:
# Compare ground-truth labels with the model's inferred assignments, one call
# per view.
inferred_view1, inferred_view2 = model.zA, model.zB
compare_clusterings_ari(labels1, inferred_view1, 'ARI MVC View 1')
compare_clusterings_ari(labels2, inferred_view2, 'ARI MVC View 2')

In [None]:
# Configuration for the second run: phi is set to 0 here (it was None, i.e.
# inferred, for the first run), and the pseudolikelihood stays off.
phi, pseudo = 0, False

# `phi_step` is re-declared but does not enter this run's step_size, which has
# only the 4*K `pi_step` entries and no trailing phi slots.
pi_step, phi_step = 0.1, 1
step_size = np.full(4 * K, pi_step, dtype=float)

In [None]:
# Refit through the `gen_gd_mvc_results` runner with the second-run settings.
# This rebinds `model` and `parameters`, replacing the first run's results.
gd_settings = dict(
    phi=phi,
    phi_mean_prior=phi_mean_prior,
    phi_scale_prior=phi_scale_prior,
    init=init,
    random_state=rs,
    iter=iter,
    inference=inference,
    step_size=step_size,
    pseudo=pseudo,
)
model, parameters = gen_gd_mvc_results(X1, X2, R, K, **gd_settings)

In [None]:
# Fitted per-view means and covariances of the current `model`, shared by both
# plots.
fitted_params = (model.muA, model.muB, model.sigmaA, model.sigmaB)

# First plot: points labelled with the ground truth.
visualize_2d_results(X1, X2, labels1, labels2, *fitted_params)
# Second plot: points labelled with the model's inferred assignments.
visualize_2d_results(X1, X2, model.zA, model.zB, *fitted_params)

In [None]:
# Compare ground-truth labels with the current model's inferred assignments,
# one call per view.
inferred_view1, inferred_view2 = model.zA, model.zB
compare_clusterings_ari(labels1, inferred_view1, 'GMM View 1')
compare_clusterings_ari(labels2, inferred_view2, 'GMM View 2')

In [None]:
# Baseline: one Gaussian mixture fitted on both views concatenated column-wise.
view1_array = np.array(samples1)
view2_array = np.array(samples2)
merged_samples = np.concatenate((view1_array, view2_array), axis=1)
X = scale(merged_samples)

# NOTE(review): GaussianMixture is not imported explicitly in the visible
# import cell; presumably it is re-exported by one of the star imports --
# confirm.
gmm = GaussianMixture(n_components=K, random_state=rs)
gmm.fit(X)
gmm_z = gmm.predict(X)

# The single merged assignment is compared against each view's ground truth.
ari_gmm_view1 = compare_clusterings_ari(labels1, gmm_z, 'Merged GMM, View 1')
ari_gmm_view2 = compare_clusterings_ari(labels2, gmm_z, 'Merged GMM, View 2')

In [None]:
# Split the merged fit back into its two views: columns 0-1 form the first
# view, columns 2-3 the second.
X_view1, X_view2 = X[:, :2], X[:, 2:4]
means_view1, means_view2 = gmm.means_[:, :2], gmm.means_[:, 2:4]

# Per-component covariance sub-blocks restricted to each view's columns.
covs_view1 = [component_cov[0:2, 0:2] for component_cov in gmm.covariances_]
covs_view2 = [component_cov[2:4, 2:4] for component_cov in gmm.covariances_]

# First plot: points labelled with the ground truth.
visualize_2d_results(X_view1, X_view2, labels1, labels2,
                     means_view1, means_view2, covs_view1, covs_view2)
# Second plot: both views labelled with the merged GMM assignment.
visualize_2d_results(X_view1, X_view2, gmm_z, gmm_z,
                     means_view1, means_view2, covs_view1, covs_view2)

## 