In [1]:
import numpy as np
from PCMM.mixture_EM_loop import mixture_EM_loop
from PCMM.mixture_torch_loop import mixture_torch_loop
from PCMM.helper_functions import calc_NMI
import matplotlib.pyplot as plt

## Tutorial on Phase Coherence Mixture Modeling (PCMM)

When dealing with multivariate narrowband data, e.g., from functional neuroimaging modalities such as fMRI, EEG, or MEG, it is useful to model the instantaneous phase coherence between channels as a proxy for instantaneous correlation. This toolbox provides several ways to model such data; we refer to our paper for the full details: https://www.biorxiv.org/content/10.1101/2024.11.15.623830v1.

In this tutorial, we will demonstrate how to use the toolbox to simulate phase coherence data and fit a K-means, mixture, or HMM. We will use simulated data corresponding to K=2 true components with consistent phase shifts and added noise.

## Generate synthetic data

In [2]:
# --- Simulation settings ---
seed = 0 # Fixed seed so the synthetic data are identical on every run
noise_scale = np.pi*2/3 # Width (radians) of the uniform phase noise added to each theta
num_points_per_cluster = 1000
p = 3 # Data dimensionality
q = 2 # Number of frames for grassmann data
K = 2 # Number of true clusters

# True thetas: one phase vector of length p per cluster
theta1 = np.array([0,np.pi/2,0])
theta2 = np.array([0,0,np.pi/2])
thetas = [theta1,theta2]

# Local generator: reproducible without touching numpy's global random state
rng = np.random.default_rng(seed)

# Per-cluster buffers; the last axis indexes the cluster and is flattened below
data_real_projective_hyperplane = np.zeros((num_points_per_cluster,p,K))
data_complex_projective_hyperplane = np.zeros((num_points_per_cluster,p,K),dtype=complex)
data_grassmann = np.zeros((num_points_per_cluster,p,q,K))
data_spsd = np.zeros((num_points_per_cluster,p,q,K))
data_ts = np.zeros((num_points_per_cluster,p,K))

# One-hot ground truth of shape (K, N). Row k marks the samples drawn from
# thetas[k], matching the order in which the clusters are concatenated below.
true_labels = np.zeros((K,num_points_per_cluster*K))
for k in range(K):
    true_labels[k,k*num_points_per_cluster:(k+1)*num_points_per_cluster] = 1

for n in range(num_points_per_cluster):
    for k in range(K):
        # uniform noise in [-noise_scale/2, noise_scale/2) on every phase
        theta_plus_noise = thetas[k]+rng.random(p)*noise_scale-noise_scale/2
        cos_theta = np.cos(theta_plus_noise)
        sin_theta = np.sin(theta_plus_noise)

        # timeseries
        data_ts[n,:,k] = cos_theta

        # eigenvectors of the cosine coherence matrix, cos(theta_i - theta_j).
        # The matrix is symmetric, so eigh is used: it guarantees real
        # eigenvalues and orthonormal eigenvectors (eig does not).
        coh_map = np.outer(cos_theta,cos_theta)+np.outer(sin_theta,sin_theta)
        l,u = np.linalg.eigh(coh_map)
        order = np.argsort(l)[::-1] # descending eigenvalues
        data_real_projective_hyperplane[n,:,k] = u[:,order[0]]
        data_grassmann[n,:,:,k] = u[:,order[:q]]
        # clamp at 0 so rounding-level negative eigenvalues cannot produce NaN
        data_spsd[n,:,:,k] = u[:,order[:q]]*np.sqrt(np.maximum(l[order[:q]],0))[None]

        # leading eigenvector of the complex (Hermitian) coherence matrix
        coh_map_complex = np.outer(np.exp(1j*theta_plus_noise),np.exp(-1j*theta_plus_noise))
        l,u = np.linalg.eigh(coh_map_complex)
        order = np.argsort(l)[::-1]
        data_complex_projective_hyperplane[n,:,k] = u[:,order[0]]

# stack the clusters along the sample axis: cluster 0 first, then cluster 1, ...
data_ts = np.concatenate([data_ts[:,:,k] for k in range(K)],axis=0)
data_real_projective_hyperplane = np.concatenate([data_real_projective_hyperplane[:,:,k] for k in range(K)],axis=0)
data_complex_projective_hyperplane = np.concatenate([data_complex_projective_hyperplane[:,:,k] for k in range(K)],axis=0)
data_grassmann = np.concatenate([data_grassmann[:,:,:,k] for k in range(K)],axis=0)
data_spsd = np.concatenate([data_spsd[:,:,:,k] for k in range(K)],axis=0)

In [3]:
import pandas as pd
# Empty results table. One row is added per fitted model; the columns mirror
# the fields every later cell supplies, including the component 'rank'.
df = pd.DataFrame(columns=['model', 'manifold', 'init', 'HMM', 'numpy/torch', 'rank', 'NMI'])

## K-means models

K-means models here include the following:
- Least squares K-means, where the input eigenvectors are first sign-flipped such that the majority of elements are negative
- Diametrical clustering
- Complex diametrical clustering
- Grassmann clustering
- Weighted Grassmann clustering

In [4]:
# Import only the clustering routines used below, so it is clear where each
# name comes from and nothing else is pulled into the notebook namespace.
from PCMM.phase_coherence_kmeans import (
    least_squares_sign_flip,
    diametrical_clustering,
    grassmann_clustering,
    weighted_grassmann_clustering,
)

K = 2 # Number of estimated components
max_iter = 10000 # Maximum number of iterations
init = '++' # Initialization method ('uniform' or '++')
tol = 1e-10 # Tolerance for the convergence criterion
num_repl = 2 # Number of estimates to choose the best from (different initializations)

The K-means models output the cluster centers, data partition vector, and objective function.

In [5]:
# Every K-means variant is run with the same optimisation settings.
kmeans_kwargs = dict(max_iter=max_iter, init=init, tol=tol, num_repl=num_repl)

# Each call returns (cluster centers, data partition vector, objective value).
# `obj` is overwritten by every call, so only the last objective is retained.
C_ls, X_part_ls, obj = least_squares_sign_flip(data_real_projective_hyperplane, K, **kmeans_kwargs)
C_dc, X_part_dc, obj = diametrical_clustering(data_real_projective_hyperplane, K, **kmeans_kwargs)
C_cdc, X_part_cdc, obj = diametrical_clustering(data_complex_projective_hyperplane, K, **kmeans_kwargs)
C_gc, X_part_gc, obj = grassmann_clustering(data_grassmann, K, **kmeans_kwargs)
C_wgc, X_part_wgc, obj = weighted_grassmann_clustering(data_spsd, K, **kmeans_kwargs)

In [6]:
# One entry per K-means variant:
# (printed description, table label, manifold, rank, partition vector)
kmeans_runs = [
    ('Least squares on sign-flipped leading eigenvectors', 'Least squares on sign-flipped leading eigenvectors', 'Real projective hyperplane', 1, X_part_ls),
    ('Diametrical clustering on real projective hyperplane', 'Diametrical clustering', 'Real projective hyperplane', 1, X_part_dc),
    ('Diametrical clustering on complex projective hyperplane', 'Complex diametrical clustering', 'Complex projective hyperplane', 1, X_part_cdc),
    ('Grassmann clustering', 'Grassmann clustering', 'Grassmann', 2, X_part_gc),
    ('Weighted Grassmann clustering', 'Weighted Grassmann clustering', 'Grassmann', 2, X_part_wgc),
]

# Score every partition exactly once; the same numbers feed the printout and
# the table. np.eye(K)[partition].T one-hot encodes the partition vector with
# one row per cluster (assumes the partition holds integer labels in 0..K-1).
kmeans_nmi = [calc_NMI(true_labels,np.eye(K)[run[4]].T) for run in kmeans_runs]

for run, nmi in zip(kmeans_runs, kmeans_nmi):
    print(run[0]+': NMI='+str(nmi))

# Start the results table with the K-means rows
df = pd.DataFrame({'model': [run[1] for run in kmeans_runs],
                   'manifold': [run[2] for run in kmeans_runs],
                   'init': [init]*len(kmeans_runs),
                   'HMM': [False]*len(kmeans_runs),
                   'numpy/torch': ['numpy']*len(kmeans_runs),
                   'rank': [run[3] for run in kmeans_runs],
                   'NMI': kmeans_nmi})

Least squares on sign-flipped leading eigenvectors: NMI=0.16220547294931656
Diametrical clustering on real projective hyperplane: NMI=0.14342238974110763
Diametrical clustering on complex projective hyperplane: NMI=0.7938573675374688
Grassmann clustering: NMI=0.12163509412433136
Weighted Grassmann clustering: NMI=0.24772033459036275


## Mixture models, numpy estimation (EM)

### Instancing a mixture model
The implemented statistical distributions are:
- Watson (rank-1 component)
- Angular central Gaussian (ACG) (rank-r component)
- Matrix angular central Gaussian (MACG) (rank-r component)
- Singular Wishart (rank-r component)
- Gaussian (rank-r component)

Of these, Watson and ACG are for data on the (complex) projective hyperplane, i.e., the input data vectors should be normalized to unit norm. MACG is for data on the Grassmann manifold, i.e., the input data should be orthonormal matrices. Singular Wishart is for symmetric positive semidefinite matrices, i.e., the input data should be matrices corresponding to eigenvectors scaled by the square root of their eigenvalues. 

In our implementation, a mixture model first has to be instanced with some required inputs:
- K: number of components
- p: data dimensionality (e.g., number of brain regions)
- q: number of orthonormal frames (only applicable to MACG and Singular Wishart)

Furthermore, the model instance accepts the following optional inputs:
- rank [None]: The desired component rank (not applicable to Watson, which estimates a rank-1 component). Defaults to a full-rank model.
- complex [False]: Whether the distribution should be for complex data (only applicable to Watson and ACG)
- params [None]: Initial parameters for the model. Should be formatted the same way as for a model output.

### Estimating the model
For model estimation, we have made a training loop available, which has the following required inputs:
- model: The mixture model instance
- data: The input data as a numpy array of either shape (n, p) (projective hyperplane) or (n, p, q) (Grassmann or SPSD) depending on the model

The training loop has the following optional inputs:
- max_iter [10000]: Maximum number of iterations
- tol [1e-8]: Tolerance for convergence
- num_repl [1]: Number of restarts to choose the best model from
- init [None]: Initialization method for the model parameters. Options are 'uniform' for randomly initialized parameters, 'ls', 'dc', 'gc', and 'wgc' for K-means initialization as above, 'dc++', 'gc++', and 'wgc++' for K-means++ initialization, and 'ls_seg', 'dc_seg', 'gc_seg', and 'wgc_seg' for K-means followed by K single-component estimates.

For initialization, note that 'dc' can also be applied to Grassmann and SPSD data; in that case, only the first column of each orthonormal matrix is used. Conversely, 'gc' and 'wgc' cannot be used for projective hyperplane data.


In [7]:
# Settings shared by all EM (numpy) fits in this section
num_repl = 2      # Restarts per model; the best estimate is chosen among them
params = None     # No initial parameter set is supplied
tol = 1e-10       # Convergence tolerance
max_iter = 10_000 # Upper bound on the number of iterations

In [8]:
# Watson mixture model (real)
from PCMM.PCMMnumpy import Watson

init = 'dc' # initialization with diametrical clustering (which is itself seeded by dc++)
model = Watson(K=K, p=p, complex=False, params=None)
params, posterior, loglik = mixture_EM_loop(
    model=model, data=data_real_projective_hyperplane,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init
)

# Score once and reuse the value for both the printout and the results table
nmi = calc_NMI(true_labels,posterior)
print('Watson mixture model (real): NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'Watson mixture', 'manifold': 'Real projective hyperplane', 'init': init, 'HMM': False, 'numpy/torch': 'numpy', 'rank': 1, 'NMI': [nmi]})], ignore_index=True)

Running diametrical clustering initialization
Initializing mu based on the clustering centroid
Beginning EM loop


Convergence towards tol: 6.16e-11:   0%|          | 37/10000 [00:00<01:59, 83.32it/s]

Running diametrical clustering initialization
Initializing mu based on the clustering centroid
Beginning EM loop


Convergence towards tol: 6.16e-11:   0%|          | 38/10000 [00:00<01:59, 83.53it/s]
Convergence towards tol: 5.96e-11:   0%|          | 31/10000 [00:00<01:54, 87.20it/s]

Watson mixture model (real): NMI=0.15172924684348932





In [9]:
# Watson mixture model (complex)
from PCMM.PCMMnumpy import Watson

init = 'dc' # initialization with diametrical clustering (which is itself seeded by dc++)
model = Watson(K=K, p=p, complex=True, params=None)
params, posterior, loglik = mixture_EM_loop(
    model=model, data=data_complex_projective_hyperplane,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init
)

# Score once and reuse the value for both the printout and the results table
nmi = calc_NMI(true_labels,posterior)
print('Watson mixture model (complex): NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'Watson mixture', 'manifold': 'Complex projective hyperplane', 'init': init, 'HMM': False, 'numpy/torch': 'numpy', 'rank': 1, 'NMI': [nmi]})], ignore_index=True)

Running diametrical clustering initialization
Initializing mu based on the clustering centroid
Beginning EM loop


Convergence towards tol: 8.06e-11:   0%|          | 18/10000 [00:00<02:39, 62.46it/s]

Running diametrical clustering initialization
Initializing mu based on the clustering centroid
Beginning EM loop


Convergence towards tol: 8.06e-11:   0%|          | 19/10000 [00:00<02:45, 60.21it/s]
Convergence towards tol: 4.11e-11:   0%|          | 20/10000 [00:00<02:58, 55.81it/s]

Watson mixture model (complex): NMI=0.8410397418471373





In [10]:
# ACG mixture model (real)
from PCMM.PCMMnumpy import ACG

rank = 2 # Component rank
init = 'dc' # initialization with diametrical clustering
model = ACG(K=K, p=p, rank=rank, complex=False, params=None)
params, posterior, loglik = mixture_EM_loop(
    model=model, data=data_real_projective_hyperplane,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init
)

# Score once and reuse the value for both the printout and the results table
nmi = calc_NMI(true_labels,posterior)
print('ACG mixture model (real): NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'ACG mixture', 'manifold': 'Real projective hyperplane', 'init': init, 'HMM': False, 'numpy/torch': 'numpy', 'rank': rank, 'NMI': [nmi]})], ignore_index=True)

Running diametrical clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning EM loop


In the initial phase:   0%|          | 0/10000 [00:00<?, ?it/s]

Convergence towards tol: 8.06e-11:   0%|          | 50/10000 [00:00<01:02, 158.93it/s]

Running diametrical clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning EM loop


Convergence towards tol: 8.06e-11:   1%|          | 51/10000 [00:00<00:50, 195.15it/s]
Convergence towards tol: 8.98e-11:   0%|          | 41/10000 [00:00<00:51, 191.91it/s]

ACG mixture model (real): NMI=0.08051159317954622





In [11]:
# ACG mixture model (complex)
from PCMM.PCMMnumpy import ACG

rank = 2 # Component rank
init = 'dc' # initialization with diametrical clustering
model = ACG(K=K, p=p, rank=rank, complex=True, params=None)
params, posterior, loglik = mixture_EM_loop(
    model=model, data=data_complex_projective_hyperplane,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init
)

# Score once and reuse the value for both the printout and the results table
nmi = calc_NMI(true_labels,posterior)
print('ACG mixture model (complex): NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'ACG mixture', 'manifold': 'Complex projective hyperplane', 'init': init, 'HMM': False, 'numpy/torch': 'numpy', 'rank': rank, 'NMI': [nmi]})], ignore_index=True)

Running diametrical clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning EM loop


Convergence towards tol: 7.77e-07:   0%|          | 14/10000 [00:00<01:04, 155.21it/s]

Running diametrical clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning EM loop


Convergence towards tol: 7.77e-07:   0%|          | 15/10000 [00:00<01:09, 144.64it/s]
Convergence towards tol: 1.34e-07:   0%|          | 15/10000 [00:00<01:11, 140.08it/s]


ACG mixture model (complex): NMI=0.7595278456175755


In [12]:
# MACG mixture model
from PCMM.PCMMnumpy import MACG

rank = 2 # Component rank
init = 'gc' # initialization with Grassmann clustering
# q is taken from the data-generation cell so the model always matches the
# number of frames in data_grassmann (it was hard-coded to 2 before)
model = MACG(K=K, p=p, q=q, rank=rank, params=None)
params, posterior, loglik = mixture_EM_loop(
    model=model, data=data_grassmann,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init
)

# Score once and reuse the value for both the printout and the results table
nmi = calc_NMI(true_labels,posterior)
print('MACG mixture model: NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'MACG mixture', 'manifold': 'Grassmann', 'init': init, 'HMM': False, 'numpy/torch': 'numpy', 'rank': rank, 'NMI': [nmi]})], ignore_index=True)

Running grassmann clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning EM loop


Convergence towards tol: 8.84e-11:   0%|          | 24/10000 [00:02<06:31, 25.50it/s]

Running grassmann clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning EM loop


Convergence towards tol: 8.84e-11:   0%|          | 25/10000 [00:02<17:22,  9.57it/s]
Convergence towards tol: 4.91e-10:   0%|          | 22/10000 [00:02<19:36,  8.48it/s]

MACG mixture model: NMI=1.1761810459819492e-11





In [13]:
# Singular Wishart model
from PCMM.PCMMnumpy import SingularWishart

rank = 2 # Component rank
init = 'wgc' # initialization with weighted Grassmann clustering
# q is taken from the data-generation cell so the model always matches the
# number of frames in data_spsd (it was hard-coded to 2 before)
model = SingularWishart(K=K, p=p, q=q, rank=rank, params=None)
params, posterior, loglik = mixture_EM_loop(
    model=model, data=data_spsd,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init
)

# Score once and reuse the value for both the printout and the results table
nmi = calc_NMI(true_labels,posterior)
print('Singular Wishart model: NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'Singular Wishart', 'manifold': 'Grassmann', 'init': init, 'HMM': False, 'numpy/torch': 'numpy', 'rank': rank, 'NMI': [nmi]})], ignore_index=True)

Running weighted grassmann clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning EM loop


Convergence towards tol: 8.09e-11:   1%|          | 110/10000 [00:01<01:47, 91.82it/s]

Running weighted grassmann clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning EM loop


Convergence towards tol: 8.09e-11:   1%|          | 111/10000 [00:01<01:50, 89.48it/s]
Convergence towards tol: 9.81e-11:   1%|          | 113/10000 [00:01<02:03, 80.07it/s]

Singular Wishart model: NMI=0.0704905507094566





## Mixture model estimation in PyTorch

Mixture estimation in PyTorch can be very fruitful for large datasets, both because of the possibility to run on GPUs and since EM estimation often requires intensive loops for each iteration. The PyTorch implementation is very similar to the numpy implementation, but with the following differences:

### Instancing a mixture model
A mixture model now has the option to also include a HMM and a samples_per_sequence parameter:
- HMM [False]: Whether the model should be a Hidden Markov Model
- samples_per_sequence [0]: Number of samples per sequence in the HMM to avoid temporal smoothing over sequences. If 0, it is assumed to be one sequence. If int, it is assumed to be N/samples_per_sequence sequences. Otherwise it can be a list. 
- For ACG, MACG, Singular Wishart, and Gaussian, the `rank` parameter is now required.

### Estimating the model
The training loop has the following additional optional inputs:
- LR [0.1]: Learning rate for the optimizer
- decrease_lr_on_plateau [False]: Whether the learning rate should decrease by a factor of 10 once when tol has been reached. 

In [14]:
# Settings shared by all PyTorch fits in this section
num_repl = 2      # Restarts per model; the best estimate is chosen among them
LR = 0.1          # Learning rate for the optimizer
params = None     # No initial parameter set is supplied
tol = 1e-10       # Convergence tolerance
max_iter = 10_000 # Upper bound on the number of iterations

In [15]:
# Watson mixture model (real)
from PCMM.PCMMtorch import Watson # Note the torch! This replaces the numpy Watson imported earlier

init = 'dc' # initialization with diametrical clustering
model = Watson(K=K, p=p, complex=False, params=None, HMM=False)
params, posterior, loglik = mixture_torch_loop(
    model=model, data=data_real_projective_hyperplane,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init, LR=LR
)

# Convert the posterior to numpy and score it once; the value is reused for
# both the printout and the results table
nmi = calc_NMI(true_labels,posterior.numpy())
print('Watson mixture model (real): NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'Watson mixture', 'manifold': 'Real projective hyperplane', 'init': init, 'HMM': False, 'numpy/torch': 'torch', 'rank': 1, 'NMI': [nmi]})], ignore_index=True)

Running diametrical clustering initialization
Initializing mu based on the clustering centroid
Beginning numerical optimization loop


Convergence towards tol: 6.33e-06:   4%|▍         | 434/10000 [00:12<05:10, 30.84it/s]

Running diametrical clustering initialization
Initializing mu based on the clustering centroid
Beginning numerical optimization loop


Convergence towards tol: 6.33e-06:   4%|▍         | 435/10000 [00:12<04:29, 35.44it/s]
Convergence towards tol: 2.20e-06:   6%|▌         | 554/10000 [00:16<04:47, 32.83it/s]

Watson mixture model (real): NMI=0.15874834151182368





In [16]:
# Watson mixture model (complex)
from PCMM.PCMMtorch import Watson

init = 'dc' # initialization with diametrical clustering (which is itself seeded by dc++)
model = Watson(K=K, p=p, complex=True, params=None, HMM=False)
params, posterior, loglik = mixture_torch_loop(
    model=model, data=data_complex_projective_hyperplane,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init, LR=LR
)

# Convert the posterior to numpy and score it once; the value is reused for
# both the printout and the results table
nmi = calc_NMI(true_labels,posterior.numpy())
print('Watson mixture model (complex): NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'Watson mixture', 'manifold': 'Complex projective hyperplane', 'init': init, 'HMM': False, 'numpy/torch': 'torch', 'rank': 1, 'NMI': [nmi]})], ignore_index=True)

Running diametrical clustering initialization
Initializing mu based on the clustering centroid
Beginning numerical optimization loop


Convergence towards tol: 2.65e-09:   3%|▎         | 315/10000 [00:07<04:27, 36.21it/s]

Running diametrical clustering initialization
Initializing mu based on the clustering centroid
Beginning numerical optimization loop


Convergence towards tol: 2.65e-09:   3%|▎         | 315/10000 [00:07<04:05, 39.40it/s]
Convergence towards tol: 1.67e-07:   2%|▏         | 214/10000 [00:05<04:23, 37.20it/s]

Watson mixture model (complex): NMI=0.8410788344626612





In [17]:
# ACG mixture model (real)
from PCMM.PCMMtorch import ACG

rank = 2 # Component rank
init = 'dc' # initialization with diametrical clustering
model = ACG(K=K, p=p, rank=rank, complex=False, params=None, HMM=False)
params, posterior, loglik = mixture_torch_loop(
    model=model, data=data_real_projective_hyperplane,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init, LR=LR
)

# Convert the posterior to numpy and score it once; the value is reused for
# both the printout and the results table
nmi = calc_NMI(true_labels,posterior.numpy())
print('ACG mixture model (real): NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'ACG mixture', 'manifold': 'Real projective hyperplane', 'init': init, 'HMM': False, 'numpy/torch': 'torch', 'rank': rank, 'NMI': [nmi]})], ignore_index=True)

Running diametrical clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning numerical optimization loop


Convergence towards tol: 9.90e-11:   8%|▊         | 824/10000 [00:03<00:34, 269.34it/s]

Running diametrical clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning numerical optimization loop


Convergence towards tol: 9.90e-11:   8%|▊         | 825/10000 [00:03<00:34, 268.95it/s]
Convergence towards tol: 9.95e-11:   5%|▌         | 519/10000 [00:03<01:03, 148.43it/s]


ACG mixture model (real): NMI=0.1764080594348394


In [18]:
# ACG mixture model (complex)
from PCMM.PCMMtorch import ACG

rank = 2 # Component rank
init = 'dc' # initialization with diametrical clustering
model = ACG(K=K, p=p, rank=rank, complex=True, params=None, HMM=False)
# `params` from this fit is reused by the HMM cell that follows when the
# notebook is run in order
params, posterior, loglik = mixture_torch_loop(
    model=model, data=data_complex_projective_hyperplane,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init, LR=LR
)

# Convert the posterior to numpy and score it once; the value is reused for
# both the printout and the results table
nmi = calc_NMI(true_labels,posterior.numpy())
print('ACG mixture model (complex): NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'ACG mixture', 'manifold': 'Complex projective hyperplane', 'init': init, 'HMM': False, 'numpy/torch': 'torch', 'rank': rank, 'NMI': [nmi]})], ignore_index=True)

Running diametrical clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning numerical optimization loop


Convergence towards tol: 2.90e-12:   2%|▏         | 165/10000 [00:00<00:42, 229.33it/s]

Running diametrical clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning numerical optimization loop


Convergence towards tol: 2.90e-12:   2%|▏         | 166/10000 [00:00<00:44, 219.90it/s]
Convergence towards tol: 2.22e-04:   0%|          | 20/10000 [00:00<01:00, 164.88it/s]


ACG mixture model (complex): NMI=0.7752507210523177


### HMM
It's always a good idea to initialize HMMs from a learned mixture model. In this case, remember to set init='no'

In [19]:
# ACG HMM (complex), warm-started from previously learned parameters
import copy
from PCMM.PCMMtorch import ACG

rank = 2 # Component rank; should match the model that produced `params`
# NOTE: `params` is whatever the most recent fit left behind. When the
# notebook is run top to bottom this is the complex ACG mixture fitted in the
# preceding cell. A deep copy is passed to the new model instance.
model = ACG(K=K, p=p, rank=rank, complex=True, params=copy.deepcopy(params), HMM=True)
init = 'no' # skip clustering-based initialization; start from `params`
params, posterior, loglik = mixture_torch_loop(
    model=model, data=data_complex_projective_hyperplane,
    tol=tol, max_iter=max_iter, num_repl=1, init=init, LR=LR
)

# Convert the posterior to numpy and score it once; the value is reused for
# both the printout and the results table. The printed label now names the
# HMM so it is not confused with the plain mixture result printed earlier.
nmi = calc_NMI(true_labels,posterior.numpy())
print('ACG HMM (complex): NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'ACG mixture', 'manifold': 'Complex projective hyperplane', 'init': init, 'HMM': True, 'numpy/torch': 'torch', 'rank': rank, 'NMI': [nmi]})], ignore_index=True)

Beginning numerical optimization loop


Convergence towards tol: 5.64e-08:   7%|▋         | 701/10000 [08:25<1:51:47,  1.39it/s]

ACG mixture model (complex): NMI=1.0





In [20]:
# MACG mixture model
from PCMM.PCMMtorch import MACG

rank = 2 # Component rank
init = 'gc' # initialization with Grassmann clustering
# q is taken from the data-generation cell so the model always matches the
# number of frames in data_grassmann (it was hard-coded to 2 before)
model = MACG(K=K, p=p, q=q, rank=rank, params=None, HMM=False)
params, posterior, loglik = mixture_torch_loop(
    model=model, data=data_grassmann,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init, LR=LR
)

# Convert the posterior to numpy and score it once; the value is reused for
# both the printout and the results table
nmi = calc_NMI(true_labels,posterior.numpy())
print('MACG mixture model: NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'MACG mixture', 'manifold': 'Grassmann', 'init': init, 'HMM': False, 'numpy/torch': 'torch', 'rank': rank, 'NMI': [nmi]})], ignore_index=True)

Running grassmann clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning numerical optimization loop


In the initial phase:   0%|          | 6/10000 [00:00<02:31, 65.89it/s]

Convergence towards tol: 9.97e-11:   5%|▍         | 497/10000 [00:06<02:03, 77.16it/s]

Running grassmann clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning numerical optimization loop


Convergence towards tol: 9.97e-11:   5%|▍         | 498/10000 [00:06<02:04, 76.33it/s]
Convergence towards tol: 9.98e-11:   5%|▌         | 501/10000 [00:07<02:18, 68.83it/s]

MACG mixture model: NMI=0.2344825294017614





In [21]:
# Singular Wishart model
from PCMM.PCMMtorch import SingularWishart

rank = 2 # Component rank
init = 'wgc' # initialization with weighted Grassmann clustering
# q is taken from the data-generation cell so the model always matches the
# number of frames in data_spsd (it was hard-coded to 2 before)
model = SingularWishart(K=K, p=p, q=q, rank=rank, params=None, HMM=False)
params, posterior, loglik = mixture_torch_loop(
    model=model, data=data_spsd,
    tol=tol, max_iter=max_iter, num_repl=num_repl, init=init, LR=LR
)

# Convert the posterior to numpy and score it once; the value is reused for
# both the printout and the results table
nmi = calc_NMI(true_labels,posterior.numpy())
print('Singular Wishart model: NMI='+str(nmi))
df = pd.concat([df,pd.DataFrame({'model': 'Singular Wishart', 'manifold': 'Grassmann', 'init': init, 'HMM': False, 'numpy/torch': 'torch', 'rank': rank, 'NMI': [nmi]})], ignore_index=True)

Running weighted grassmann clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning numerical optimization loop


Convergence towards tol: 5.83e-08:   2%|▏         | 200/10000 [00:01<00:56, 174.69it/s]

Running weighted grassmann clustering initialization
Initializing M based on a lowrank-svd of the input data partitioned acc to the clustering
Beginning numerical optimization loop


Convergence towards tol: 5.83e-08:   2%|▏         | 201/10000 [00:01<00:56, 172.02it/s]
Convergence towards tol: 1.44e-05:   2%|▏         | 165/10000 [00:01<01:17, 126.97it/s]

Singular Wishart model: NMI=0.07468450672559256





See the results below. If interested, try changing the noise scale in the second cell to see what changes. The data are generated from a mixture of rank-1 true components; thus, complex diametrical clustering and complex Watson mixtures perform well, with no added advantage from ACG. All other models are unable to model consistent phase shifts well.

In [22]:
# Format NMI to three decimals on a display copy instead of overwriting
# df['NMI'] with strings: the numeric results stay intact and the cell can be
# re-run safely (formatting already-formatted strings would raise).
# astype(float) also tolerates an NMI column that an earlier run stringified.
df_display = df.assign(NMI=df['NMI'].astype(float).map('{:.3f}'.format))
df_display

Unnamed: 0,model,manifold,init,HMM,numpy/torch,rank,NMI
0,Least squares on sign-flipped leading eigenvec...,Real projective hyperplane,++,False,numpy,1,0.162
1,Diametrical clustering,Real projective hyperplane,++,False,numpy,1,0.143
2,Complex diametrical clustering,Complex projective hyperplane,++,False,numpy,1,0.794
3,Grassmann clustering,Grassmann,++,False,numpy,2,0.122
4,Weighted Grassmann clustering,Grassmann,++,False,numpy,2,0.248
5,Watson mixture,Real projective hyperplane,dc,False,numpy,1,0.152
6,Watson mixture,Complex projective hyperplane,dc,False,numpy,1,0.841
7,ACG mixture,Real projective hyperplane,dc,False,numpy,2,0.081
8,ACG mixture,Complex projective hyperplane,dc,False,numpy,2,0.76
9,MACG mixture,Grassmann,gc,False,numpy,2,0.0
