# MAD Mix GMM extension

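This notebook extends MAD Mix to Gaussian mixture models (GMMs) with multivariate data, learning the mixture weights and covariance matrices in addition to the cluster means.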

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import sys,time
sys.path.insert(1, '../src/')
import madmix
import aux

plt.rcParams.update({'figure.max_open_warning': 0})
plt.rcParams["figure.figsize"]=15,7.5
plt.rcParams.update({'font.size': 40})

In [2]:
####################
####################
#   auxiliary fns  #
####################
####################
def madmix_gmm_flatten(ws,mus,sigmas):
    """
    Stack weights, means, and covariances row-wise into one 2D array

    Inputs:
        ws     : (K,B) array, weights
        mus    : (K,D,B) array, cluster means
        sigmas : (K,D,D,B) array, cluster covariances

    Outputs:
        xc     : (K',B) array, flattened values

    Note:
    K is the number of clusters, D is data dimension,
    and B is the number of data points (for vectorizing)
    K'= K (weights) + KxD (means) + KxDxD (covariances)
    Rows are ordered weights first, then means, then covariances;
    the batch axis B always stays last.
    """
    # all sizes are read off the means array
    n_clusters,dim,n_batch=mus.shape

    n_mean_rows=n_clusters*dim
    n_cov_rows=n_mean_rows*dim
    blocks=[
        ws,
        np.reshape(mus,(n_mean_rows,n_batch)),
        np.reshape(sigmas,(n_cov_rows,n_batch)),
    ]
    return np.vstack(blocks)


def madmix_gmm_unflatten(xc,K,D):
    """
    Split a flattened array back into weights, means, and covariances

    Inputs:
        xc     : (K',B) array, flattened values
        K      : int, number of clusters
        D      : int, data dimension

    Outputs:
        ws     : (K,B) array, weights
        mus    : (K,D,B) array, cluster means
        sigmas : (K,D,D,B) array, cluster covariances

    Note:
    K is the number of clusters, D is data dimension,
    and B is the number of data points (for vectorizing)
    K'= K (weights) + KxD (means) + KxDxD (covariances)
    """
    n_batch=xc.shape[-1]

    # row offsets of the three stacked blocks: weights | means | covariances
    mean_start=K
    cov_start=K+K*D

    ws=xc[:mean_start,:]
    mus=xc[mean_start:cov_start,:].reshape(K,D,n_batch)
    sigmas=xc[cov_start:,:].reshape(K,D,D,n_batch)

    return ws,mus,sigmas

In [23]:
########################
########################
# target specification #
########################
########################
def lp(xd,xc,axis=None):
    # evaluate the log pmf of the cluster labels: either the log joint over
    # all labels or the full conditional of a single label
    #
    # inputs:
    #    xd     : (N,B) integer array with labels (used as indices into the K clusters)
    #    xc     : (K',B) array with flattened weights, means, and covariances
    #    axis   : int or None, index (into the N observations) of the label whose
    #             full conditional is returned; if None then returns the log joint
    # outputs:
    #   ext_lprb : if axis is None, (B,) array with log joint; else, (B,K) array with the conditional over labels
    #
    # Note: K, D, and the data array y are read from the global scope
    N,B=xd.shape

    ws,mus,sigmas=madmix_gmm_unflatten(xc,K,D)
    # NOTE(review): ws is unpacked but never enters the probabilities below — confirm this is intended

    # Gaussian log density of every observation under each cluster and batch element
    log_dens=np.zeros((N,K,B))
    for k,b in np.ndindex(K,B):
        log_dens[:,k,b]=stats.multivariate_normal(mus[k,:,b],sigmas[k,:,:,b]).logpdf(y)
    # end for

    # normalize over the cluster axis
    # (presumably aux.LogSumExp reduces over the leading axis — TODO confirm)
    log_norm=aux.LogSumExp(np.moveaxis(log_dens,1,0))
    log_cond=log_dens-log_norm[:,np.newaxis,:]

    # full conditional of one label: (K,B) slice transposed to (B,K)
    if axis is not None:
        return log_cond[axis,:,:].T
    # end if

    # log joint: pick the normalized log prob of each observed label and add them up
    rows=np.arange(0,N)
    picked=np.zeros((N,B))
    for b in range(B):
        picked[:,b]=log_cond[rows,xd[:,b],b]
    # end for
    return np.sum(picked,axis=0)



def gen_grad_lp(xd): 
    # generate the score function for Hamiltonian dynamics
    #
    # inputs:
    #    xd     : (N,B) integer array with current labels
    # outputs:
    #   grad_lp : function, vectorized score function ((K',B)->(K',B))
    #
    # Note: K is the number of clusters, D is data dimension, 
    # and B is the number of data points (for vectorizing)
    # K'= K (weights) + KxD (means) + KxDxD (covariances)
    #
    # The number of clusters K and the (N,D) data array y are read from the
    # global scope when this function is called. The returned closure works
    # from the pooled statistics and sizes captured here, so it does not
    # re-read K, D, or B from the global scope when it is evaluated.
    # A cluster with no assigned labels has N_pool==0 and produces nan/inf
    # pooled statistics (division by zero); this is not guarded against.
    
    # one-hot membership; xd needs an explicit cluster axis to broadcast to (N,K,B)
    idx=(xd[:,None,:]==np.arange(0,K,dtype=int)[None,:,None])               #(N,K,B)
    N_pool=np.sum(idx,axis=0)                                               #(K,B)
    y_pool=np.sum(y[:,:,None,None]*idx[:,None,:,:],axis=0)/N_pool[None,:,:] #(D,K,B)
    diffs=y[:,:,None,None]-y_pool[None,:,:,:]                               #(N,D,K,B)
    # within-cluster scatter: mask the deviations so only members of cluster k
    # contribute to its matrix (same masking as used for y_pool)
    member_diffs=diffs*idx[:,None,:,:]                                      #(N,D,K,B)
    S_pool=np.einsum('nikb,njkb->ijkb',member_diffs,diffs)                  #(D,D,K,B)
    S_pool=S_pool/N_pool[None,None,:,:]                                     #(D,D,K,B)
    S_pool=np.moveaxis(S_pool,2,0)                                          #(K,D,D,B)
    S_poolT=np.transpose(S_pool,axes=(0,2,1,3)) # transpose DxD block, leave first and last axes untouched
    
    N_,D_,K_,B_= diffs.shape
    
    def mygrad_lp(xc): # in: (K',B)
        # retrieve unflattened params and invert covariance matrices
        ws,mus,Sigmas=madmix_gmm_unflatten(xc,K_,D_) #(K,B), (K,D,B),(K,D,D,B)
        # np.linalg.inv inverts over the last two axes, so move the batch axis
        # next to the cluster axis, invert every DxD block, and move it back
        invSigmas=np.moveaxis(np.linalg.inv(np.moveaxis(Sigmas,-1,1)),1,-1) #(K,D,D,B)
        invSigmasT=np.transpose(invSigmas,axes=(0,2,1,3)) # transpose DxD block, leave first and last axes untouched
        
        # more quantities
        cluster_diffs=np.moveaxis(y_pool,1,0)-mus #(K,D,B)
        
        # calculate separate gradients
        # NOTE(review): the signs and scale factors of the mean and covariance
        # terms are kept exactly as originally written. They do not obviously
        # match the gradient of a Gaussian log-likelihood (e.g. that would give
        # +N_k*invSigma_k@(ybar_k-mu_k) for the means) — confirm against the
        # derivation of the target before relying on these values.
        grad_logw=N_pool/ws #(K,B)
        
        # matrix-vector product invSigma_k @ (ybar_k - mu_k) for every k and b
        grads_logmu=-N_pool[:,None,:]*np.einsum('kijb,kjb->kib',invSigmas,cluster_diffs) #(K,D,B)
        
        grads_logsigma=-0.5*(1+N_pool[:,None,None,:])*invSigmasT #(K,D,D,B)
        # outer product of the mean offsets
        grads_logsigma-=0.5*N_pool[:,None,None,:]*cluster_diffs[:,:,None,:]*cluster_diffs[:,None,:,:] #(K,D,D,B)
        # invSigma^T S^T invSigma^T on each DxD block; einsum keeps the batch
        # axis last (np.matmul would treat the trailing (D,B) axes as the matrices)
        grads_logsigma+=0.5*np.einsum('kijb,kjlb,klmb->kimb',invSigmasT,S_poolT,invSigmasT) #(K,D,D,B)
        
        return madmix_gmm_flatten(grad_logw,grads_logmu,grads_logsigma) # out: (K',B)
    return mygrad_lp

In [13]:
# Build small inputs for trying out lp: K=2 clusters and 2 batch columns.
# NOTE: dat is created by the data wrangling cell in the "Old Faithful"
# section further down, so that cell has to be run before this one.
y=dat
K=2
# random labels in {0,1}, one row per observation in y and one column per batch element
xd=np.random.randint(low=0,high=2,size=(y.shape[0],2))
# cluster weights, shape (2,2); both columns hold the same values
ws_=np.array([[0.6,0.6],[0.4,0.4]])
# cluster means, shape (2,2,2); the same 2x2 array is copied into every slice along the last axis
mus_=np.zeros((2,2,2))
for b in range(2): mus_[:,:,b]=np.array([[2,60],[4.5,80]])
# cluster covariances, shape (2,2,2,2); every 2x2 block is the identity
sigmas_=np.zeros((2,2,2,2))
for k in range(2):
    for b in range(2):
        sigmas_[k,:,:,b]=np.eye(2)

# stack everything into the flattened array that lp expects as xc
xc=madmix_gmm_flatten(ws_,mus_,sigmas_)

In [22]:
# full conditional pmf of the label of observation 0; lp returns one row per batch column
np.exp(lp(xd,xc,axis=0))

array([[2.79888842e-79, 1.00000000e+00],
       [2.79888842e-79, 1.00000000e+00]])

## Old Faithful

In [4]:
####################
####################
#  data wrangling  #
####################
####################
# download the data file (requires network access) and convert the table to a plain array
of_dat=pd.read_table('https://gist.githubusercontent.com/curran/4b59d1046d9e66f2787780ad51a1cd87/raw/9ec906b78a98cf300947a37b56cfe70d01183200/data.tsv')
dat=np.array(of_dat)
# N rows and D columns; D is read from the global scope by lp
N,D=dat.shape