In [242]:
'''
!pip install numpy
!pip install scipy
!pip install seaborn
!pip install matplotlib
'''

'\n!pip install numpy\n!pip install scipy\n!pip install seaborn\n!pip install matplotlib\n'

In [706]:
import numpy as np
import scipy as sp

import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

## Data Generation

In [749]:
sigma = 3
num_clusters = 3
sample_size = 1000


In [750]:
SEED = 2

In [751]:
np.random.seed(SEED)

cluster_mus = np.random.normal(size=num_clusters, loc=0, scale=sigma)
cluster_choices = np.random.choice(num_clusters, 
                            size=sample_size, 
                            p=[1/num_clusters]*num_clusters)
sample_mus = [cluster_mus[cluster_choice] for cluster_choice in cluster_choices]
X = np.random.normal(size=sample_size, loc=sample_mus, scale=1)

In [752]:
cluster_mus

array([-1.25027354, -0.16880048, -6.40858829])

## Model

In [871]:
class VIGMM:
    
    """
    Variational Inference Gaussian Mixture Model
    """

    def __init__(self, 
                num_clusters,
                sample_size,
                mu_k_mu_prior=0,
                mu_k_sigma_prior=1,
                s2_k_alpha_prior=5,
                random_seed=1):
        
        self._num_clusters = num_clusters
        self._sample_size = sample_size
        
        self._mu_k_mu_prior = mu_k_mu_prior
        self._mu_k_sigma_prior = mu_k_sigma_prior
        self._s2_k_alpha_prior = s2_k_alpha_prior
        self._random_seed = random_seed
                
        self._params = None
        
    @property
    def params(self):
        
        return self._params
        
    def _elbo(self, mu_k, s2_k, phis, X):
        
        t = s2_k + np.square(mu_k)
        a = -(1/(2*(self._mu_k_sigma_prior**2)))*sum(t)
        b = 2*sum(sum((phis*mu_k)*X[:,np.newaxis])) - 0.5*sum(sum(phis*t))
        c = -0.5*sum(np.log(2*np.pi*s2_k))
        d = sum(sum(phis*np.log(phis)))
        return a+b+c+d
        
    def fit(self, X, num_iterations=100):
    
        np.random.seed(self._random_seed)
        
        mu_k = np.random.normal(size=self._num_clusters, 
                                loc = self._mu_k_mu_prior, 
                                scale = self._mu_k_sigma_prior**2) 
        s2_k = np.random.gamma(size=self._num_clusters, 
                               shape=self._s2_k_alpha_prior)
        phis = np.random.dirichlet(size=self._sample_size, 
                                   alpha=np.repeat(1, self._num_clusters))
        elbos_res = np.zeros(num_iterations+1)

        elbos_res[0] = self._elbo(mu_k, s2_k, phis, X)

        for i in range(num_iterations):
            phis_updated = np.zeros((self._sample_size, self._num_clusters))
            for j in range(self._sample_size):
                # note: this update can quickly becoming unstable for larger mu_ks
                # beware of larger mu_k priors that would lead to larger mu_ks
                phis_updated[j] = np.exp(X[j]*mu_k - 0.5*(s2_k + np.square(mu_k)))
                phis_updated[j] = phis_updated[j] / np.sum(phis_updated[j])
            phis = phis_updated

            mu_k_updated = np.zeros(num_clusters)
            s2_k_updated = np.zeros(num_clusters)
            for k in range(self._num_clusters):
                s2_k_updated[k] = 1/(1/(self._mu_k_sigma_prior**2)+sum(phis[:,k]))
                mu_k_updated[k] = sum(phis[:,k]*X) / ((1/(self._mu_k_sigma_prior**2)) + sum(phis[:,k]))
            s2_k = s2_k_updated
            mu_k = mu_k_updated

            elbos_res[i+1] = self._elbo(mu_k, s2_k, phis, X)
            if i % 10 == 0:
                print(f"Iteration: {i+1}:", "ELBO Difference: ", np.abs(elbos_res[i+1]-elbos_res[i-1]))
            if np.abs(elbos_res[i+1]-elbos_res[i-1]) < 1e-10:
                break
        
        self._params = {"mu":mu_k,
                       "sigma_2":s2_k,
                       "phi":phis}
    
    

In [872]:
vigmm = VIGMM(num_clusters, sample_size)
vigmm.fit(X)

Iteration: 0: ELBO Difference:  9820.513538516769
Iteration: 10: ELBO Difference:  12.259831111761741
Iteration: 20: ELBO Difference:  0.529372054090345
Iteration: 30: ELBO Difference:  0.02592066839861218
Iteration: 40: ELBO Difference:  0.001276814906304935
Iteration: 50: ELBO Difference:  6.291257159318775e-05
Iteration: 60: ELBO Difference:  3.099929017480463e-06
Iteration: 70: ELBO Difference:  1.5270052244886756e-07
Iteration: 80: ELBO Difference:  7.548806024715304e-09
Iteration: 90: ELBO Difference:  3.4924596548080444e-10


In [873]:
vigmm.params['mu']


array([-0.29747623, -6.29623722, -1.17959657])

In [874]:
cluster_mus

array([-1.25027354, -0.16880048, -6.40858829])

# Experimental

In [875]:
class VIGMM:
    
    """
    Variational Inference Gaussian Mixture Model
    """

    def __init__(self, 
                num_clusters,
                sample_size,
                mu_k_mu_prior=0,
                mu_k_sigma_prior=1,
                s2_k_alpha_prior=5,
                random_seed=1):
        
        self._num_clusters = num_clusters
        self._sample_size = sample_size
        
        self._mu_k_mu_prior = mu_k_mu_prior
        self._mu_k_sigma_prior = mu_k_sigma_prior
        self._s2_k_alpha_prior = s2_k_alpha_prior
        self._random_seed = random_seed
                
        self._params = None
        
    @property
    def params(self):
        
        return self._params
        
    def _elbo(self, mu_k, s2_k, phis, X):
        
        t = s2_k + np.square(mu_k)
        a = -(1/(2*(self._mu_k_sigma_prior**2)))*sum(t)
        b = 2*sum(sum((phis*mu_k)*X[:,np.newaxis])) - 0.5*sum(sum(phis*t))
        c = -0.5*sum(np.log(2*np.pi*s2_k))
        d = sum(sum(phis*np.log(phis)))
        return a+b+c+d
        
    def fit(self, X, num_iterations=100):
    
        np.random.seed(self._random_seed)
        
        mu_k = np.random.normal(size=self._num_clusters, 
                                loc = self._mu_k_mu_prior, 
                                scale = self._mu_k_sigma_prior**2) 
        s2_k = np.random.gamma(size=self._num_clusters, 
                               shape=self._s2_k_alpha_prior)
        phis = np.random.dirichlet(size=self._sample_size, 
                                   alpha=np.repeat(1, self._num_clusters))
        elbos_res = np.zeros(num_iterations+1)

        elbos_res[0] = self._elbo(mu_k, s2_k, phis, X)

        for i in range(num_iterations):
            phis_updated = np.zeros((self._sample_size, self._num_clusters))
            for j in range(self._sample_size):
                # note: this update can quickly becoming unstable for larger mu_ks
                # beware of larger mu_k priors that would lead to larger mu_ks
                phis_updated[j] = np.exp(X[j]*mu_k - 0.5*(s2_k + np.square(mu_k)))
                phis_updated[j] = phis_updated[j] / np.sum(phis_updated[j])
            phis = phis_updated

            mu_k_updated = np.zeros(num_clusters)
            s2_k_updated = np.zeros(num_clusters)
            for k in range(self._num_clusters):
                s2_k_updated[k] = 1/(1/(self._mu_k_sigma_prior**2)+sum(phis[:,k]))
                mu_k_updated[k] = sum(phis[:,k]*X) / ((1/(self._mu_k_sigma_prior**2)) + sum(phis[:,k]))
            s2_k = s2_k_updated
            mu_k = mu_k_updated

            elbos_res[i+1] = self._elbo(mu_k, s2_k, phis, X)
            if i % 10 == 0:
                print(f"Iteration: {i+1}:", "ELBO Difference: ", np.abs(elbos_res[i+1]-elbos_res[i-1]))
            if np.abs(elbos_res[i+1]-elbos_res[i-1]) < 1e-10:
                break
        
        self._params = {"mu":mu_k,
                       "sigma_2":s2_k,
                       "phi":phis}
    
    

In [876]:
vigmm = VIGMM(num_clusters, sample_size)
vigmm.fit(X)

Iteration: 1: ELBO Difference:  9820.513538516769
Iteration: 11: ELBO Difference:  12.259831111758103
Iteration: 21: ELBO Difference:  0.529372054090345
Iteration: 31: ELBO Difference:  0.0259206683949742
Iteration: 41: ELBO Difference:  0.001276814906304935
Iteration: 51: ELBO Difference:  6.291257159318775e-05
Iteration: 61: ELBO Difference:  3.099929017480463e-06
Iteration: 71: ELBO Difference:  1.5270779840648174e-07
Iteration: 81: ELBO Difference:  7.548806024715304e-09
Iteration: 91: ELBO Difference:  3.4560798667371273e-10


In [877]:
vigmm.params['mu']


array([-0.29747623, -6.29623722, -1.17959657])

In [878]:
cluster_mus

array([-1.25027354, -0.16880048, -6.40858829])