# Gaussian Mixture Model in Edward

In [None]:
from timeit import default_timer as timer
import numpy as np
import pickle

import edward as ed
import tensorflow as tf
from edward.models import Dirichlet, InverseGamma, Normal, ParamMixture, Empirical, Categorical

from utils import SEED, generate_datasets

In [None]:
# Fix Edward's random seed so repeated runs of the notebook are comparable.
# NOTE(review): SEED is imported from utils above but the literal 42 is used
# here -- confirm the two are meant to agree.
ed.set_seed(42)

## 1. Data

In [None]:
def load_datasets(filename='gmm_6k.pkl', n=3):
  """
  Read the first ``n`` objects pickled back-to-back in a single file.

  :param filename: path of the pickle file to read
  :param n: number of consecutive pickled datasets to load (defaults to 3)
  :return: list with the ``n`` loaded datasets, in the order they were stored
  """
  # NOTE: unpickling can execute arbitrary code; only load trusted files.
  with open(filename, 'rb') as handle:
    # Each pickle.load call consumes exactly one object from the stream.
    return [pickle.load(handle) for _ in range(n)]

## 2. Model

Adapted from [1]

In [None]:
def edward_model(K, N):
  """
  Build the Edward random variables of a Gaussian mixture with
  one-dimensional components.

  Priors: Dirichlet(1, ..., 1) on the weights, Normal(0, 10) on each
  component mean and InverseGamma(1, 1) on each component variance.

  :param K: number of mixture components
  :param N: number of observations (sample shape of the mixture variable)
  :return: tuple ``(w_e, mu_e, sigmasq_e, y_e, z_e)`` -- mixture weights,
    component means, component variances, the mixture variable and its
    component assignments (``y_e.cat``)
  """
  # MultivariateNormalDiag is not among the names imported at the top of the
  # file; import it here so building the mixture does not raise NameError.
  from edward.models import MultivariateNormalDiag

  w_e = Dirichlet(tf.ones(K))
  mu_e = Normal(tf.zeros(1), tf.ones(1)*10, sample_shape=K)
  sigmasq_e = InverseGamma(tf.ones(1), tf.ones(1), sample_shape=K)
  # The component distribution takes a scale (standard deviation), hence the
  # square root of the variance variable.
  y_e = ParamMixture(w_e, {'loc': mu_e, 'scale_diag': tf.sqrt(sigmasq_e)},
                   MultivariateNormalDiag,
                   sample_shape=N)
  z_e = y_e.cat

  return w_e, mu_e, sigmasq_e, y_e, z_e

## 3. Inference

### Gibbs sampler

In [None]:
def _ed_Gibbs(K, N, y, iters=20000, burn=8000, thin=10):
  """
  Run Edward's Gibbs sampler on the mixture model and return thinned chains.

  :param K: number of mixture components
  :param N: number of observations
  :param y: observations; indexed as ``y[:, None]`` before being bound to
    the model's mixture variable
  :param iters: length of each Empirical chain (leading dimension of its
    parameter variable)
  :param burn: number of leading samples dropped from every chain
  :param thin: stride applied to the samples that remain after burn-in
  :return: tuple ``(w, mu, sigmasq, z, seconds)`` -- retained samples of each
    latent variable followed by the time spent inside ``inference.run()``
  """
  def chain(scope, shape, initializer, dtype=None):
    # One Empirical approximation backed by a variable named
    # "<scope>/params" created under a variable scope called <scope>.
    # NOTE(review): with AUTO_REUSE a later call asks for the same variable
    # name; if K, N or iters changed in between, the requested shape differs
    # from the existing variable -- confirm this is handled by the caller.
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
      return Empirical(tf.get_variable(
          scope + "/params", shape,
          initializer=initializer,
          dtype=dtype))

  w_e, mu_e, sigmasq_e, y_e, z_e = edward_model(K, N)

  qw = chain('qw', [iters, K], tf.constant_initializer(1.0 / K))
  qmu = chain('qmu', [iters, K, 1], tf.zeros_initializer())
  qsigmasq = chain('qsigmasq', [iters, K, 1], tf.ones_initializer())
  qz = chain('qz', [iters, N], tf.zeros_initializer(), dtype=tf.int32)

  latent_vars = {w_e: qw, mu_e: qmu, sigmasq_e: qsigmasq, z_e: qz}
  inference = ed.Gibbs(latent_vars, data={y_e: y[:,None]})

  # The returned session object is not needed here; the call is kept because
  # the initializer below is run without an explicit session argument.
  ed.get_session()
  tf.global_variables_initializer().run()

  # Only the sampler run itself is timed.
  started = timer()
  inference.run()
  elapsed = timer() - started

  def retained(approx):
    # Drop the burn-in prefix first, then keep every thin-th sample.
    return approx.params.eval()[burn:][::thin]

  return (retained(qw), retained(qmu), retained(qsigmasq), retained(qz),
          elapsed)

In [None]:
def ed_Gibbs(filename='gmm_6k.pkl', n=3, iters=20000, burn=8000, thin=10):
  """
  Runs Edward Gibbs algorithm on every dataset in a pickle file and stores
  the sampled chains.

  Default 20000 iterations, 8000 burn in, 10 thin, big dataset (6 components).
  Pass gmm_3k.pkl for small.

  :param filename: pickle file holding the datasets
  :param n: number of datasets to read from the file
  :param iters: number of Gibbs iterations per dataset
  :param burn: number of leading samples discarded as burn-in
  :param thin: keep every ``thin``-th sample after burn-in
  :return: None; one ``results/edward/gibbs_<K>k_<seed>.pkl`` file is written
    per dataset, holding the chains, the run time and the sampler settings
  """
  import os

  # Create the output directory before sampling: otherwise a missing
  # directory is only discovered after the first (long) sampler run.
  out_dir = 'results/edward'
  if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

  datasets = load_datasets(filename, n)

  for dataset in datasets:
    qw, qmu, qsigmasq, qz, elapsed = _ed_Gibbs(
        dataset['K'], dataset['N'], dataset['y'],
        iters=iters, burn=burn, thin=thin)

    results = {'w': qw, 'mu': qmu, 'sigmasq': qsigmasq, 'z': qz,
               'time': elapsed, 'iters': iters, 'warmup': burn, 'thin': thin}
    out_path = '{}/gibbs_{}k_{}.pkl'.format(out_dir, dataset['K'], dataset['seed'])
    with open(out_path, 'wb') as f:
      pickle.dump(results, f)

In [None]:
# Run the Gibbs sampler on the datasets in gmm_3k.pkl, using the default
# number of datasets, iterations, burn-in and thinning.
ed_Gibbs(filename='gmm_3k.pkl')

In [None]:
# Run the Gibbs sampler on the datasets in gmm_6k.pkl, using the default
# number of datasets, iterations, burn-in and thinning.
ed_Gibbs(filename='gmm_6k.pkl')

## References

[1] Edward Tutorials: [Unsupervised Learning](http://edwardlib.org/tutorials/unsupervised)