# One Way Normal Model in Edward

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from timeit import default_timer as timer
import numpy as np
import pickle
import tensorflow as tf

import edward as ed
from edward.models import HalfNormal, Normal, Empirical, InverseGamma

from utils import generate_datasets, SEED, I, SIGMA, MU

In [None]:
# Seed Edward once, up front, so that the runs below are repeatable.
ed.set_seed(42)

## 1. Models

In [None]:
# Centered
def edward_model_c(y, I=I, sigma=None):
  """
  Builds the centered parameterization of the one-way normal model.

  Model, as constructed below:
    mu      ~ Normal(0, 5)
    tau     ~ HalfNormal(5)
    theta_i ~ Normal(mu, tau)          for i = 1..I
    y_i     ~ Normal(theta_i, sigma_i)

  Args:
    y: observed values bound to `y_e` in the returned feed dict
       (presumably of length I -- confirm against `generate_datasets`)
    I: number of groups; sets the shape of theta and of the sigma placeholder
    sigma: per-group observation scales fed to the placeholder. When None
       (the default) it is set to [SIGMA] * I.

  Returns:
    (mu_e, tau_e, theta_e, y_e, data), where `data` maps `y_e` to `y` and the
    sigma placeholder to `sigma`.
  """
  # Resolve the default here rather than in the signature: a signature default
  # of `[SIGMA] * I` is evaluated once with the module-level I, so it would not
  # follow an `I` passed by the caller (and would be a shared mutable list).
  if sigma is None:
    sigma = [SIGMA] * I

  mu_e = Normal(0., 5.)
  tau_e = HalfNormal(5.)
  # Scalars mu/tau are broadcast to one entry per group.
  theta_e = Normal(tf.ones(I) * mu_e, tf.ones(I) * tau_e)
  # sigma is treated as known data, so it enters through a placeholder.
  sigma_e = tf.placeholder(tf.float32, I)
  y_e = Normal(theta_e, sigma_e)
  data = {y_e: y, sigma_e: sigma}

  return mu_e, tau_e, theta_e, y_e, data
  
# Non centered  
def edward_model_nc(y, I=I, sigma=None):
  """
  Builds the non-centered parameterization of the one-way normal model.

  Model, as constructed below:
    mu       ~ Normal(0, 5)
    tau      ~ HalfNormal(5)
    vtheta_i ~ Normal(0, 1)                       for i = 1..I
    y_i      ~ Normal(mu + vtheta_i * tau, sigma_i)

  Args:
    y: observed values bound to `y_e` in the returned feed dict
       (presumably of length I -- confirm against `generate_datasets`)
    I: number of groups; sets the shape of vtheta and of the sigma placeholder
    sigma: per-group observation scales fed to the placeholder. When None
       (the default) it is set to [SIGMA] * I.

  Returns:
    (mu_e, tau_e, vtheta_e, y_e, data), where `data` maps `y_e` to `y` and the
    sigma placeholder to `sigma`.
  """
  # Resolve the default here rather than in the signature: a signature default
  # of `[SIGMA] * I` is evaluated once with the module-level I, so it would not
  # follow an `I` passed by the caller (and would be a shared mutable list).
  if sigma is None:
    sigma = [SIGMA] * I

  mu_e = Normal(0., 5.)
  tau_e = HalfNormal(5.)

  # Standardized group effects; the group mean is rebuilt as mu + vtheta * tau.
  vtheta_e = Normal(tf.zeros(I), tf.ones(I))
  # sigma is treated as known data, so it enters through a placeholder.
  sigma_e = tf.placeholder(tf.float32, I)
  y_e = Normal(mu_e + vtheta_e*tau_e, sigma_e)
  data = {y_e: y, sigma_e: sigma}

  return mu_e, tau_e, vtheta_e, y_e, data

## 2. Inference

### HMC

In [None]:
def _Ed_HMC_c(y, iters=400000, burn=20000, thin=1000):
  """
  Single-dataset run of Edward's HMC on the centered model.

  Defaults are adapted from the Stan NUTS configuration used in the paper;
  one long chain is run instead of four.

  Args:
    y: observations handed to `edward_model_c`
    iters: length of each Empirical sample buffer
    burn: number of leading draws dropped from each buffer
    thin: stride applied to the draws that remain after `burn`

  Returns:
    (mu draws, tau draws, seconds spent inside `run()`)
  """
  mu_rv, tau_rv, _theta_rv, _y_rv, observed = edward_model_c(y)

  # One Empirical buffer per sampled variable. Only mu and tau are handed to
  # HMC; theta is not part of the latent-variable dict.
  mu_chain = Empirical(params=tf.Variable(tf.zeros(iters)))
  tau_chain = Empirical(params=tf.Variable(tf.zeros(iters)))
  sampler = ed.HMC({tau_rv: tau_chain, mu_rv: mu_chain}, data=observed)

  # Time the sampling itself, excluding graph construction above.
  t0 = timer()
  sampler.run()
  elapsed = timer() - t0

  # Drop the burn-in prefix first, then thin what is left.
  mu_draws = mu_chain.params.eval()[burn:][::thin]
  tau_draws = tau_chain.params.eval()[burn:][::thin]
  return mu_draws, tau_draws, elapsed

In [None]:
def Ed_HMC_c(iters=400000, burn=20000, thin=1000, seeds=SEED):
  """
  Runs centered-parameterization HMC once per seed and pickles each result.

  Defaults are adapted from the Stan NUTS configuration used in the paper.

  Args:
    iters, burn, thin: forwarded unchanged to `_Ed_HMC_c`
    seeds: seeds passed to `generate_datasets`; one dataset is fitted per seed

  Side effects:
    Prints each seed as it starts and 'Done' at the end, and writes
    results/edward/hmc_c_<seed>.pkl holding the timing, the mu/tau draws and
    the sampler settings.
  """
  datasets, _ = generate_datasets(seeds=seeds)
  for seed, y in zip(seeds, datasets):
    print(seed)
    mu_draws, tau_draws, elapsed = _Ed_HMC_c(y, iters, burn, thin)
    results = {
      'time': elapsed,
      'mu': mu_draws,
      'tau': tau_draws,
      'iters': iters,
      'burn': burn,
      'thin': thin,
    }
    out_path = 'results/edward/hmc_c_{}.pkl'.format(seed)
    with open(out_path, 'wb') as out:
      pickle.dump(results, out)
  print('Done')

In [None]:
# Centered HMC with the default settings of Ed_HMC_c
# (iters=400000, burn=20000, thin=1000, seeds=SEED).
Ed_HMC_c()

In [None]:
def _Ed_HMC_nc(y, iters=200000, burn=20000):
  """
  Single-dataset run of Edward's HMC on the non-centered model.

  Defaults are adapted from the nominal Stan NUTS configuration used in the
  paper; for the baseline configuration use iters=400000, burn=20000.
  One long chain is run instead of four.

  Args:
    y: observations handed to `edward_model_nc`
    iters: length of each Empirical sample buffer
    burn: number of leading draws dropped from each buffer (no thinning)

  Returns:
    (mu draws, tau draws, seconds spent inside `run()`)
  """
  mu_rv, tau_rv, _vtheta_rv, _y_rv, observed = edward_model_nc(y)

  # One Empirical buffer per sampled variable. Only mu and tau are handed to
  # HMC; vtheta is not part of the latent-variable dict.
  mu_chain = Empirical(params=tf.Variable(tf.zeros(iters)))
  tau_chain = Empirical(params=tf.Variable(tf.zeros(iters)))
  sampler = ed.HMC({tau_rv: tau_chain, mu_rv: mu_chain}, data=observed)

  # Time the sampling itself, excluding graph construction above.
  t0 = timer()
  sampler.run()
  elapsed = timer() - t0

  mu_draws = mu_chain.params.eval()[burn:]
  tau_draws = tau_chain.params.eval()[burn:]
  return mu_draws, tau_draws, elapsed

In [None]:
def Ed_HMC_nc(iters=200000, burn=20000, mode='nominal', seeds=SEED):
  """
  Runs non-centered-parameterization HMC once per seed and pickles each result.

  Defaults are adapted from the nominal Stan NUTS configuration used in the
  paper; for the baseline use iters=400000, burn=20000, mode='baseline'.

  Args:
    iters, burn: forwarded unchanged to `_Ed_HMC_nc`
    mode: label used only in the output file name
    seeds: seeds passed to `generate_datasets`; one dataset is fitted per seed

  Side effects:
    Prints each seed as it starts and 'Done' at the end, and writes
    results/edward/hmc_nc_<mode>_<seed>.pkl holding the timing, the mu/tau
    draws and the sampler settings.
  """
  datasets, _ = generate_datasets(seeds=seeds)
  for seed, y in zip(seeds, datasets):
    print(seed)
    mu_draws, tau_draws, elapsed = _Ed_HMC_nc(y, iters, burn)
    results = {
      'time': elapsed,
      'mu': mu_draws,
      'tau': tau_draws,
      'iters': iters,
      'burn': burn,
    }
    out_path = 'results/edward/hmc_nc_{}_{}.pkl'.format(mode, seed)
    with open(out_path, 'wb') as out:
      pickle.dump(results, out)
  print('Done')

In [None]:
# Nominal non-centered HMC: the defaults of Ed_HMC_nc
# (iters=200000, burn=20000, mode='nominal').
Ed_HMC_nc()

In [None]:
# Baseline non-centered HMC: twice the nominal number of iterations, same
# burn-in; `mode` only changes the output file name.
Ed_HMC_nc(iters=400000, burn=20000, mode='baseline')

### KLqp

In [None]:
def _Ed_VI_c(y, iters):
  """
  Fits Edward's KLqp to the centered model, for one dataset.

  Args:
    y: observations handed to `edward_model_c`
    iters: number of KLqp iterations (passed to `run` as `n_iter`)

  Returns:
    (q_mu, q_tau, seconds spent inside `run`), where q_mu is a Normal and
    q_tau a HalfNormal approximation.
  """
  mu_rv, tau_rv, _theta_rv, _y_rv, observed = edward_model_c(y)

  # Variational parameters live in named scopes; AUTO_REUSE lets this function
  # be called repeatedly with the same variable names. softplus keeps the
  # scale parameters positive.
  with tf.variable_scope('q_mu', reuse=tf.AUTO_REUSE):
    mu_loc = tf.get_variable('loc', [])
    mu_scale = tf.nn.softplus(tf.get_variable('scale', []))
    q_mu = Normal(mu_loc, mu_scale)
  with tf.variable_scope('q_tau', reuse=tf.AUTO_REUSE):
    tau_scale = tf.nn.softplus(tf.get_variable('scale', []))
    q_tau = HalfNormal(tau_scale)

  # Only mu and tau receive variational approximations.
  vi = ed.KLqp({mu_rv: q_mu, tau_rv: q_tau}, data=observed)

  # Time the optimization itself, excluding graph construction above.
  t0 = timer()
  vi.run(n_iter=iters)
  elapsed = timer() - t0

  return q_mu, q_tau, elapsed

In [None]:
def _Ed_VI_nc(y, iters):
  """
  Fits Edward's KLqp to the non-centered model, for one dataset.

  Args:
    y: observations handed to `edward_model_nc`
    iters: number of KLqp iterations (passed to `run` as `n_iter`)

  Returns:
    (q_mu, q_tau, seconds spent inside `run`), where q_mu is a Normal and
    q_tau a HalfNormal approximation.
  """
  mu_rv, tau_rv, _vtheta_rv, _y_rv, observed = edward_model_nc(y)

  # Variational parameters live in named scopes; AUTO_REUSE lets this function
  # be called repeatedly with the same variable names. softplus keeps the
  # scale parameters positive.
  with tf.variable_scope('q_mu', reuse=tf.AUTO_REUSE):
    mu_loc = tf.get_variable('loc', [])
    mu_scale = tf.nn.softplus(tf.get_variable('scale', []))
    q_mu = Normal(mu_loc, mu_scale)
  with tf.variable_scope('q_tau', reuse=tf.AUTO_REUSE):
    tau_scale = tf.nn.softplus(tf.get_variable('scale', []))
    q_tau = HalfNormal(tau_scale)

  # Only mu and tau receive variational approximations.
  vi = ed.KLqp({mu_rv: q_mu, tau_rv: q_tau}, data=observed)

  # Time the optimization itself, excluding graph construction above.
  t0 = timer()
  vi.run(n_iter=iters)
  elapsed = timer() - t0

  return q_mu, q_tau, elapsed

In [None]:
def Ed_VI(mode='c', seeds=SEED):
  """
  Runs Edward's KLqp algorithm for every seed and a grid of iteration counts.

  If mode is 'c', use the centered parameterization
  If mode is 'nc', use the non-centered parameterization

  Args:
    mode: 'c' or 'nc'; selects the fit function and is used in the file name
    seeds: seeds passed to `generate_datasets`; one dataset is fitted per seed

  Raises:
    ValueError: if `mode` is neither 'c' nor 'nc'

  Side effects:
    Prints each seed, then 'Done' once that seed's fits are finished. For each
    seed, the results of all five iteration counts are appended, as
    consecutive pickles, to results/edward/vi_<mode>_<seed>.pkl. The file is
    opened in append mode, so re-running adds further records to it.
  """
  if mode not in ('c', 'nc'):
    # A bare string cannot be raised (that is itself a TypeError which hides
    # the message), so raise a real exception carrying it.
    raise ValueError("Mode must be 'c' for centered or 'nc' for non centered")

  # Pick the parameterization once instead of re-testing `mode` for every fit.
  fit = _Ed_VI_c if mode == 'c' else _Ed_VI_nc
  # Five evenly spaced iteration budgets from 50000 to 500000; the grid does
  # not depend on the seed, so it is built once.
  iters = np.linspace(50000, 500000, 5).astype(int)

  Y, _ = generate_datasets(seeds=seeds)
  for y, seed in zip(Y, seeds):
    print(seed)
    for n in iters:
      q_mu, q_tau, elapsed = fit(y, n)
      # Store 1000 draws from each fitted approximation, with a trailing axis added.
      results = {
        'time': elapsed,
        'iters': n,
        'mu': q_mu.sample(1000).eval()[:,None],
        'tau': q_tau.sample(1000).eval()[:,None],
      }

      with open('results/edward/vi_{}_{}.pkl'.format(mode, seed), 'ab') as f:
        pickle.dump(results, f)
    print('Done')

In [None]:
# Centered KLqp: Ed_VI defaults to mode='c' and seeds=SEED.
Ed_VI()

In [None]:
# Non-centered KLqp over the same default seeds.
Ed_VI(mode='nc')

### References

[1] Betancourt, Michael J. and Girolami, Mark. Hamiltonian Monte Carlo for Hierarchical Models. 2013.