# Good examples and tips in Edward

Some code that works and is useful, that I didn't want to throw away.

In [27]:
import numpy as np
import tensorflow as tf
import edward as ed
from pprint import pprint

from edward.models import Categorical, Dirichlet, Uniform, Mixture
from edward.models import Bernoulli, Normal
%matplotlib inline
import matplotlib.pyplot as plt

**Dirichlet Categorical, KLqp**

In [2]:
# code from edward/examples/dirichlet_categorical.py:
# Inferring a categorical distribution with KLqp
tf.reset_default_graph()
sess = tf.InteractiveSession()
K = 4
N = 30
# DATA
pi_true = np.random.dirichlet(np.array([20.0, 30.0, 10.0, 10.0]))
z_data = np.array([np.random.choice(K, 1, p=pi_true)[0]
                   for n in range(N)])
print("pi: {}".format(pi_true))

# MODEL
pi = ed.models.Dirichlet(tf.ones(4))
z = ed.models.Categorical(probs=pi, sample_shape=N)

# INFERENCE
qpi = ed.models.Dirichlet(tf.nn.softplus(
    tf.get_variable("qpi/concentration", [K])))

inference = ed.KLqp({pi: qpi}, data={z: z_data})
inference.run(n_iter=1500, n_samples=30)

# sess = ed.get_session()
print("Inferred pi: {}".format(sess.run(qpi.mean())))

pi: [0.25715353 0.46914549 0.15873922 0.11496176]


  not np.issubdtype(value.dtype, np.float) and \
  not np.issubdtype(value.dtype, np.int) and \


1500/1500 [100%] ██████████████████████████████ Elapsed: 7s | Loss: 36.726
Inferred pi: [0.4226108  0.4012063  0.07155747 0.10462539]


**Linear regression, SGHMC**

In [6]:
# example from examples/bayesian_linear_regression_sghmc.py
tf.reset_default_graph()
sess = tf.InteractiveSession()
def build_toy_dataset(N, noise_std=0.5):
    X = np.concatenate([np.linspace(0, 2, num=N / 2),
                        np.linspace(6, 8, num=N / 2)])
    y = 2.0 * X + 10 * np.random.normal(0, noise_std, size=N)
    X = X.astype(np.float32).reshape((N, 1))
    y = y.astype(np.float32)
    return X, y

N = 40  # number of data points
D = 1  # number of features
# DATA
X_train, y_train = build_toy_dataset(N)
X_test, y_test = build_toy_dataset(N)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = ed.models.Normal(loc=tf.zeros(D), scale=tf.ones(D))
b = ed.models.Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = ed.models.Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))

# INFERENCE
T = 5000                        # Number of samples.
nburn = 100                     # Number of burn-in samples.
stride = 10                     # Frequency with which to plot samples.
qw = ed.models.Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = ed.models.Empirical(params=tf.Variable(tf.random_normal([T, 1])))

inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(step_size=1e-3)

  """
  


5000/5000 [100%] ██████████████████████████████ Elapsed: 8s | Acceptance Rate: 1.000


**Mixture Gaussian, MH**

In [42]:
# https://github.com/blei-lab/edward/blob/master/examples/mixture_gaussian_mh.py

## Building the Markov Chain:

**Mock data used:**

In [23]:
y_data = ([0] * 10) + ([1] * 10) + ([2] * 10)
# for each categorical var y, he associated this matrix:
np.array(y_data)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2])

### Version 1: [doesn't work] HMM, with Transition matrix + no Dirichlet + TF loop

= initial code form Github issue

**MODEL**

In [27]:
chain_len = 30
n_hidden = 3
n_obs = 3
x_0 = Categorical(probs=tf.fill([n_hidden], 1.0 / n_hidden))

# transition matrix
T = Uniform(tf.zeros([n_hidden, n_hidden]), tf.ones([n_hidden, n_hidden]))
T /= tf.reduce_sum(T, axis=0, keep_dims=True)

# emission matrix
E = Uniform(tf.zeros([n_obs, n_hidden]), tf.ones([n_obs, n_hidden]))
E /= tf.reduce_sum(E, axis=0, keep_dims=True)

# model
y_val = tf.placeholder(tf.int32, [chain_len])
x = tf.scan(lambda x_tm1, _: Categorical(probs=T[:, x_tm1]),
            y_val, initializer=x_0)
y = tf.map_fn(lambda x_t: Categorical(probs=E[:, x_t]), x)

**INFERENCE**

In [29]:
# inference
qT = tf.Variable(Uniform(tf.zeros([n_hidden, n_hidden]), tf.ones([n_hidden, n_hidden])))
qT /= tf.reduce_sum(qT, axis=0, keep_dims=True)
qE = tf.Variable(Uniform(tf.zeros([n_obs, n_hidden]), tf.ones([n_obs, n_hidden])))
qE /= tf.reduce_sum(qE, axis=0, keep_dims=True)
px_init = tf.Variable(tf.random_uniform([chain_len, n_hidden]))
px_init /= tf.reduce_sum(px_init, axis=0, keep_dims=True)
qx = Categorical(probs=px_init)

y_data = np.array(([0] * 10) + ([1] * 10) + ([2] * 10), dtype=np.int32)

inference = ed.KLqp({T: qT, E: qE, x: qx}, {y_val: y_data})
try:
    inference.run()
except Exception as e:
    print(e)

'Tensor' object has no attribute 'reparameterization_type'


This seems similar to the github issue (**but not exactly the same error as back then)**. It's because many of these objects are not instances of RandomVariable... If we dig in more into how Edward works we might understand why exactly and if there is anyway to avoid this problem.

### Version 2: [works and converges] HMM, with Transition matrix + no Dirichlet + Python loop

**MODEL**

In [3]:
# from issue https://github.com/blei-lab/edward/issues/450
chain_len = 30
n_hidden = 3
n_obs = 3

x_0 = Categorical(probs=tf.fill([n_hidden], 1.0 / n_hidden))

# transition matrix
T = tf.nn.softmax(tf.Variable(tf.random_uniform([n_hidden, n_hidden])), dim=0)

# emission matrix
E = tf.nn.softmax(tf.Variable(tf.random_uniform([n_hidden, n_obs])), dim=0)

# MODEL
x = []
y = []
for _ in range(chain_len):
    x_tm1 = x[-1] if x else x_0
    x_t = Categorical(probs=T[x_tm1, :])
    y_t = Categorical(probs=E[x_t, :])
    x.append(x_t)
    y.append(y_t)

Instructions for updating:
dim is deprecated, use axis instead


**INFERENCE**

In [8]:
# INFERENCE
qx = [Categorical(probs=tf.nn.softmax(tf.Variable(tf.ones(n_hidden))))
      for _ in range(chain_len)]

y_data = ([0] * 10) + ([1] * 10) + ([2] * 10)
y_data = map(np.array, y_data)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(T))

    inference = ed.KLqp(dict(zip(x, qx)), dict(zip(y, y_data)))
    inference.run(n_iter=5000)

    inferred_T, inferred_E = sess.run([T, E])
    inferred_qx = sess.run([foo.probs for foo in qx])
    inferred_y_probs = sess.run([foo.probs for foo in y])
    print(inferred_T)
    print(inferred_E)

[[0.38294753 0.33900872 0.3363336 ]
 [0.3647698  0.21131545 0.43748775]
 [0.2522827  0.44967577 0.22617862]]


  not np.issubdtype(value.dtype, np.float) and \
  not np.issubdtype(value.dtype, np.int) and \


5000/5000 [100%] ██████████████████████████████ Elapsed: 41s | Loss: 10.566
[[0.89333415 0.03524946 0.10075802]
 [0.10272316 0.92825365 0.00374841]
 [0.00394264 0.03649681 0.8954936 ]]
[[0.0030673  0.99385744 0.00242128]
 [0.9951315  0.00379708 0.00150492]
 [0.00180128 0.00234548 0.99607384]]


**Note:** this example seems to converge to something better that whay the guy said in the github example. Also I made switched column indexing to rows. **Sometimes it doesn't seem to actually converge. When it converges, it reaches a loss between 7 and 10**. 5K iterations seems to be enough. Less seemed not to converge but not sure if by re-running it wouldn't do better...

*Given my 3 hidden states, 3 observation types, and long changes of identical observations, I expect transition matrix to be close to diagonal and the emission matrix to look like a permutation matrix. I see non-converging loss info. (the guy had a loss around 40, for 10K iterations)*, *non-uniform state probability distributions, and very uniform observation probabilities. Any idea what's going wrong in my setup or the solving of the problem?*

Transitions: you almost always stay in the same state. Emission: you almost always go to the same state, but it doesn't have to be the same number.

**Using the external loop like this seems to work, fixing the length of the chains is not a big problem (anyway at some point LC stops the loan anyway so they cannot run indefinitely), we could do that while thinking of how to make it more efficient inside TF.**

### Version 3: [works but bad results] Regular MM, with Transition matrix + no Dirichlet + Python loop

= Version 2 but without hidden states

**MODEL**

In [12]:
# from issue https://github.com/blei-lab/edward/issues/450
chain_len = 30
n_obs = 3

x_0 = Categorical(probs=tf.fill([n_obs], 1.0 / n_obs))

# transition matrix
T = tf.nn.softmax(tf.Variable(tf.random_uniform([n_obs, n_obs])), dim=0)

# no more emissions, we observe directly the hidden states x

# MODEL
x = []
for _ in range(chain_len):
    x_tm1 = x[-1] if x else x_0
    x_t = Categorical(probs=T[:, x_tm1])
    x.append(x_t)

**INFERENCE**

In [14]:
# INFERENCE
qx = [Categorical(probs=tf.nn.softmax(tf.Variable(tf.ones(n_obs))))
      for _ in range(chain_len)]

x_data = ([0] * 10) + ([1] * 10) + ([2] * 10)
x_data = map(np.array, x_data)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(T))

    inference = ed.KLqp(dict(zip(x, qx)), dict(zip(x, x_data)))
    inference.run(n_iter=5000)

    inferred_T = sess.run(T)
    inferred_qx = sess.run([foo.probs for foo in qx])
    inferred_y_probs = sess.run([foo.probs for foo in y])
    print(inferred_T)

[[0.35989723 0.2875302  0.46658668]
 [0.24342616 0.38703847 0.25333694]
 [0.3966766  0.32543132 0.2800764 ]]
5000/5000 [100%] ██████████████████████████████ Elapsed: 38s | Loss: 0.093
[[0.31659073 0.31191257 0.3038696 ]
 [0.4490798  0.44587556 0.45165172]
 [0.23432946 0.24221194 0.24447864]]


### Version 4: [don't know if works] HMM, without Transition matrix + Dirichlet + Python loop

= this is "idea 1" on the github issue, that I'm trying to get working.

*(1) make T and E a list of Categorical(Dirichlet(...)), and replace loop bodies to return a Mixture of T and E according to x_t or x_tm1. This didn't work because x_t lost it's type information in the loop and caused error TypeError: cat must be a Categorical distribution, but saw: Tensor("scan/while/Identity_1:0", shape=(), dtype=int32)*

**TODO** why a mixture? If we're doing explicit hard assignment to hidden states?

This doesn't work, because you can index a tf variable by Categorical (because it'll just figure out the output of the categorical when running the graph) and index by that value, but you can't do it with a list:

**MODEL**

In [17]:
chain_len = 30
n_hidden = 3
n_obs = 3

x_0 = Categorical(Dirichlet(tf.ones(n_hidden)))

# transition matrix
T = [Categorical(Dirichlet(tf.ones(n_hidden))) for i in range(n_hidden)]

# emission matrix
E = [Categorical(Dirichlet(tf.ones(n_obs))) for i in range(n_hidden)]

# MODEL
x = []
y = []
for _ in range(chain_len):
    x_tm1 = x[-1] if x else x_0
    x_t = T[x_tm1]
    y_t = E[x_t]
    # x_t = Categorical(probs=T[x_tm1, :])
    # y_t = Categorical(probs=E[x_t, :])
    x.append(x_t)
    y.append(y_t)

TypeError: list indices must be integers or slices, not Categorical

Wikipedia: *A hidden Markov model can be considered a generalization of a mixture model where the hidden variables (or latent variables), which control the mixture component to be selected for each observation, are related through a Markov process rather than independent of each other.*

In [44]:
chain_len = 30
n_hidden = 3
n_obs = 3

x_0 = Categorical(Dirichlet(tf.ones(n_hidden)))

# transition matrix
T = [Categorical(Dirichlet(tf.ones(n_hidden))).value() for i in range(n_hidden)]

# emission matrix
E = [Categorical(Dirichlet(tf.ones(n_obs))).value() for i in range(n_hidden)]

# MODEL
x = []
y = []
for _ in range(chain_len):
    x_tm1 = x[-1] if x else x_0
    x_t = tf.gather(T, x_tm1)
    # problem: by running tf.gather, we loose the type Categorical
    # and we are doing inference on x_t itself
    # (it would only be ok to use tf.gather if it was to parametrize smth,
    # not if it has to be a RV itself)
    y_t = tf.gather(E, x_t)
    # x_t = Categorical(probs=T[x_tm1, :])
    # y_t = Categorical(probs=E[x_t, :])
    x.append(x_t)
    y.append(y_t)

In [45]:
# INFERENCE
qx = [Categorical(probs=tf.nn.softmax(tf.Variable(tf.ones(n_obs))))
      for _ in range(chain_len)]

x_data = ([0] * 10) + ([1] * 10) + ([2] * 10)
x_data = map(np.array, x_data)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    inference = ed.KLqp(dict(zip(x, qx)), dict(zip(x, x_data)))
    inference.run(n_iter=5000)

    inferred_T = sess.run(T)
    inferred_qx = sess.run([foo.probs for foo in qx])
    inferred_y_probs = sess.run([foo.probs for foo in y])

[2, 1, 2]


  not np.issubdtype(value.dtype, np.float) and \
  not np.issubdtype(value.dtype, np.int) and \


AttributeError: 'Tensor' object has no attribute 'log_prob'

### Version 5: [doesn't work] code from other github example, good for building the model but inference will probably not work

In [38]:
# https://gist.github.com/fredcallaway/c7252b6326dfb502e70cad4146731aef
# fixed the code to work with newer version of Edward
"""Hidden Markov Models
Abstract base class for HMMs and an implementation of an HMM
with discrete states and gaussian emissions.
"""

def categorical(ps):
    return Categorical(probs=ps).value()

def flip(p):
    return tf.equal(Bernoulli(probs=p), tf.constant(1))

def append(lst, x):
    return tf.concat(0, [lst, [x]])


class HMM(object):
    """A Hidden Markov Model."""

    def step(self, state):
        """Returns a new state following `state`."""
        raise NotImplementedError()

    def emit(self, state):
        """Returns an observable emission from `state`."""
        raise NotImplementedError()

    def init(self):
        """Returns an intial state."""
        raise NotImplementedError()

    def final(self, state):
        """Returns true if the model should stop in `state`."""
        raise NotImplementedError()
    
    def sample(self):
        def cond(states, emissions):
            s0 = states[-1]
            return self.final(s0) # TODO not final
        
        def body(states, emissions):
            s0 = states[-1]
            s1 = self.step(s0)
            e1 = self.emit(s1)
            return append(states, s1), append(emissions, e1)

        s0 = self.init()
        e0 = self.emit(s0)
        states = tf.convert_to_tensor([s0])
        emissions = tf.convert_to_tensor([e0])
        return tf.while_loop(
            cond, body, 
            loop_vars=[states, emissions],
            shape_invariants=[tf.TensorShape(None), tf.TensorShape(None)]
        )

In [39]:
class DiscreteGaussianHMM(HMM):
    """HMM with discrete transitions and gaussian emissions."""
    def __init__(self, P, mu, sigma, p_init, p_final):
        super().__init__()
        self.P = P
        self.mu = mu
        self.sigma = sigma
        self.p_init = p_init
        self.p_final = p_final

    def step(self, state):
        return categorical(self.P[state])

    def emit(self, state):
        return Normal(loc=self.mu[state], scale=self.sigma[state]).value()

    def init(self):
        return categorical(self.p_init)

    def final(self, state):
        return flip(1 - tf.gather(self.p_final, state))

In [40]:
P = tf.constant(np.array([
        [.6, .4],
        [.2, .8],
    ], dtype='float32'))
mu = tf.constant([5., -5.])
sigma = tf.constant([1., 1.])
model = DiscreteGaussianHMM(P, mu, sigma, [0.5, 0.5], [0.02, 0.02])

In [41]:
sess = ed.get_session()
emissions = [sess.run(model.sample()[1]) for _ in range(3)]
for e in emissions:
    plt.plot(e)
plt.show()

ValueError: Shapes (2, 1) and () are incompatible