## Homogeneous (stationary) Markov Chain Implementation in Edward

### Package Imports and Options

In [7]:
from os import getcwd
import glob
from pprint import pprint

import pandas as pd
import numpy as np
from sklearn import preprocessing

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno

import tensorflow as tf
import edward as ed
from edward.models import Bernoulli, Categorical, Normal

In [2]:
# Widen the pandas display limits so wide frames are shown in full in the
# notebook output.
# NOTE(review): -1 for max_colwidth looks like the legacy "no limit" spelling;
# newer pandas releases expect None instead -- confirm against the installed
# version before changing it.
for option, value in (
    ('display.max_columns', 200),
    ('display.max_rows', 200),
    ('display.max_colwidth', -1),
):
    pd.set_option(option, value)

### Load Data

In [4]:
# Load the data-dictionary spreadsheet into a DataFrame; the path is relative
# to the current working directory.
dic = pd.read_excel('data/LCDataDictionary.xlsx')

In [5]:
# Collect every CSV under ./data. glob returns matches in arbitrary,
# filesystem-dependent order, so sort them to make the row order of the
# concatenated frame reproducible between runs and machines.
csv_files = sorted(glob.glob(f'{getcwd()}/data/*.csv'))
# header=1: column names are read from the second line of each file.
# The per-file indices are kept as-is, so index labels can repeat across files.
df_raw = pd.concat((pd.read_csv(f, header=1, low_memory=False) for f in csv_files))

In [6]:
# NOTE: this binds `df` to the same DataFrame object as `df_raw` -- no copy is
# made, so any in-place modification through `df` is also visible via `df_raw`.
df = df_raw

In [None]:
# resources
# http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture11.pdf
# https://github.com/blei-lab/edward/issues/450
# https://gist.github.com/fredcallaway/c7252b6326dfb502e70cad4146731aef
# https://discourse.edwardlib.org/t/a-simple-tensorflow-implementation-of-forward-backward/67
# https://gist.github.com/currymj/e903644c4e54e35fdb858c94f1631fe4

In [None]:
# experimenting with: https://gist.github.com/fredcallaway/c7252b6326dfb502e70cad4146731aef
def categorical(ps):
    """Draw one category index from the distribution with probabilities `ps`.

    Args:
        ps: probability vector (tensor or array-like) over the categories.

    Returns:
        The sample tensor of an Edward `Categorical` random variable.
    """
    # Categorical applies a softmax to its logits, so the logits that
    # reproduce `ps` are the log-probabilities. The log-odds returned by
    # ed.logit invert a sigmoid, not a softmax, and would skew the
    # distribution (e.g. [.6, .4] would be sampled as roughly [.69, .31]).
    return Categorical(logits=tf.log(ps)).value()

def flip(p):
    """Flip a coin with success probability `p`.

    Returns a boolean tensor that is true when the Bernoulli draw equals 1.
    """
    coin = Bernoulli(p=p)
    one = tf.constant(1)
    return tf.equal(coin, one)

def append(lst, x):
    """Return `lst` with the single element `x` appended along axis 0.

    Args:
        lst: tensor to extend.
        x: element to append; it is wrapped in a list so that it gains the
            leading axis that `lst` already has.
    """
    # TensorFlow >= 1.0 takes the values first and the axis second; the
    # pre-1.0 order `tf.concat(0, values)` fails on those versions.
    return tf.concat([lst, [x]], axis=0)

class HMM(object):
    """A Hidden Markov Model.

    Abstract base class: subclasses provide `init`, `step`, `emit` and
    `final`; `sample` combines them to draw one trajectory.
    """

    def step(self, state):
        """Returns a new state following `state`."""
        raise NotImplementedError()

    def emit(self, state):
        """Returns an observable emission from `state`."""
        raise NotImplementedError()

    def init(self):
        """Returns an initial state."""
        raise NotImplementedError()

    def final(self, state):
        """Returns true if the model should stop in `state`."""
        raise NotImplementedError()

    def sample(self):
        """Builds the graph that samples one trajectory from the model.

        Starts from `init()`, then repeatedly applies `step` and `emit`
        until `final` reports that the most recent state is a stopping state.

        Returns:
            The loop variables produced by `tf.while_loop`: the state
            sequence followed by the emission sequence.
        """
        def cond(states, emissions):
            # Keep looping only while the latest state is NOT final.
            return tf.logical_not(self.final(states[-1]))

        def body(states, emissions):
            s0 = states[-1]
            s1 = self.step(s0)
            e1 = self.emit(s1)
            return append(states, s1), append(emissions, e1)

        s0 = self.init()
        e0 = self.emit(s0)
        states = tf.convert_to_tensor([s0])
        emissions = tf.convert_to_tensor([e0])
        return tf.while_loop(
            cond, body,
            loop_vars=[states, emissions],
            # Both sequences grow by one element per iteration, so their
            # shapes must be left unconstrained.
            shape_invariants=[tf.TensorShape(None), tf.TensorShape(None)]
        )


class DiscreteGaussianHMM(HMM):
    """HMM with discrete transitions and gaussian emissions."""

    def __init__(self, P, mu, sigma, p_init, p_final):
        """Stores the model parameters.

        Args:
            P: transition probabilities; `P[state]` is the distribution over
                the next state.
            mu: emission means, indexed by state.
            sigma: emission standard deviations, indexed by state.
            p_init: probabilities of the initial state.
            p_final: per-state probability of stopping in that state.
        """
        super().__init__()
        self.P = P
        self.mu = mu
        self.sigma = sigma
        self.p_init = p_init
        self.p_final = p_final

    def step(self, state):
        """Samples the next state from the transition row of `state`."""
        return categorical(self.P[state])

    def emit(self, state):
        """Samples a gaussian emission using the parameters of `state`."""
        return Normal(mu=self.mu[state], sigma=self.sigma[state]).value()

    def init(self):
        """Samples the initial state from `p_init`."""
        return categorical(self.p_init)

    def final(self, state):
        """Returns true with probability `p_final[state]` (stop in `state`)."""
        # True means "stop", matching the HMM.final contract; HMM.sample
        # negates this value to obtain its continue-condition.
        return flip(tf.gather(self.p_final, state))
      

def demo():
    """Plots three emission sequences sampled from a two-state gaussian HMM."""
    import matplotlib.pyplot as plt

    # Row i holds the transition probabilities out of state i.
    P = tf.constant(np.array([
        [.6, .4],
        [.2, .8],
    ], dtype='float32'))
    mu = tf.constant([5., -5.])
    sigma = tf.constant([1., 1.])
    model = DiscreteGaussianHMM(P, mu, sigma, [0.5, 0.5], [0.02, 0.02])

    sess = ed.get_session()
    # Build the sampling graph once. Each sess.run re-executes its random
    # ops and therefore draws a fresh trajectory, so there is no need to add
    # a new while-loop subgraph per sample.
    emissions_op = model.sample()[1]
    for _ in range(3):
        plt.plot(sess.run(emissions_op))
    plt.show()