In [1]:
import pandas as pd
import numpy as np

# Hela ML libraries 
from hela import hmm
import hela.generation.hmm as hmm_gen

# Viz libraries
import altair as alt
import hela.visualization.hmm as hmmplot 
import matplotlib.pyplot as plt
from hela.visualization.hmm import TU_COLORS
%matplotlib inline

# Utility Libraries
from datetime import datetime
from dask.distributed import Client
from scipy.special import logsumexp
from scipy import stats
import itertools
from IPython.display import Image

# PGMPy
from hela.hmm.graphical_models import DynamicBayesianNetwork as dbn
# from hela.hmm.graphical_models import structured_inference as dbn_inf
from pgmpy.inference import dbn_inference as dbn_inf

  return torch._C._cuda_getDeviceCount() > 0


### HMM Generative Model

In [2]:
# Number of observations to generate; later cells reuse `n` when building evidence.
n = 500

# Discrete HMM generator: 3 hidden states, 2 categorical features,
# no Gaussian features and no GMM components.
generative_model = hmm_gen.DiscreteHMMGenerativeModel(
    n_hidden_states=3,
    n_gaussian_features=0,
    n_categorical_features=2,
    n_gmm_components=None,
)

# Generate a hidden-state sequence with n observations, then observations from it.
hidden_states = generative_model.generate_hidden_state_sequence(n_observations=n)
hmm_data = generative_model.generate_observations(hidden_states)

# Generative model -> training spec -> configuration -> model.
hmm_training_spec = generative_model.generative_model_to_discrete_hmm_training_spec()
model_config = hmm.DiscreteHMMConfiguration.from_spec(hmm_training_spec)
hmm_model = model_config.to_model()

### PGMpy Structure of HMM

![HMM_graph](hmm_graph.png)

In [3]:
# graph = dbn.hmm_model_to_graph(hmm_model)
# graph.initialize_initial_state()

In [4]:
# Build the graph for the HMM model with the DynamicBayesianNetwork helper (imported as `dbn`).
hmm_graph = dbn.hmm_model_to_graph(hmm_model)

In [5]:
# Map the string form of each row of finite_values (e.g. '[0, 1]') to that row's
# position, so a multi-feature categorical observation can be encoded as one integer.
finite_value_rows = hmm_model.categorical_model.finite_values.values
categorical_dict = {
    str(list(row)): position for position, row in enumerate(finite_value_rows)
}
categorical_dict

{'[0, 0]': 0, '[0, 1]': 1, '[0, 2]': 2, '[1, 0]': 3, '[1, 1]': 4, '[1, 2]': 5}

In [6]:
# Encode every observation row as its integer id from categorical_dict,
# keeping the index of the generated data.
observation_ids = [categorical_dict[str(list(row))] for row in np.array(hmm_data)]
hmm_flattened_data = pd.Series(observation_ids, index=hmm_data.index)

# Evidence: one ('cat_obs', t) key per time step, paired with the first n encoded observations.
ev_keys = [('cat_obs', t) for t in range(n)]
ev_dict = dict(zip(ev_keys, hmm_flattened_data.values[:n]))

# Query variables: ('hs', t) for time steps 2..n-1 (steps 0 and 1 are not queried).
variables = [('hs', t) for t in range(2, n)]

### HMM Graphical Inference

Inference in the graphical HMM model is done by belief propagation with variable elimination. In the case of graphs with HMM structure, this becomes equivalent to the forward-backward algorithm.

In [7]:
# Run the forward and backward passes over the HMM graph for the queried
# variables, given the evidence dictionary.
# NOTE(review): the output recorded for this cell is
# "AttributeError: 'DBNInference' object has no attribute 'start_bayesian_model'"
# and every later cell has no execution count, so the notebook does not run past
# this point. Presumably pgmpy's DBNInference does not accept the graph built by
# hela's DynamicBayesianNetwork -- confirm, and check whether the commented-out
# hela `structured_inference` import was the intended inference engine.
inference = dbn_inf.DBNInference(hmm_graph)
forward = inference.forward_inference(variables, ev_dict)
backward = inference.backward_inference(variables, ev_dict)

AttributeError: 'DBNInference' object has no attribute 'start_bayesian_model'

In [None]:
# Unnormalised posterior for each queried node: product of its forward and backward results.
joint = np.array([(forward[key] * backward[key]).values for key in forward])

# Divide every row by its own sum.
row_totals = np.sum(joint, axis=1).reshape(-1, 1)
posterior = np.divide(joint, row_totals)

# Highest-scoring state per row, indexed like the slice 2:n of the generated states.
pred_hidden_states = pd.Series(
    np.argmax(posterior, axis=1),
    index=hidden_states.index[2:n],
)

hmmplot.draw_states(pred_hidden_states, hide_brush=False)

In [None]:
# Plot the slice 2:n of the generated hidden states, i.e. the same range used for the predicted states.
hmmplot.draw_states(hidden_states[2:n], hide_brush=False)

### FHMM Model

In [None]:
# Factored HMM generator with ns_hidden_states = [2, 2, 2] (presumably three
# systems of two states each), 2 categorical features and no Gaussian features.
gen = hmm_gen.FactoredHMMGenerativeModel(
    ns_hidden_states=[2, 2, 2],
    n_gaussian_features=0,
    n_categorical_features=2,
)

# Generate a factored hidden-state sequence with 500 observations.
factored_hidden_states = gen.generate_hidden_state_sequence(n_observations=500)

In [None]:
# Flatten the factored hidden-state sequence with the generator's helper and plot the result.
flattened_hidden_states = gen.flatten_hidden_state_sequence(factored_hidden_states)
hmmplot.draw_states(flattened_hidden_states, hide_brush=False)

In [None]:
# Generate observations from the factored hidden-state sequence.
fhmm_data = gen.generate_observations(factored_hidden_states)

# Build the training spec from the generated states and data; every column of
# gen.categorical_values is passed as a categorical feature, with no Gaussian features.
fhmm_training_spec = hmm_gen.data_to_fhmm_training_spec(
    factored_hidden_states,
    gen.ns_hidden_states,
    fhmm_data,
    categorical_features=list(gen.categorical_values.columns),
    gaussian_features=[],
)

# Training spec -> configuration -> model.
fhmm_config = hmm.FactoredHMMConfiguration.from_spec(fhmm_training_spec)
fhmm_model = fhmm_config.to_model()

In [None]:
# Display the graph object attached to the FHMM model.
fhmm_model.graph

### PGMpy Structure of FHMM

For the edge factors in an FHMM graph, each transition arrow is defined by the transition matrix of the corresponding system. For categorical data, we define the marginalized emission probability for each emission edge. For continuous data, the edge is defined by the W matrix and the covariance matrix. In the learning/inference process, these are used to generate the pdf for a given hidden-state (hs) vector.

In [None]:
# Print the entry stored for every node of the FHMM graph; each node is
# indexed as (node[0], node[1]) and reported as (name, time step).
graph_nodes = fhmm_model.graph.nodes
for graph_node in graph_nodes:
    label = "latent status for node {} at time step {}: ".format(
        graph_node[0], graph_node[1])
    print(label, graph_nodes[graph_node])

In [None]:
# Display the edges of the FHMM graph.
fhmm_model.graph.edges()

In [None]:
# Run the graph's model check; the call's return value is shown as the cell output.
fhmm_model.graph.check_model()

In [None]:
# Display the factors the graph returns for time slice 0.
fhmm_model.graph.get_factors(time_slice=0)


In [None]:
# Display the factors the graph returns for time slice 1.
fhmm_model.graph.get_factors(time_slice=1)

In [None]:
# Display the edges of the FHMM graph.
# NOTE(review): this repeats an earlier cell with the same call -- confirm whether both are needed.
fhmm_model.graph.edges()

### Structured Variational Inference

We break the fHMM graph up into its individual Markov systems and run the forward-backward algorithm on each system separately.


TODO: add the variational parameters and a check for convergence.

In [None]:
# Ask the FHMM graph for its per-system graphs; later cells index the result as system_graphs[0..2].
system_graphs = fhmm_model.graph.generate_system_graphs()

In [None]:
# Every system graph must pass its own model check.
assert all(system_graph.check_model() for system_graph in system_graphs)

In [None]:
# Display the factors of the first system graph.
system_graphs[0].factors

In [None]:
# Display the factors of the second system graph.
system_graphs[1].factors

In [None]:
# Display the factors of the third system graph.
system_graphs[2].factors

In [None]:
# List the nodes contained in each system graph, one line per chain.
for chain_index, chain_graph in enumerate(system_graphs):
    message = "Nodes present in chain {}: {} ".format(chain_index, chain_graph.nodes)
    print(message)

In [None]:
n = 500

# Map the string form of each row of categorical_values to that row's position.
categorical_rows = fhmm_model.categorical_model.categorical_values.values
categorical_dict = {
    str(list(row)): position for position, row in enumerate(categorical_rows)
}

# Encode every observation row as its integer id, keeping the index of the generated data.
fhmm_observation_ids = [categorical_dict[str(list(row))] for row in np.array(fhmm_data)]
fhmm_flattened_data = pd.Series(fhmm_observation_ids, index=fhmm_data.index)

# First element of the first latent node and of the first observable node
# of the first system graph.
first_system_graph = system_graphs[0]
system = first_system_graph.get_latent_nodes()[0][0]
observation_node = first_system_graph.get_observable_nodes()[0][0]

# Evidence for time steps 0..n-1; query variables for time steps 2..n-1.
ev_keys = [(observation_node, t) for t in range(n)]
ev_dict = dict(zip(ev_keys, fhmm_flattened_data.values[:n]))
variables = [(system, t) for t in range(2, n)]

In [None]:
# Run the forward and backward passes on the first system graph for the queried
# variables, given the evidence dictionary.
# NOTE(review): this cell has no recorded execution; the earlier DBNInference call
# on the HMM graph raised an AttributeError, so this call presumably fails the
# same way -- confirm once the inference engine is settled.
inference = dbn_inf.DBNInference(system_graphs[0])
forward = inference.forward_inference(variables, ev_dict)
backward = inference.backward_inference(variables, ev_dict)

In [None]:
# Unnormalised posterior for each queried node: product of its forward and backward results.
joint = np.array([(forward[key] * backward[key]).values for key in forward])

# Divide every row by its own sum.
row_totals = np.sum(joint, axis=1).reshape(-1, 1)
posterior = np.divide(joint, row_totals)


In [None]:
# Display the raw values of every forward result, in the iteration order of `forward`.
[forward[key].values for key in forward]

In [None]:
# Display the raw values of every backward result, in the iteration order of `backward`.
[backward[key].values for key in backward]

In [None]:
# Display the row-normalised posterior array computed above.
posterior