# WetGrass analyzed using Edward: learning parameters (transition probability matrices of latent nodes) using KLqp and batch training.

References
* https://discourse.edwardlib.org/t/simple-bayesian-network-sprinkler-example/258
* http://edwardlib.org/tutorials/batch-training
* https://discourse.edwardlib.org/t/parameter-learning-with-simple-bayesian-network-pymc3-vs-edward-edward-posteriors-not-converging-around-correct-parameter-values/653

In [1]:
# Numerical and graph-computation backends.
import numpy as np
import tensorflow as tf

# Edward: top-level package plus its models (edm) and inferences (edi) submodules.
import edward as ed
import edward.models as edm
import edward.inferences as edi

# Plotting setup: render figures inline and switch to the 'ggplot' style sheet.
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

In [2]:
import os
import sys

# Move the working directory two levels up (to the Quantum Fog root) and
# put that root at the front of sys.path so project packages can be imported.
#
# The guard makes the cell idempotent: without it, every re-run of this cell
# would climb two more directories and push another copy of the path onto
# sys.path.
if 'qfog_path' not in globals():
    cur_dir_path = os.getcwd()
    os.chdir('../../')
    qfog_path = os.getcwd()
print(cur_dir_path)
print(qfog_path)

# Keep exactly one entry for the root, and keep it first so it wins lookups.
if qfog_path in sys.path:
    sys.path.remove(qfog_path)
sys.path.insert(0, qfog_path)

/home/bob/Notebooks/Quantum/quantum-fog/jupyter-notebooks/inference_via_ext_software
/home/bob/Notebooks/Quantum/quantum-fog


In [3]:
import importlib
# The package directory "jupyter-notebooks" contains a hyphen, so it cannot
# be named in a regular `import` statement; import_module accepts the dotted
# path as a string instead.
mm = importlib.import_module("jupyter-notebooks.inference_via_ext_software.ModelMaker")
mme = importlib.import_module("jupyter-notebooks.inference_via_ext_software.ModelMaker_Edward")
# NOTE(review): star import -- the cells below use at least `BayesNet` from
# it; other names it brings in are not visible from here.
from graphs.BayesNet import *

In [4]:
# Build the BayesNet object `bnet` from a .bif file.
# The path is relative, so it is resolved against the current working
# directory.
in_path = "examples_cbnets/WetGrass.bif"
# NOTE(review): the meaning of the second positional argument (False) is
# not visible here -- confirm against BayesNet.read_bif.
bnet = BayesNet.read_bif(in_path, False)

In [5]:
# Write an Edward model file for bnet in which no node is observed.
# The value returned by the writer is kept in mod_file so that a later
# cell can execute that file.
prefix0 = ("jupyter-notebooks/"
           "inference_via_ext_software/model_examples_c/")
file_prefix = "{}WetGrass_inf_obs_none".format(prefix0)
mod_file = mme.ModelMaker_Edward.write_model_for_inf(
    file_prefix, bnet)

The .py file containing the model can be found here:

<a href="model_examples_c/WetGrass_inf_obs_none_edward.py">
model_examples_c/WetGrass_inf_obs_none_edward.py</a>

In [6]:
# Execute the generated model file inside this notebook's namespace.
# The -i option lets the file read names already defined here; the cells
# below then use names such as Cloudy, Rain, Sprinkler and WetGrass.
%run -i $mod_file

In [7]:
# Draw the training data from the Edward model itself. Quantum Fog has its
# own function for generating these directly from bnet, but here Edward
# does the sampling.

data_size = 2000

# One row per draw; columns follow the order of node_list below.
data = np.zeros((data_size, 4), dtype=int)
node_list = [Cloudy, Rain, Sprinkler, WetGrass]
with tf.Session() as sess:
    for i in range(data_size):
        # every sess.run call produces one joint sample of the four nodes
        data[i] = sess.run(node_list)
print('data[0:5, :]\n', data[:5])
print("data[0:5, 0]\n", data[:5, 0])

data[0:5, :]
 [[0 0 1 1]
 [1 0 1 1]
 [1 0 0 0]
 [0 1 1 1]
 [0 1 1 1]]
data[0:5, 0]
 [0 1 1 0 0]


In [8]:
# Discard the default TensorFlow graph that holds the data-generating
# model, so that the parameter-learning model built in the following
# cells starts from an empty graph.
tf.reset_default_graph()

In [9]:
# Write an Edward parameter-learning model file for bnet in which the
# Cloudy and WetGrass nodes are observed.
prefix0 = ("jupyter-notebooks/"
           "inference_via_ext_software/model_examples_c/")
file_prefix = "{}WetGrass_par_VA_obs_CW".format(prefix0)
obs_vertices = ['Cloudy', "WetGrass"]
mod_file = mme.ModelMaker_Edward.write_model_for_param_learning(
    file_prefix, bnet, obs_vertices)

The .py file containing the model can be found here:

<a href="model_examples_c/WetGrass_par_VA_obs_CW_edward.py">
model_examples_c/WetGrass_par_VA_obs_CW_edward.py</a>

In [10]:
# Number of samples in one training batch.
batch_size = 40

In [11]:
# The -i option lets the executed file access this notebook's namespace.

# The generated .py file uses the variable 'sam_size' (sample size of the
# observed nodes) without assigning it, so it must be set to an int here
# before the file is run. Each update is fed one batch, hence batch_size.

sam_size = batch_size
%run -i $mod_file

In [12]:
# Training schedule.
# num_epochs:  number of full passes (cycles) over the data
# num_batches: number of batches that make up one epoch
num_epochs = 2
num_batches = data_size // batch_size
# the data must split into whole batches, with no samples left over
assert data_size % batch_size == 0

In [13]:
# Run KLqp variational inference over the Rain and Sprinkler probability
# tables, feeding the observed data one batch per update.

# Pair each parameter with the variable that approximates it.
q_dict = {probs_Rain: probs_Rain_q, 
          probs_Sprinkler: probs_Sprinkler_q}
# WetGrass is bound to WetGrass_ph (presumably a placeholder -- confirm in
# the generated model file); its values are supplied at every update below.
data_dict = {WetGrass: WetGrass_ph}
inf = edi.KLqp(q_dict, data=data_dict)
n_iter = num_batches*num_epochs # number of batches in all epochs
# Scale factor data_size/batch_size, as in the batch-training tutorial
# listed in the references.
# NOTE(review): Cloudy has a scale entry but is not a key of data_dict --
# confirm that this entry has any effect.
scale = {Cloudy:num_batches, WetGrass:num_batches}
inf.initialize(n_iter=n_iter, n_samples=5, scale=scale)

# Variables are initialized only after inf.initialize has been called.
tf.global_variables_initializer().run()

# Columns 0 and 3 of data hold the Cloudy and WetGrass samples.
# NOTE(review): batch_gen is assumed to keep yielding (Cloudy, WetGrass)
# batch pairs for all n_iter iterations, i.e. across epochs -- confirm.
bgen = mm.ModelMaker.batch_gen([data[:, 0], data[:, 3]], batch_size)
for i in range(inf.n_iter):
    Cloudy_bat, WetGrass_bat = next(bgen)
    info_dict = inf.update({Cloudy: Cloudy_bat, WetGrass_ph: WetGrass_bat})
    inf.print_progress(info_dict)
# NOTE(review): finalize is deliberately left disabled here; the next cell
# fetches the session again via ed.get_session().
# inf.finalize()

100/100 [100%] ██████████████████████████████ Elapsed: 5s | Loss: 2572.847


In [14]:
# Print the learned and the true probability distributions.
# A concentration of 1 corresponds to a uniform distribution, so for the
# concentrations the further above 1 the better; values below 1 are not
# good.
sess = ed.get_session()
rc, rm, sc, sm = sess.run([probs_Rain_q.concentration,
                           probs_Rain_q.mean(),
                           probs_Sprinkler_q.concentration,
                           probs_Sprinkler_q.mean()])

# Same report layout for both learned nodes.
for node_name, conc, est in (('Rain', rc, rm), ('Sprinkler', sc, sm)):
    print('\n' + node_name)
    print('concentration:')
    print(conc)
    print('estimate:')
    print(est)
    print('true:')
    print(bnet.get_node_named(node_name).potential.pot_arr)



Rain
concentration:
[[ 0.20081396  0.85744417]
 [ 1.24493313  0.40282616]]
estimate:
[[ 0.18975896  0.81024098]
 [ 0.75553095  0.24446905]]
true:
[[ 0.4  0.6]
 [ 0.5  0.5]]

Sprinkler
concentration:
[[ 0.07028798  0.62330669]
 [ 0.72268617  0.15767747]]
estimate:
[[ 0.10133869  0.89866126]
 [ 0.82089508  0.17910494]]
true:
[[ 0.2  0.8]
 [ 0.7  0.3]]
