In [16]:
from probability import *
from utils import print_table
# from notebook import psource, pseudocode, heatmap


class BayesNode:
    """A conditional probability distribution for a boolean variable,
    P(X | parents). Part of a BayesNet."""

    def __init__(self, X, parents, cpt):
        """X is a variable name, and parents a sequence of variable
        names or a space-separated string.  cpt, the conditional
        probability table, takes one of these forms:

        * A number, the unconditional probability P(X=true). You can
          use this form when there are no parents.

        * A dict {v: p, ...}, the conditional probability distribution
          P(X=true | parent=v) = p. When there's just one parent.

        * A dict {(v1, v2, ...): p, ...}, the distribution P(X=true |
          parent1=v1, parent2=v2, ...) = p. Each key must have as many
          values as there are parents. You can use this form always;
          the first two are just conveniences.

        In all cases the probability of X being false is left implicit,
        since it follows from P(X=true).

        >>> X = BayesNode('X', '', 0.2)
        >>> Y = BayesNode('Y', 'P', {T: 0.2, F: 0.7})
        >>> Z = BayesNode('Z', 'P Q',
        ...    {(T, T): 0.2, (T, F): 0.3, (F, T): 0.5, (F, F): 0.7})
        """
        if isinstance(parents, str):
            parents = parents.split()

        # We store the table always in the third form above.
        if isinstance(cpt, (float, int)):  # no parents, 0-tuple
            cpt = {(): cpt}
        elif isinstance(cpt, dict):
            # one parent, 1-tuple
            if cpt and isinstance(list(cpt.keys())[0], bool):
                cpt = {(v,): p for v, p in cpt.items()}

        assert isinstance(cpt, dict)
        for vs, p in cpt.items():
            assert isinstance(vs, tuple) and len(vs) == len(parents)
            assert all(isinstance(v, bool) for v in vs)
            assert 0 <= p <= 1

        self.variable = X
        self.parents = parents
        self.cpt = cpt
        self.children = []

    def p(self, value, event):
        """Return the conditional probability
        P(X=value | parents=parent_values), where parent_values
        are the values of parents in event. (event must assign each
        parent a value.)
        >>> bn = BayesNode('X', 'Burglary', {T: 0.2, F: 0.625})
        >>> bn.p(False, {'Burglary': False, 'Earthquake': True})
        0.375"""
        assert isinstance(value, bool)
        ptrue = self.cpt[event_values(event, self.parents)]
        return ptrue if value else 1 - ptrue

    def sample(self, event):
        """Sample from the distribution for this variable conditioned
        on event's values for parent_variables. That is, return True/False
        at random according with the conditional probability given the
        parents."""
        return probability(self.p(True, event))

    def __repr__(self):
        return repr((self.variable, ' '.join(self.parents)))   

           
        
        
        
class BayesNet:
    """Bayesian network containing only boolean-variable nodes."""

    def __init__(self, node_specs=None):
        """Nodes must be ordered with parents before children."""
        self.nodes = []
        self.variables = []
        node_specs = node_specs or []
        for node_spec in node_specs:
            self.add(node_spec)

    def add(self, node_spec):
        """Add a node to the net. Its parents must already be in the
        net, and its variable must not."""
        node = BayesNode(*node_spec)
        assert node.variable not in self.variables
        assert all((parent in self.variables) for parent in node.parents)
        self.nodes.append(node)
        self.variables.append(node.variable)
        for parent in node.parents:
            self.variable_node(parent).children.append(node)

    def variable_node(self, var):
        """Return the node for the variable named var.
        >>> burglary.variable_node('Burglary').variable
        'Burglary'"""
        for n in self.nodes:
            if n.variable == var:
                return n
        raise Exception("No such variable: {}".format(var))

    def variable_values(self, var):
        """Return the domain of var."""
        return [True, False]

    def __repr__(self):
        return 'BayesNet({0!r})'.format(self.nodes)             


    
    
    
def prior_sample(bn):
    """Randomly sample from bn's full joint distribution. The result
    is a {variable: value} dict. [Figure 14.13]"""
    event = {}
    for node in bn.nodes:
        event[node.variable] = node.sample(event)
    return event



def rejection_sampling(X, e, bn, N=10000):
    """Estimate the probability distribution of variable X given
    evidence e in BayesNet bn, using N samples.  [Figure 14.14]
    Raises a ZeroDivisionError if all the N samples are rejected,
    i.e., inconsistent with e.
    >>> random.seed(47)
    >>> rejection_sampling('Burglary', dict(JohnCalls=T, MaryCalls=T),
    ...   burglary, 10000).show_approx()
    'False: 0.7, True: 0.3'
    """
    counts = {x: 0 for x in bn.variable_values(X)}  # bold N in [Figure 14.14]
    for j in range(N):
        sample = prior_sample(bn)  # boldface x in [Figure 14.14]
        if consistent_with(sample, e):
            counts[sample[X]] += 1
    return ProbDist(X, counts)


def weighted_sample(bn, e):
    """Sample an event from bn that's consistent with the evidence e;
    return the event and its weight, the likelihood that the event
    accords to the evidence."""
    w = 1
    event = dict(e)  # boldface x in [Figure 14.15]
    for node in bn.nodes:
        Xi = node.variable
        if Xi in e:
            w *= node.p(e[Xi], event)
        else:
            event[Xi] = node.sample(event)
    return event, w


def enumerate_all(variables, e, bn):
    """Return the sum of those entries in P(variables | e{others})
    consistent with e, where P is the joint distribution represented
    by bn, and e{others} means e restricted to bn's other variables
    (the ones other than variables). Parents must precede children in variables."""
    if not variables:
        return 1.0
    Y, rest = variables[0], variables[1:]
    Ynode = bn.variable_node(Y)
    if Y in e:
        return Ynode.p(e[Y], e) * enumerate_all(rest, e, bn)
    else:
        return sum(Ynode.p(y, e) * enumerate_all(rest, extend(e, Y, y), bn)
                   for y in bn.variable_values(Y))
    
def enumeration_ask(X, e, bn):
    """Return the conditional probability distribution of variable X
    given evidence e, from BayesNet bn. [Figure 14.9]
    >>> enumeration_ask('Burglary', dict(JohnCalls=T, MaryCalls=T), burglary
    ...  ).show_approx()
    'False: 0.716, True: 0.284'"""
    assert X not in e, "Query variable must be distinct from evidence"
    Q = ProbDist(X)
    for xi in bn.variable_values(X):
        Q[xi] = enumerate_all(bn.variables, extend(e, X, xi), bn)
    return Q.normalize()    

#making nodes
# a_node = BayesNode('A', '', 0.1)
# s_node = BayesNode('S', '', 0.5)
# t_node = BayesNode('T', 'A', {True: 0.05, False: 0.01})
# l_node = BayesNode('L', 'S', {True: 0.1, False: 0.01})
# b_node = BayesNode('B', 'S', {True: 0.6, False: 0.3})
# e_node = BayesNode('E', ['T', 'L'], {(True, True): 1, (True, False): 1, (False, True): 1, (False, False): 0})
# x_node = BayesNode('X', 'E', {True: 0.98, False: 0.05})
# d_node = BayesNode('D', ['E', 'B'], {(True, True): 0.9, (True, False): 0.7, (False, True): 0.8, (False, False): 0.1})



b_network = BayesNet([
    ('A', '', 0.1),
    ('S', '', 0.5),
    ('T', 'A', {True: 0.05, False: 0.01}),
    ('L', 'S', {True: 0.1, False: 0.01}),
    ('B', 'S', {True: 0.6, False: 0.3}),
    ('E', ['T', 'L'], {(True, True): 1, (True, False): 1, (False, True): 1, (False, False): 0}),
    ('X', 'E', {True: 0.98, False: 0.05}),
    ('D', ['E', 'B'], {(True, True): 0.9, (True, False): 0.7, (False, True): 0.8, (False, False): 0.1})
])

#Prior sampling to predict lung cancer
# 2.  Predict the probability that a patient has lung 
# cancer using prior sampling
N = 1000

all_observations = [prior_sample(b_network) for x in range(N)]

cancer_true = [observation for observation in all_observations if observation['L'] == True]
prob_cancer_true = len(cancer_true) / N
print("Probability patient has lung cancer:", prob_cancer_true)

print("\n--- Probability patient has lung cancer given bronchitis and positive x-ray ----\n")

rej_sample = rejection_sampling('L', dict(B=True), b_network, 1000)
print("Rejection Sampling:", rej_sample[True])

#        (b) Likelihood weighting
likelihood_sample = likelihood_weighting('L', dict(B=True), b_network, 200)
print("Likelihood Weighting:", likelihood_sample[True])

#        (c) Enumeration
enumeration_sample = enumeration_ask('L', {'B': True, 'X': True}, b_network)
print("Enumeration:", enumeration_sample[True])


# 4. Ask the agent a question, then predict and 
# compare the results from prior sampling, rejection 
# sampling, Likelihood weighting and Enumeration.

print("\n--- Probability Patient has Tuberculosis ---- \n")
#probability patient has tuberculosis
tb_true = [observation for observation in all_observations if observation['T'] == True]
prob_tb_true = len(tb_true) / N
print("Probability patient has T:", prob_tb_true)

print("\n--- Probability Patient has Tuberculosis after not going to Asia and has a positive x ray---- \n")
T_rej_sample = rejection_sampling('T', dict(A=False), b_network, 1000)
print("Rejection Sampling:", T_rej_sample[True])

#        (b) Likelihood weighting
T_likelihood_sample = likelihood_weighting('T', dict(A=False), b_network, 200)
print("Likelihood Weighting:", T_likelihood_sample[True])

#        (c) Enumeration
T_enumeration_sample = enumeration_ask('T', {'A': False, 'X': True}, b_network)
print("Enumeration:", T_enumeration_sample[True])

Probability patient has lung cancer: 0.054

--- Probability patient has lung cancer given bronchitis and positive x-ray ----

Rejection Sampling: 0.07317073170731707
Likelihood Weighting: 0.09584664536741232
Enumeration: 0.539271715906

--- Probability Patient has Tuberculosis ---- 

Probability patient has T: 0.017

--- Probability Patient has Tuberculosis after not going to Asia and has a positive x ray---- 

Rejection Sampling: 0.012048192771084338
Likelihood Weighting: 0.009999999999999964
Enumeration: 0.08914074687211486
