In [1]:
"""
Creating an altogether new graph is not a good idea:
it would require a lot of deep cloning!

We need to update the children lists, and that should happen after the collapse.
"""
%run BNRep.ipynb

>> A
>> B
>> C

A 	| B,	 C 	| Prob
1 	| True,	 Haku 	| 0.1
1 	| True,	 Mata 	| 0.2
1 	| Fals,	 Haku 	| 0.3
1 	| Fals,	 Mata 	| 0.4
2 	| True,	 Haku 	| 0.5
2 	| True,	 Mata 	| 0.6
2 	| Fals,	 Haku 	| 0.7
2 	| Fals,	 Mata 	| 0.8
3 	| True,	 Haku 	| 0.9
3 	| True,	 Mata 	| 1.0
3 	| Fals,	 Haku 	| 1.1
3 	| Fals,	 Mata 	| 1.2


In [2]:
# Build the five-vertex example network. Every vertex is binary.
A, B, C, D, E = (
    Vertex(label)
    for label in (
        "A: Metastatic Cancer",
        "B: Increased total serum calcium",
        "C: Brain tumor",
        "D: Coma",
        "E: Severe headaches",
    )
)

# One fresh [True, False] list per vertex (the lists are not shared).
A.values, B.values, C.values, D.values, E.values = (
    [True, False] for _ in range(5)
)

# CPT(variables, probs): the first variable is the vertex the table belongs
# to, the remaining ones are what it is conditioned on. As the printed tables
# in this notebook show, probs are listed with the first variable varying
# slowest and the last one fastest.
a_cpt = CPT([A], [0.2, 0.8])
b_cpt = CPT([B, A], [0.8, 0.2,
                     0.2, 0.8])
c_cpt = CPT([C, A], [0.2, 0.05,
                     0.8, 0.95])
d_cpt = CPT([D, B, C], [0.8, 0.8, 0.8, 0.05,
                        0.2, 0.2, 0.2, 0.95])
e_cpt = CPT([E, C], [0.8, 0.6,
                     0.2, 0.4])

A.CPT, B.CPT, C.CPT, D.CPT, E.CPT = a_cpt, b_cpt, c_cpt, d_cpt, e_cpt

# The graph is represented as a plain list of its vertices.
G = [A, B, C, D, E]

In [27]:
def setChildren(G):
    """
    Recompute the `children` list of every node in G from the `parents` lists.

    G is a graph given as a list of nodes. Each node's `children` is first
    reset to an empty list; afterwards every node is appended to the
    `children` of each of its parents, in the order the nodes appear in G.
    Nothing is returned: the nodes are updated in place.
    """
    # All lists must be cleared before any of them is filled again, hence
    # two separate passes over G.
    for node in G:
        node.children = []
    for child in G:
        for parent in child.parents:
            parent.children.append(child)

In [82]:
def collapse(G, nodes, newName):
    """
    Return a new graph (a list of nodes) in which `nodes` are merged into a
    single new vertex called `newName`.

    Parameters
    ----------
    G : list
        The graph, as a list of vertices. `setChildren(G)` is called on it
        first, so the `children` lists of its vertices are rebuilt.
    nodes : list
        The vertices of G to merge. Must support `in`, `len` and `.index`.
    newName : str
        Name given to the merged vertex.

    Returns
    -------
    list
        The vertices of G that were not touched (same objects, original
        order), the merged vertex, and a freshly built replacement for every
        child of the merged vertex. The rebuilt children are appended in set
        iteration order, so their position in the list is not fixed: look
        vertices up by name rather than by index.

    The merged vertex
    -----------------
    * values: 0 .. card-1, where card is the product of the cardinalities of
      `nodes`. Value k stands for the k-th combination of the individual node
      value indices, enumerated with the last node varying fastest.
    * parents: every parent of a merged node that is not itself being merged,
      in order of first appearance (so the CPT layout is reproducible).
    * CPT: P(merged | parents), computed as the product of the merged nodes'
      own CPT entries. A parent that is itself part of `nodes` takes its value
      from the merged value, not from the external parents.
    * origG / origNodes: references to the arguments, kept for later lookup.

    Children
    --------
    Each child of a merged node is replaced by a new vertex with the same
    name and values whose parents are the child's untouched parents followed
    by the merged vertex. Its CPT is filled by querying the original child's
    CPT with the node values the merged value stands for.

    Side effects
    ------------
    Untouched vertices are shared between G and the result, and
    `setChildren` is called on the result, so their `children` lists describe
    the new graph afterwards. Prints a progress line before the children are
    rebuilt.

    NOTE(review): an untouched vertex whose parent is one of the *replaced*
    children still points at the old child object; this function does not
    rewire such grandchildren.
    """
    from itertools import product

    setChildren(G)
    nG = [N for N in G if N not in nodes]

    # Collect the external parents/children of the cluster. Members of
    # `nodes` are excluded: they disappear from the graph, so they can be
    # neither parent nor child of the merged vertex.
    parents = []        # ordered and de-duplicated -> stable CPT layout
    cSet = set()
    for n in nodes:
        for p in n.parents:
            if p not in nodes and p not in parents:
                parents.append(p)
        cSet.update(c for c in n.children if c not in nodes)

    # Every combination of value indices of the merged nodes, last node
    # fastest. Position k in this list is what merged value k stands for.
    jointVals = [list(combo)
                 for combo in product(*[range(n.nCard()) for n in nodes])]

    nNode = Vertex(newName)
    nNode.parents = parents
    nNode.children = list(cSet)
    nNode.origG = G
    nNode.origNodes = nodes
    nG += [nNode]
    nNode.values = list(range(len(jointVals)))
    nNodeMap = dict(enumerate(jointVals))

    # Every combination of value indices of the external parents, last
    # parent fastest. With no parents this is the single empty combination,
    # so the loop below still runs once per merged value.
    parentCombos = list(product(*[range(p.nCard()) for p in parents]))

    #
    # CPT of the merged vertex given its parents.
    probs = []
    for nodeVals in jointVals:
        for pVals in parentCombos:
            t_prob = 1
            for nIdx, node in enumerate(nodes):
                # Index arguments for this node's own CPT: its own value
                # first, then the value of each of its parents.
                nArgs = [nodeVals[nIdx]]
                for p in node.parents:
                    if p in nodes:
                        # Parent lives inside the cluster: its value is
                        # fixed by the merged value being evaluated.
                        nArgs.append(nodeVals[nodes.index(p)])
                    else:
                        nArgs.append(pVals[parents.index(p)])
                t_prob *= node.CPT.getP4mIdx(nArgs)
            probs.append(t_prob)
    nNode.CPT = CPT([nNode] + parents, probs)

    #
    # The CPTs of the children must be rebuilt as well:
    # p(c|z) = p(c|a,b) = p(c|a), i.e. entries repeat when not all merged
    # nodes are parents of the child. The values are simply looked up in the
    # original child's CPT, translating z back to the merged nodes' values.
    print("Now handling child nodes:")
    for cNode in nNode.children:
        nG.remove(cNode)
        # Untouched parents keep their relative order; the merged vertex is
        # always the last parent.
        nPars = [N for N in cNode.parents if N not in nodes] + [nNode]
        nCNode = Vertex(cNode.name)
        nCNode.values = list(cNode.values)
        nCNode.parents = nPars
        nG += [nCNode]

        probs = []
        parCombos = list(product(*[range(p.nCard()) for p in nPars]))
        for nodeValIdx in range(nCNode.nCard()):
            for nVals in parCombos:
                # nVals[-1] is the value of the merged vertex; translate it
                # back to the value indices of the individual merged nodes.
                nodeVals = nNodeMap[nVals[-1]]
                args = [nodeValIdx]
                for n in cNode.parents:
                    if n in nodes:
                        # This parent has been subsumed by the merged vertex.
                        args.append(nodeVals[nodes.index(n)])
                    else:
                        args.append(nVals[nPars.index(n)])
                probs.append(cNode.CPT.getP4mIdx(args))
        nCNode.CPT = CPT([nCNode] + nPars, probs)

    setChildren(nG)
    return nG

In [83]:
Z = collapse(G, [B, C], "Z")
# collapse() appends the rebuilt children in set iteration order, so their
# position in the returned list is not fixed. Pick D's replacement by name
# instead of relying on it being the last element.
next(v for v in Z if v.name == D.name).CPT.printCPT()

Now handling child nodes:
>> D: Coma
>> Z

D: C 	| Z 	| Prob
True 	| 0 	| 0.8
True 	| 1 	| 0.8
True 	| 2 	| 0.8
True 	| 3 	| 0.05
False 	| 0 	| 0.2
False 	| 1 	| 0.2
False 	| 2 	| 0.2
False 	| 3 	| 0.95


In [84]:
# Names of the vertices of the collapsed graph, in list order.
[vertex.name for vertex in Z]

['A: Metastatic Cancer', 'Z', 'E: Severe headaches', 'D: Coma']

In [85]:
# Rebuilt CPT of D (now conditioned on Z). Looked up by name because the
# position of rebuilt children in the list returned by collapse() is not fixed.
next(v for v in Z if v.name == D.name).CPT.printCPT()

>> D: Coma
>> Z

D: C 	| Z 	| Prob
True 	| 0 	| 0.8
True 	| 1 	| 0.8
True 	| 2 	| 0.8
True 	| 3 	| 0.05
False 	| 0 	| 0.2
False 	| 1 	| 0.2
False 	| 2 	| 0.2
False 	| 3 	| 0.95


In [86]:
# CPT of the original D vertex (conditioned on B and C), for comparison with
# the rebuilt table printed above.
D.CPT.printCPT()

>> D: Coma
>> B: Increased total serum calcium
>> C: Brain tumor

D: C 	| B: I,	 C: B 	| Prob
True 	| True,	 True 	| 0.8
True 	| True,	 Fals 	| 0.8
True 	| Fals,	 True 	| 0.8
True 	| Fals,	 Fals 	| 0.05
False 	| True,	 True 	| 0.2
False 	| True,	 Fals 	| 0.2
False 	| Fals,	 True 	| 0.2
False 	| Fals,	 Fals 	| 0.95


In [87]:
# CPT of the merged vertex given A. Looked up by the name passed to
# collapse(): its index in the returned list depends on how many children
# were rebuilt after it.
next(v for v in Z if v.name == "Z").CPT.printCPT()

>> Z
>> A: Metastatic Cancer

Z 	| A: M 	| Prob
0 	| True 	| 0.16000000000000003
0 	| Fals 	| 0.010000000000000002
1 	| True 	| 0.6400000000000001
1 	| Fals 	| 0.19
2 	| True 	| 0.04000000000000001
2 	| Fals 	| 0.04000000000000001
3 	| True 	| 0.16000000000000003
3 	| Fals 	| 0.76


In [80]:
# NOTE(review): same call as the earlier D.CPT.printCPT() cell. Its execution
# count (80) is lower than that of the collapse() definition (82), so this
# output comes from an earlier, out-of-order run.
D.CPT.printCPT()

>> D: Coma
>> B: Increased total serum calcium
>> C: Brain tumor

D: C 	| B: I,	 C: B 	| Prob
True 	| True,	 True 	| 0.8
True 	| True,	 Fals 	| 0.8
True 	| Fals,	 True 	| 0.8
True 	| Fals,	 Fals 	| 0.05
False 	| True,	 True 	| 0.2
False 	| True,	 Fals 	| 0.2
False 	| Fals,	 True 	| 0.2
False 	| Fals,	 Fals 	| 0.95


In [81]:
# Scratch check of the conditional-expression syntax; nothing else in the
# visible notebook reads this value.
'A' if 4 > 3 else 'B'

'A'