In [2]:
import numpy as np
import pandas as pd
import csv

from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.estimators import HillClimbSearch, K2Score, MaximumLikelihoodEstimator, ParameterEstimator, BayesianEstimator
from pgmpy.sampling import BayesianModelSampling

# Data Preprocessing

In [3]:
def maybe_float(s):
    """Return `s` converted to float, or `s` itself when it is not numeric.

    Values that `float()` rejects with ValueError or TypeError (labels,
    None, ...) are passed through unchanged.
    """
    try:
        converted = float(s)
    except (ValueError, TypeError):
        return s
    return converted

def percentage(s):
    """Scale a percentage value to a fraction (divide by 100).

    Operands that do not support the division (e.g. strings, None) are
    returned unchanged.
    """
    try:
        scaled = s / 100.
    except (ValueError, TypeError):
        scaled = s
    return scaled

def remove_none(s):
    """Map an empty cell to 0; return any other value as a string.

    Parameters
    ----------
    s : object
        Raw cell content (normally a string read from a CSV file).

    Returns
    -------
    int or str
        The integer 0 when `s` is the empty string, otherwise `str(s)`.
    """
    # Compare by value: `s is ''` tests object identity, which only works
    # by accident of string interning and raises a SyntaxWarning on
    # Python 3.8+.
    if s == '':
        return 0
    return str(s)

## Marginal Probabilities

In [4]:
# Table 2: per-variable marginal probabilities, written as percentages.
marginalProb_file = 'data/Table2.csv'

with open(marginalProb_file, newline='') as data:
    rawData = list(csv.reader(data))

# Per cell: keep the text before the first '%', turn blanks into 0, parse
# numbers and rescale them from percent to fraction. Non-numeric cells
# pass through as strings. Every row is read with the width of the first row.
n_cols = len(rawData[0])
marginalProb = [
    percentage(maybe_float(remove_none(row[i].split('%')[0])))
    for row in rawData
    for i in range(n_cols)
]

marginalProb = np.reshape(marginalProb, (len(rawData), -1))
# Drop the first row and first column (presumably header and row labels).
marginalProb_clean = marginalProb[1:, 1:]
# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
margProb = marginalProb_clean.astype(float)
print(margProb)

[[0.78  0.275 0.18  0.715 0.375 0.015]
 [0.015 0.32  0.66  0.105 0.11  0.32 ]
 [0.055 0.025 0.16  0.01  0.105 0.14 ]
 [0.15  0.17  0.    0.17  0.41  0.315]
 [0.    0.21  0.    0.    0.    0.21 ]]


## CPDs

In [5]:
# Table 3: percentages that later cells slice into the tables conditioned
# on x1 (x1x2, x1x4, x1x6).
table3_file = 'data/Table3.csv'

with open(table3_file, newline='') as data:
    rawData = list(csv.reader(data))

# Per cell: keep the text before the first '%', turn blanks into 0, parse
# numbers and rescale them from percent to fraction. Non-numeric cells
# pass through as strings. Every row is read with the width of the first row.
n_cols = len(rawData[0])
table3 = [
    percentage(maybe_float(remove_none(row[i].split('%')[0])))
    for row in rawData
    for i in range(n_cols)
]

table3 = np.reshape(table3, (len(rawData), -1))
# Drop the first row and first column (presumably header and row labels).
table3_clean = table3[1:, 1:]
# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
cpd_x1 = table3_clean.astype(float)
print(cpd_x1)

[[0.78  0.015 0.055 0.15 ]
 [0.231 0.666 0.455 0.4  ]
 [0.365 0.    0.091 0.2  ]
 [0.026 0.    0.    0.033]
 [0.173 0.    0.182 0.167]
 [0.205 0.333 0.273 0.2  ]
 [0.737 1.    0.727 0.567]
 [0.077 0.    0.273 0.2  ]
 [0.013 0.    0.    0.   ]
 [0.173 0.    0.    0.233]
 [0.019 0.    0.    0.   ]
 [0.282 0.666 0.545 0.4  ]
 [0.128 0.333 0.091 0.2  ]
 [0.352 0.    0.182 0.2  ]
 [0.218 0.    0.182 0.2  ]]


In [6]:
# Table 4: percentages that later cells slice into the tables conditioned
# on x2 (x2x3, x2x5).
table4_file = 'data/Table4.csv'

with open(table4_file, newline='') as data:
    rawData = list(csv.reader(data))

# Per cell: keep the text before the first '%', turn blanks into 0, parse
# numbers and rescale them from percent to fraction. Non-numeric cells
# pass through as strings. Every row is read with the width of the first row.
n_cols = len(rawData[0])
table4 = [
    percentage(maybe_float(remove_none(row[i].split('%')[0])))
    for row in rawData
    for i in range(n_cols)
]

table4 = np.reshape(table4, (len(rawData), -1))
# Drop the first row and first column (presumably header and row labels).
table4_clean = table4[1:, 1:]
# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
cpd_x2 = table4_clean.astype(float)
# Hard-coded override of one entry.
# NOTE(review): presumably corrects a bad value in the CSV - confirm against the source table.
cpd_x2[4, 1] = 0.344
print(cpd_x2)

[[0.275 0.32  0.025 0.17  0.21 ]
 [0.127 0.266 0.2   0.176 0.119]
 [0.745 0.656 0.8   0.706 0.5  ]
 [0.127 0.078 0.    0.118 0.381]
 [0.418 0.344 0.6   0.382 0.334]
 [0.073 0.109 0.4   0.147 0.095]
 [0.109 0.125 0.    0.118 0.071]
 [0.4   0.422 0.    0.353 0.5  ]]


In [7]:
# Table 5: percentages that later cells slice into the tables conditioned
# on x3 (x3x2, x3x5, x3x6).
table5_file = 'data/Table5.csv'

with open(table5_file, newline='') as data:
    rawData = list(csv.reader(data))

# Per cell: keep the text before the first '%', turn blanks into 0, parse
# numbers and rescale them from percent to fraction. Non-numeric cells
# pass through as strings. Every row is read with the width of the first row.
n_cols = len(rawData[0])
table5 = [
    percentage(maybe_float(remove_none(row[i].split('%')[0])))
    for row in rawData
    for i in range(n_cols)
]

table5 = np.reshape(table5, (len(rawData), -1))
# Drop the first row and first column (presumably header and row labels).
table5_clean = table5[1:, 1:]
# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
cpd_x3 = table5_clean.astype(float)
print(cpd_x3)

[[0.18   0.66   0.16  ]
 [0.194  0.311  0.219 ]
 [0.472  0.318  0.156 ]
 [0.028  0.0303 0.    ]
 [0.167  0.182  0.125 ]
 [0.139  0.159  0.5   ]
 [0.361  0.394  0.313 ]
 [0.083  0.114  0.125 ]
 [0.222  0.091  0.031 ]
 [0.333  0.402  0.531 ]
 [0.     0.023  0.    ]
 [0.389  0.318  0.25  ]
 [0.083  0.152  0.156 ]
 [0.361  0.303  0.203 ]
 [0.167  0.204  0.281 ]]


In [8]:
# Table 6: percentages that later cells slice into the tables conditioned
# on x4 (x4x1, x4x2, x4x6).
table6_file = 'data/Table6.csv'

with open(table6_file, newline='') as data:
    rawData = list(csv.reader(data))

# Per cell: keep the text before the first '%', turn blanks into 0, parse
# numbers and rescale them from percent to fraction. Non-numeric cells
# pass through as strings. Every row is read with the width of the first row.
n_cols = len(rawData[0])
table6 = [
    percentage(maybe_float(remove_none(row[i].split('%')[0])))
    for row in rawData
    for i in range(n_cols)
]

table6 = np.reshape(table6, (len(rawData), -1))
# Drop the first row and first column (presumably header and row labels).
table6_clean = table6[1:, 1:]
# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
cpd_x4 = table6_clean.astype(float)
print(cpd_x4)

[[0.715 0.105 0.01  0.17 ]
 [0.804 0.571 1.    0.794]
 [0.021 0.    0.    0.   ]
 [0.056 0.143 0.    0.   ]
 [0.119 0.286 0.    0.206]
 [0.308 0.238 0.    0.176]
 [0.322 0.286 1.    0.323]
 [0.028 0.    0.    0.029]
 [0.154 0.19  0.    0.235]
 [0.196 0.286 0.    0.235]
 [0.021 0.    0.    0.   ]
 [0.28  0.571 0.    0.353]
 [0.154 0.143 0.    0.088]
 [0.329 0.19  0.5   0.323]
 [0.217 0.095 0.5   0.235]]


In [9]:
# Table 7: percentages that later cells slice into the tables conditioned
# on x5 (x5x2, x5x3).
table7_file = 'data/Table7.csv'

with open(table7_file, newline='') as data:
    rawData = list(csv.reader(data))

# Per cell: keep the text before the first '%', turn blanks into 0, parse
# numbers and rescale them from percent to fraction. Non-numeric cells
# pass through as strings. Every row is read with the width of the first row.
n_cols = len(rawData[0])
table7 = [
    percentage(maybe_float(remove_none(row[i].split('%')[0])))
    for row in rawData
    for i in range(n_cols)
]

table7 = np.reshape(table7, (len(rawData), -1))
# Drop the first row and first column (presumably header and row labels).
table7_clean = table7[1:, 1:]
# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
cpd_x5 = table7_clean.astype(float)
# Hard-coded overrides of individual entries.
# NOTE(review): presumably correct bad values in the CSV - confirm against the source table.
cpd_x5[1, 3] = 0.268
cpd_x5[2, 3] = 0.329
cpd_x5[3, 3] = 0.
cpd_x5[4, 3] = 0.146
cpd_x5[5, 3] = 0.256
cpd_x5[8, 2] = 0.048
cpd_x5[5, 0] = 0.187
print(cpd_x5)

[[0.375 0.11  0.105 0.41 ]
 [0.307 0.182 0.286 0.268]
 [0.293 0.318 0.381 0.329]
 [0.04  0.091 0.    0.   ]
 [0.173 0.227 0.19  0.146]
 [0.187 0.182 0.143 0.256]
 [0.173 0.136 0.381 0.146]
 [0.693 0.682 0.571 0.646]
 [0.133 0.182 0.048 0.207]]


In [10]:
# Table 8: percentages that later cells slice into the tables conditioned
# on x6 (x6x1, x6x2, x6x3, x6x4).
table8_file = 'data/Table8.csv'

with open(table8_file, newline='') as data:
    rawData = list(csv.reader(data))

# Per cell: keep the text before the first '%', turn blanks into 0, parse
# numbers and rescale them from percent to fraction. Non-numeric cells
# pass through as strings. Every row is read with the width of the first row.
n_cols = len(rawData[0])
table8 = [
    percentage(maybe_float(remove_none(row[i].split('%')[0])))
    for row in rawData
    for i in range(n_cols)
]

table8 = np.reshape(table8, (len(rawData), -1))
# Drop the first row and first column (presumably header and row labels).
table8_clean = table8[1:, 1:]
# This entry is patched while the array still holds strings, i.e. before
# the float conversion below.
# NOTE(review): presumably the raw cell cannot be parsed as a number - confirm.
table8_clean[6, 4] = 0.309
# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
cpd_x6 = table8_clean.astype(float)
# Hard-coded overrides of individual entries.
# NOTE(review): presumably correct bad values in the CSV - confirm against the source table.
cpd_x6[3, 1] = 0.094
cpd_x6[10, 0] = 0.
cpd_x6[11, 0] = 1.
cpd_x6[12, 0] = 0.
cpd_x6[4, 1] = 0.187
cpd_x6[10, 1] = 0.218
cpd_x6[11, 1] = 0.656
cpd_x6[12, 1] = 0.125
cpd_x6[10, 2] = 0.107
cpd_x6[11, 2] = 0.714
cpd_x6[12, 2] = 0.179
cpd_x6[3, 3] = 0.032
cpd_x6[4, 3] = 0.095
cpd_x6[10, 3] = 0.206
cpd_x6[11, 3] = 0.635
cpd_x6[12, 3] = 0.159

print(cpd_x6)

[[0.015 0.32  0.14  0.315 0.21 ]
 [1.    0.687 0.714 0.873 0.809]
 [0.    0.031 0.036 0.    0.   ]
 [0.    0.094 0.036 0.032 0.048]
 [0.    0.187 0.214 0.095 0.143]
 [0.    0.281 0.214 0.317 0.262]
 [0.333 0.296 0.392 0.317 0.309]
 [0.    0.    0.    0.079 0.   ]
 [0.666 0.234 0.142 0.095 0.167]
 [0.    0.187 0.25  0.19  0.262]
 [0.    0.218 0.107 0.206 0.143]
 [1.    0.656 0.714 0.635 0.643]
 [0.    0.125 0.179 0.159 0.214]
 [1.    0.625 0.786 0.746 0.738]
 [0.    0.187 0.107 0.063 0.048]
 [0.    0.    0.    0.016 0.024]
 [0.    0.187 0.107 0.175 0.19 ]]


# Task 1

In [11]:
def correlation(xy, y, x):
    """Return |xy*y - x*y|.

    As called from `xy_corr`, `xy` is a conditional-table entry and `x`,
    `y` are marginal entries, so the value is the absolute gap between the
    product `xy*y` and the product of the marginals `x*y`.
    """
    joint = xy * y
    independent = x * y
    return np.abs(joint - independent)

def xy_corr(a, b, c):
    """Sum `correlation` over the cells of the table `c`.

    a : 1-D array; its length sets how many columns of `c` are visited and
        a[j] is paired with column j.
    b : 1-D array; b[i] is paired with row i of `c`.
    c : 2-D array; every row is visited.

    Returns np.sum of correlation(c[i, j], a[j], b[i]) over all visited cells.
    """
    n_cols = np.shape(a)[0]
    n_rows = np.shape(c)[0]
    scores = [
        correlation(c[row, col], a[col], b[row])
        for col in range(n_cols)
        for row in range(n_rows)
    ]
    return np.sum(scores)

## Data range

In [12]:
# Marginals: one column of margProb per variable, trimmed to the number of
# states used for that variable.
x1 = margProb[:-1, 0]
x2 = margProb[:, 1]
x3 = margProb[:-2, 2]
x4 = margProb[:-1, 3]
x5 = margProb[:-1, 4]
x6 = margProb[:, 5]

# Conditional tables. Row 0 of each cpd_* array is skipped; the remaining
# rows are cut into one block per target variable, all columns kept.
# Tables conditioned on x1
x1x2 = cpd_x1[1:6, :]
x1x4 = cpd_x1[6:10, :]
x1x6 = cpd_x1[10:, :]

# Tables conditioned on x2
x2x3 = cpd_x2[1:4, :]
x2x5 = cpd_x2[4:, :]

# Tables conditioned on x3
x3x2 = cpd_x3[1:6, :]
x3x5 = cpd_x3[6:10, :]
x3x6 = cpd_x3[10:, :]

# Tables conditioned on x4
x4x1 = cpd_x4[1:5, :]
x4x2 = cpd_x4[5:10, :]
x4x6 = cpd_x4[10:, :]

# Tables conditioned on x5
x5x2 = cpd_x5[1:6, :]
x5x3 = cpd_x5[6:, :]

# Tables conditioned on x6
x6x1 = cpd_x6[1:5, :]
x6x2 = cpd_x6[5:10, :]
x6x3 = cpd_x6[10:13, :]
x6x4 = cpd_x6[13:, :]

for marginal in (x1, x2, x3, x4, x5, x6):
    print(marginal)


[0.78  0.015 0.055 0.15 ]
[0.275 0.32  0.025 0.17  0.21 ]
[0.18 0.66 0.16]
[0.715 0.105 0.01  0.17 ]
[0.375 0.11  0.105 0.41 ]
[0.015 0.32  0.14  0.315 0.21 ]


In [13]:
# Accumulator of [pair label, score] entries filled by the following cells.
corr = list()

## x1x2 / x1x4 / x1x6

In [14]:
# Scores for the tables conditioned on x1: x1x2, x1x4, x1x6
x1x2_corr = xy_corr(x1, x2, x1x2)
x1x4_corr = xy_corr(x1, x4, x1x4)
x1x6_corr = xy_corr(x1, x6, x1x6)

corr.extend([
    ['x1x2', x1x2_corr],
    ['x1x4', x1x4_corr],
    ['x1x6', x1x6_corr],
])

## x2x3 / x2x5

In [15]:
# Scores for the tables conditioned on x2: x2x3, x2x5
x2x3_corr = xy_corr(x2, x3, x2x3)
x2x5_corr = xy_corr(x2, x5, x2x5)

corr.extend([
    ['x2x3', x2x3_corr],
    ['x2x5', x2x5_corr],
])

## x3x2 / x3x5 / x3x6

In [16]:
# Scores for the tables conditioned on x3: x3x2, x3x5, x3x6
x3x2_corr = xy_corr(x3, x2, x3x2)
x3x5_corr = xy_corr(x3, x5, x3x5)
x3x6_corr = xy_corr(x3, x6, x3x6)

corr.extend([
    ['x3x2', x3x2_corr],
    ['x3x5', x3x5_corr],
    ['x3x6', x3x6_corr],
])

## x4x1 / x4x2 / x4x6

In [17]:
# Scores for the tables conditioned on x4: x4x1, x4x2, x4x6
x4x1_corr = xy_corr(x4, x1, x4x1)
x4x2_corr = xy_corr(x4, x2, x4x2)
x4x6_corr = xy_corr(x4, x6, x4x6)

corr.extend([
    ['x4x1', x4x1_corr],
    ['x4x2', x4x2_corr],
    ['x4x6', x4x6_corr],
])

## x5x2 / x5x3

In [18]:
# Scores for the tables conditioned on x5: x5x2, x5x3
x5x2_corr = xy_corr(x5, x2, x5x2)
x5x3_corr = xy_corr(x5, x3, x5x3)

corr.extend([
    ['x5x2', x5x2_corr],
    ['x5x3', x5x3_corr],
])

## x6x1 / x6x2 / x6x3 / x6x4

In [19]:
# Scores for the tables conditioned on x6: x6x1, x6x2, x6x3, x6x4
x6x1_corr = xy_corr(x6, x1, x6x1)
x6x2_corr = xy_corr(x6, x2, x6x2)
x6x3_corr = xy_corr(x6, x3, x6x3)
x6x4_corr = xy_corr(x6, x4, x6x4)

corr.extend([
    ['x6x1', x6x1_corr],
    ['x6x2', x6x2_corr],
    ['x6x3', x6x3_corr],
    ['x6x4', x6x4_corr],
])

In [20]:
# Show every [pair label, score] entry accumulated in `corr`.
print(corr)

[['x1x2', 0.15977], ['x1x4', 0.11943000000000005], ['x1x6', 0.16015500000000005], ['x2x3', 0.21852500000000002], ['x2x5', 0.12926000000000004], ['x3x2', 0.21875800000000006], ['x3x5', 0.11551999999999997], ['x3x6', 0.11324000000000001], ['x4x1', 0.11957000000000005], ['x4x2', 0.11569999999999997], ['x4x6', 0.14347000000000001], ['x5x2', 0.12939], ['x5x3', 0.11596500000000005], ['x6x1', 0.16036999999999996], ['x6x2', 0.17531500000000003], ['x6x3', 0.09433999999999998], ['x6x4', 0.14307000000000003]]


# Task 2

## Setting a threshold

In [21]:
def threshold(data, coeff):
    """Select the rows of `data` whose second column is at least `coeff`.

    data  : 2-D array; column 1 must be convertible to float.
    coeff : lower bound (inclusive) applied to column 1.

    Returns `data` indexed by the (k, 1) array of matching row positions,
    so for a 2-D input the result has shape (k, 1, n_columns).
    """
    scores = data[:, 1].astype(float)
    keep = scores >= coeff
    positions = np.argwhere(keep)
    return data[positions]

# Keep the pairs scoring at least 0.12; the array conversion stores every
# entry (labels and scores) as a string.
threshold(np.array(corr), 0.12)

array([[['x1x2', '0.15977']],

       [['x1x6', '0.16015500000000005']],

       [['x2x3', '0.21852500000000002']],

       [['x2x5', '0.12926000000000004']],

       [['x3x2', '0.21875800000000006']],

       [['x4x6', '0.14347000000000001']],

       [['x5x2', '0.12939']],

       [['x6x1', '0.16036999999999996']],

       [['x6x2', '0.17531500000000003']],

       [['x6x4', '0.14307000000000003']]], dtype='<U19')

In [23]:
# x2x1: invert x1x2 with Bayes' rule. Scale each column by the x1 marginal,
# transpose, then divide each column by the x2 marginal.
x1x2_x2 = np.multiply(x1x2, x1)
x2x1 = np.transpose(x1x2_x2) / x2

# x2x6: same inversion applied to x6x2 with the x6 and x2 marginals.
x6x2_x6 = np.multiply(x6x2, x6)
x2x6 = np.transpose(x6x2_x6) / x2

## BN1

In [49]:
# BN1: single edge x2 -> x3.
model = BayesianModel([('x2', 'x3')])

# Root CPD. pgmpy expects values of shape (variable_card, 1) for a node
# without evidence, so the marginal is passed as a column vector. It is
# bound to its own name so the `cpd_x2` table array loaded from Table 4 is
# not overwritten (re-running the slicing cell would otherwise fail).
cpd_x2_prior = TabularCPD('x2', len(x2), x2.reshape(-1, 1))
cpd_x2x3 = TabularCPD('x3', len(x2x3), x2x3, evidence=['x2'], evidence_card=[len(x2)])

model.add_cpds(cpd_x2_prior, cpd_x2x3)

# Draw synthetic data from the network and score the structure against it.
inference = BayesianModelSampling(model)
sample = inference.forward_sample(size=1000)

estimator = BayesianEstimator(model, sample)

# NOTE(review): the return value is discarded, so this call has no visible effect here.
estimator.get_parameters()
print('K2Score: ', K2Score(sample).score(model))

K2Score:  -2302.334898666883


## BN2

In [50]:
# BN2: chain x6 -> x2 -> x3.
model = BayesianModel([('x6', 'x2'), ('x2', 'x3')])

# Root CPD. pgmpy expects values of shape (variable_card, 1) for a node
# without evidence, so the marginal is passed as a column vector. It is
# bound to its own name so the `cpd_x6` table array loaded from Table 8 is
# not overwritten (re-running the slicing cell would otherwise fail).
cpd_x6_prior = TabularCPD('x6', len(x6), x6.reshape(-1, 1))
cpd_x6x2 = TabularCPD('x2', len(x6x2), x6x2, evidence=['x6'], evidence_card=[len(x6)])
cpd_x2x3 = TabularCPD('x3', len(x2x3), x2x3, evidence=['x2'], evidence_card=[len(x2)])

model.add_cpds(cpd_x6_prior, cpd_x6x2, cpd_x2x3)

# Draw synthetic data from the network and score the structure against it.
inference = BayesianModelSampling(model)
sample = inference.forward_sample(size=1000)

print('K2Score: ', K2Score(sample).score(model))

K2Score:  -3641.8300251491364


## BN3

In [51]:
# BN3: x6 -> x2, x6 -> x1, x2 -> x3.
model = BayesianModel([('x6', 'x2'), ('x6', 'x1'), ('x2', 'x3')])

# Root CPD. pgmpy expects values of shape (variable_card, 1) for a node
# without evidence, so the marginal is passed as a column vector. It is
# bound to its own name so the `cpd_x6` table array loaded from Table 8 is
# not overwritten (re-running the slicing cell would otherwise fail).
cpd_x6_prior = TabularCPD('x6', len(x6), x6.reshape(-1, 1))
cpd_x6x1 = TabularCPD('x1', len(x6x1), x6x1, evidence=['x6'], evidence_card=[len(x6)])
cpd_x6x2 = TabularCPD('x2', len(x6x2), x6x2, evidence=['x6'], evidence_card=[len(x6)])
cpd_x2x3 = TabularCPD('x3', len(x2x3), x2x3, evidence=['x2'], evidence_card=[len(x2)])

model.add_cpds(cpd_x6_prior, cpd_x6x2, cpd_x6x1, cpd_x2x3)

# Draw synthetic data from the network and score the structure against it.
inference = BayesianModelSampling(model)
sample = inference.forward_sample(size=1000)

print('K2Score: ', K2Score(sample).score(model))

K2Score:  -4399.982028229801


## BN4

In [52]:
# BN4: x6 -> x2, x2 -> x3, x2 -> x5.
model = BayesianModel([('x6', 'x2'), ('x2', 'x3'), ('x2', 'x5')])

# Root CPD. pgmpy expects values of shape (variable_card, 1) for a node
# without evidence, so the marginal is passed as a column vector. It is
# bound to its own name so the `cpd_x6` table array loaded from Table 8 is
# not overwritten (re-running the slicing cell would otherwise fail).
cpd_x6_prior = TabularCPD('x6', len(x6), x6.reshape(-1, 1))
cpd_x6x2 = TabularCPD('x2', len(x6x2), x6x2, evidence=['x6'], evidence_card=[len(x6)])
cpd_x2x3 = TabularCPD('x3', len(x2x3), x2x3, evidence=['x2'], evidence_card=[len(x2)])
cpd_x2x5 = TabularCPD('x5', len(x2x5), x2x5, evidence=['x2'], evidence_card=[len(x2)])

model.add_cpds(cpd_x6_prior, cpd_x6x2, cpd_x2x5, cpd_x2x3)

# Draw synthetic data from the network and score the structure against it.
inference = BayesianModelSampling(model)
sample = inference.forward_sample(size=1000)

print('K2Score: ', K2Score(sample).score(model))

K2Score:  -4863.090586565958


## BN5

In [53]:
# BN5: x6 -> x4, x6 -> x1, x1 -> x2, x2 -> x3, x2 -> x5.
model = BayesianModel([('x6', 'x4'), ('x6', 'x1'), ('x1', 'x2'), ('x2', 'x3'), ('x2', 'x5')])

# Root CPD. pgmpy expects values of shape (variable_card, 1) for a node
# without evidence, so the marginal is passed as a column vector. It is
# bound to its own name so the `cpd_x6` table array loaded from Table 8 is
# not overwritten (re-running the slicing cell would otherwise fail).
cpd_x6_prior = TabularCPD('x6', len(x6), x6.reshape(-1, 1))
cpd_x6x4 = TabularCPD('x4', len(x6x4), x6x4, evidence=['x6'], evidence_card=[len(x6)])
cpd_x6x1 = TabularCPD('x1', len(x6x1), x6x1, evidence=['x6'], evidence_card=[len(x6)])
cpd_x1x2 = TabularCPD('x2', len(x1x2), x1x2, evidence=['x1'], evidence_card=[len(x1)])
cpd_x2x3 = TabularCPD('x3', len(x2x3), x2x3, evidence=['x2'], evidence_card=[len(x2)])
cpd_x2x5 = TabularCPD('x5', len(x2x5), x2x5, evidence=['x2'], evidence_card=[len(x2)])

model.add_cpds(cpd_x6_prior, cpd_x6x4, cpd_x6x1, cpd_x1x2, cpd_x2x3, cpd_x2x5)

# Draw synthetic data from the network and score the structure against it.
inference = BayesianModelSampling(model)
sample = inference.forward_sample(size=1000)

print('K2Score: ', K2Score(sample).score(model))

K2Score:  -6465.23998164203


## BN6

In [54]:
# BN6: x6 -> x4, x6 -> x2, x2 -> x3, x2 -> x1, x3 -> x5.
model = BayesianModel([('x6', 'x4'), ('x6', 'x2'), ('x2', 'x3'), ('x2', 'x1'), ('x3', 'x5')])

# Root CPD. pgmpy expects values of shape (variable_card, 1) for a node
# without evidence, so the marginal is passed as a column vector. It is
# bound to its own name so the `cpd_x6` table array loaded from Table 8 is
# not overwritten (re-running the slicing cell would otherwise fail).
cpd_x6_prior = TabularCPD('x6', len(x6), x6.reshape(-1, 1))
cpd_x6x4 = TabularCPD('x4', len(x6x4), x6x4, evidence=['x6'], evidence_card=[len(x6)])
cpd_x6x2 = TabularCPD('x2', len(x6x2), x6x2, evidence=['x6'], evidence_card=[len(x6)])
cpd_x2x3 = TabularCPD('x3', len(x2x3), x2x3, evidence=['x2'], evidence_card=[len(x2)])
# x2x1 is the table obtained by inverting x1x2 with Bayes' rule.
cpd_x2x1 = TabularCPD('x1', len(x2x1), x2x1, evidence=['x2'], evidence_card=[len(x2)])
cpd_x3x5 = TabularCPD('x5', len(x3x5), x3x5, evidence=['x3'], evidence_card=[len(x3)])

model.add_cpds(cpd_x6_prior, cpd_x6x4, cpd_x6x2, cpd_x2x3, cpd_x2x1, cpd_x3x5)

# Draw synthetic data from the network and score the structure against it.
inference = BayesianModelSampling(model)
sample = inference.forward_sample(size=1000)

print('K2Score: ', K2Score(sample).score(model))

K2Score:  -6429.554698850261


## BN7

In [55]:
# BN7: x1 -> x6, x1 -> x2, x6 -> x4, x2 -> x3, x2 -> x5.
model = BayesianModel([('x1', 'x6'), ('x1', 'x2'), ('x6', 'x4'), ('x2', 'x3'), ('x2', 'x5')])

# Root CPD. pgmpy expects values of shape (variable_card, 1) for a node
# without evidence, so the marginal is passed as a column vector. It is
# bound to its own name so the `cpd_x1` table array loaded from Table 3 is
# not overwritten (re-running the slicing cell would otherwise fail).
cpd_x1_prior = TabularCPD('x1', len(x1), x1.reshape(-1, 1))
cpd_x6x4 = TabularCPD('x4', len(x6x4), x6x4, evidence=['x6'], evidence_card=[len(x6)])
cpd_x1x6 = TabularCPD('x6', len(x1x6), x1x6, evidence=['x1'], evidence_card=[len(x1)])
cpd_x1x2 = TabularCPD('x2', len(x1x2), x1x2, evidence=['x1'], evidence_card=[len(x1)])
cpd_x2x3 = TabularCPD('x3', len(x2x3), x2x3, evidence=['x2'], evidence_card=[len(x2)])
cpd_x2x5 = TabularCPD('x5', len(x2x5), x2x5, evidence=['x2'], evidence_card=[len(x2)])

model.add_cpds(cpd_x1_prior, cpd_x6x4, cpd_x1x6, cpd_x1x2, cpd_x2x3, cpd_x2x5)

# Draw synthetic data from the network and score the structure against it.
inference = BayesianModelSampling(model)
sample = inference.forward_sample(size=1000)

print('K2Score: ', K2Score(sample).score(model))

K2Score:  -6481.007156260817


## BN8

In [56]:
# BN8: x4 -> x6, x6 -> x1, x1 -> x2, x2 -> x3, x2 -> x5.
model = BayesianModel([('x4', 'x6'), ('x6', 'x1'), ('x1', 'x2'), ('x2', 'x3'), ('x2', 'x5')])

# Root CPD. pgmpy expects values of shape (variable_card, 1) for a node
# without evidence, so the marginal is passed as a column vector. It is
# bound to its own name so the `cpd_x4` table array loaded from Table 6 is
# not overwritten (re-running the slicing cell would otherwise fail).
cpd_x4_prior = TabularCPD('x4', len(x4), x4.reshape(-1, 1))
cpd_x4x6 = TabularCPD('x6', len(x4x6), x4x6, evidence=['x4'], evidence_card=[len(x4)])
cpd_x6x1 = TabularCPD('x1', len(x6x1), x6x1, evidence=['x6'], evidence_card=[len(x6)])
cpd_x1x2 = TabularCPD('x2', len(x1x2), x1x2, evidence=['x1'], evidence_card=[len(x1)])
cpd_x2x3 = TabularCPD('x3', len(x2x3), x2x3, evidence=['x2'], evidence_card=[len(x2)])
cpd_x2x5 = TabularCPD('x5', len(x2x5), x2x5, evidence=['x2'], evidence_card=[len(x2)])

model.add_cpds(cpd_x4_prior, cpd_x4x6, cpd_x6x1, cpd_x1x2, cpd_x2x3, cpd_x2x5)

# Draw synthetic data from the network and score the structure against it.
inference = BayesianModelSampling(model)
sample = inference.forward_sample(size=1000)

print('K2Score: ', K2Score(sample).score(model))

K2Score:  -6461.5366229964675


# Task 3

## Converting BN5 to Markov model

In [57]:
# BayesianModel is already imported in the notebook's first cell, so it is
# not re-imported here.
# NOTE(review): the edge list below uses the orientation of BN7 (x1 -> x6),
# while BN5 was defined with x6 -> x1. The undirected edges are the same
# and no node has two parents in either graph, so the printed Markov
# network edges should match - confirm this is the intended model.
G = BayesianModel([('x1', 'x6'), ('x1', 'x2'), ('x6', 'x4'), ('x2', 'x3'), ('x2', 'x5')])
mm = G.to_markov_model()
print(mm.nodes())
print(mm.edges())

['x1', 'x6', 'x2', 'x4', 'x3', 'x5']
[('x1', 'x6'), ('x1', 'x2'), ('x6', 'x4'), ('x2', 'x3'), ('x2', 'x5')]
