In [2]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD


In [3]:
# Network structure as directed (parent, child) edges:
# D -> G <- I, G -> L, I -> S.
model = BayesianModel(
    [
        ('D', 'G'),
        ('I', 'G'),
        ('G', 'L'),
        ('I', 'S'),
    ]
)

In [4]:
# Unconditional distributions for the two parentless nodes D and I.
# `values` holds one single-entry row per state of the variable.
cpd_d = TabularCPD(
    variable='D',
    variable_card=2,
    values=[[0.6], [0.4]],
)
cpd_i = TabularCPD(
    variable='I',
    variable_card=2,
    values=[[0.7], [0.3]],
)

In [5]:
# P(G | I, D): one row per state of G and one column per parent combination,
# ordered (I0, D0), (I0, D1), (I1, D0), (I1, D1). Every column sums to 1.
cpd_g = TabularCPD(
    variable='G',
    variable_card=3,
    evidence=['I', 'D'],
    evidence_card=[2, 2],
    values=[
        [0.3, 0.05, 0.9, 0.5],
        [0.4, 0.25, 0.08, 0.3],
        [0.3, 0.7, 0.02, 0.2],
    ],
)

In [6]:
# P(L | G): two rows (states of L), three columns (one per state of G).
cpd_l = TabularCPD(
    variable='L',
    variable_card=2,
    evidence=['G'],
    evidence_card=[3],
    values=[
        [0.1, 0.4, 0.99],
        [0.9, 0.6, 0.01],
    ],
)

In [7]:
# P(S | I): two rows (states of S), two columns (one per state of I).
cpd_s = TabularCPD(
    variable='S',
    variable_card=2,
    evidence=['I'],
    evidence_card=[2],
    values=[
        [0.95, 0.2],
        [0.05, 0.8],
    ],
)

In [8]:
# Attach one CPD per node to the network.
model.add_cpds(
    cpd_d,
    cpd_i,
    cpd_g,
    cpd_l,
    cpd_s,
)

In [9]:
# Sanity-check the structure and the attached CPDs; the cell displays True.
model.check_model()

True

In [10]:
# Same distributions as the index-only CPDs, but with human-readable state
# names. Each CPD lists names for its own variable and for every parent.
cpd_d_sn = TabularCPD(
    variable='D',
    variable_card=2,
    values=[[0.6], [0.4]],
    state_names={'D': ['Easy', 'Hard']},
)
cpd_i_sn = TabularCPD(
    variable='I',
    variable_card=2,
    values=[[0.7], [0.3]],
    state_names={'I': ['Dumb', 'Intelligent']},
)
# Columns follow the parent order: (Dumb, Easy), (Dumb, Hard),
# (Intelligent, Easy), (Intelligent, Hard).
cpd_g_sn = TabularCPD(
    variable='G',
    variable_card=3,
    evidence=['I', 'D'],
    evidence_card=[2, 2],
    values=[
        [0.3, 0.05, 0.9, 0.5],
        [0.4, 0.25, 0.08, 0.3],
        [0.3, 0.7, 0.02, 0.2],
    ],
    state_names={
        'G': ['A', 'B', 'C'],
        'I': ['Dumb', 'Intelligent'],
        'D': ['Easy', 'Hard'],
    },
)


In [11]:
# P(L | G) with named states: rows are L in ('Bad', 'Good'), columns are
# G in ('A', 'B', 'C').
cpd_l_sn = TabularCPD(
    variable='L',
    variable_card=2,
    evidence=['G'],
    evidence_card=[3],
    values=[
        [0.1, 0.4, 0.99],
        [0.9, 0.6, 0.01],
    ],
    state_names={
        'L': ['Bad', 'Good'],
        'G': ['A', 'B', 'C'],
    },
)

In [12]:
# P(S | I) with named states: rows are S in ('Bad', 'Good'), columns are
# I in ('Dumb', 'Intelligent').
cpd_s_sn = TabularCPD(
    variable='S',
    variable_card=2,
    evidence=['I'],
    evidence_card=[2],
    values=[
        [0.95, 0.2],
        [0.05, 0.8],
    ],
    state_names={
        'S': ['Bad', 'Good'],
        'I': ['Dumb', 'Intelligent'],
    },
)

In [13]:
# Attach the state-named CPDs to the same model, then re-validate it.
# The final expression is the cell output (True).
model.add_cpds(
    cpd_d_sn,
    cpd_i_sn,
    cpd_g_sn,
    cpd_l_sn,
    cpd_s_sn,
)
model.check_model()



True

In [14]:
# List every CPD currently attached to the model.
model.get_cpds()

[<TabularCPD representing P(D:2) at 0x198ba7e1220>,
 <TabularCPD representing P(I:2) at 0x198ba7e11c0>,
 <TabularCPD representing P(G:3 | I:2, D:2) at 0x198ba7e1250>,
 <TabularCPD representing P(L:2 | G:3) at 0x1989ce64b50>,
 <TabularCPD representing P(S:2 | I:2) at 0x198ba7bacd0>]

In [15]:
# Print the index-only CPD object for G; states appear as G(0), I(0), D(0), ...
print(cpd_g)

+------+------+------+------+------+
| I    | I(0) | I(0) | I(1) | I(1) |
+------+------+------+------+------+
| D    | D(0) | D(1) | D(0) | D(1) |
+------+------+------+------+------+
| G(0) | 0.3  | 0.05 | 0.9  | 0.5  |
+------+------+------+------+------+
| G(1) | 0.4  | 0.25 | 0.08 | 0.3  |
+------+------+------+------+------+
| G(2) | 0.3  | 0.7  | 0.02 | 0.2  |
+------+------+------+------+------+


In [16]:
# Print the CPD the model currently holds for G; this one shows state names.
print(model.get_cpds('G'))

+------+---------+---------+----------------+----------------+
| I    | I(Dumb) | I(Dumb) | I(Intelligent) | I(Intelligent) |
+------+---------+---------+----------------+----------------+
| D    | D(Easy) | D(Hard) | D(Easy)        | D(Hard)        |
+------+---------+---------+----------------+----------------+
| G(A) | 0.3     | 0.05    | 0.9            | 0.5            |
+------+---------+---------+----------------+----------------+
| G(B) | 0.4     | 0.25    | 0.08           | 0.3            |
+------+---------+---------+----------------+----------------+
| G(C) | 0.3     | 0.7     | 0.02           | 0.2            |
+------+---------+---------+----------------+----------------+


In [17]:
# Number of states of node G.
model.get_cardinality('G')

3

In [18]:
# Local independence assertion for node G, given its parents I and D.
model.local_independencies('G')

(G ⟂ S | I, D)

In [19]:
# Local independence assertions for every node in the network.
model.local_independencies(['D', 'I', 'S', 'G', 'L'])

(D ⟂ S, I)
(I ⟂ D)
(S ⟂ G, L, D | I)
(G ⟂ S | I, D)
(L ⟂ S, I, D | G)

In [20]:
# Nodes connected to D by an active trail when nothing is observed.
model.active_trail_nodes('D')

{'D': {'D', 'G', 'L'}}

In [21]:
# Same query with G observed: the reachable set changes (see the output).
model.active_trail_nodes('D', observed='G')

{'D': {'D', 'I', 'S'}}

In [22]:
# Exact inference by variable elimination over the student network.
from pgmpy.inference import VariableElimination

infer = VariableElimination(model)

# Distribution of G with no evidence supplied.
g_dist = infer.query(variables=['G'])
print(g_dist)

Finding Elimination Order: :   0%|                                                               | 0/4 [00:00<?, ?it/s]
  0%|                                                                                            | 0/4 [00:00<?, ?it/s][A
Eliminating: S:   0%|                                                                            | 0/4 [00:00<?, ?it/s][A
Eliminating: I:   0%|                                                                            | 0/4 [00:00<?, ?it/s][A
Eliminating: L:   0%|                                                                            | 0/4 [00:00<?, ?it/s][A
Eliminating: D: 100%|███████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 572.99it/s][A

+------+----------+
| G    |   phi(G) |
| G(A) |   0.3620 |
+------+----------+
| G(B) |   0.2884 |
+------+----------+
| G(C) |   0.3496 |
+------+----------+





In [23]:

# Distribution of G given D = 'Easy' and I = 'Intelligent'.
print(
    infer.query(
        ['G'],
        evidence={
            'D': 'Easy',
            'I': 'Intelligent',
        },
    )
)


  0%|                                                                                            | 0/2 [00:00<?, ?it/s][A
Finding Elimination Order: :   0%|                                                               | 0/2 [00:00<?, ?it/s][A

  0%|                                                                                            | 0/2 [00:00<?, ?it/s][A[A

Eliminating: S:   0%|                                                                            | 0/2 [00:00<?, ?it/s][A[A

Eliminating: L:   0%|                                                                            | 0/2 [00:10<?, ?it/s][A[A

Finding Elimination Order: : 100%|███████████████████████████████████████████████████████| 4/4 [01:00<00:00, 15.02s/it][A[A
Eliminating: L: 100%|████████████████████████████████████████████████████████████████████| 2/2 [01:10<00:00, 35.04s/it]

+------+----------+
| G    |   phi(G) |
| G(A) |   0.9000 |
+------+----------+
| G(B) |   0.0800 |
+------+----------+
| G(C) |   0.0200 |
+------+----------+



Finding Elimination Order: : 100%|███████████████████████████████████████████████████████| 2/2 [01:20<00:00, 40.05s/it]


In [24]:
# MAP query: the single most probable state of G when no evidence is given.
infer.map_query(['G'])

Finding Elimination Order: :   0%|                                                               | 0/4 [00:00<?, ?it/s]
  0%|                                                                                            | 0/4 [00:00<?, ?it/s][A
Eliminating: S:   0%|                                                                            | 0/4 [00:00<?, ?it/s][A
Eliminating: I:   0%|                                                                            | 0/4 [00:00<?, ?it/s][A
Eliminating: L:   0%|                                                                            | 0/4 [00:00<?, ?it/s][A
Eliminating: D: 100%|███████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 498.73it/s][A


{'G': 'A'}

In [None]:
# Standalone pyAgrum example (originally run in Spyder rather than a notebook):
# build a small Bayesian network by hand and draw a synthetic dataset from it.
import pyAgrum as gum

# Seed pyAgrum's random generator so the sampled CSV is reproducible.
gum.initRandom(42)

# Initialize the Bayesian network.
bn = gum.BayesNet('MyNetwork')

# Nodes. bn.add() returns the node id, which is reused for arcs and CPTs.
Exam = bn.add(gum.LabelizedVariable('Exam', 'Test Variable1', 2))
T2 = bn.add(gum.LabelizedVariable('T2', 'Testvariable 2', 2))
T3 = bn.add(gum.LabelizedVariable('T3', 'Test Variable 3', 2))
# NOTE(review): node 'T4' is bound to the name W, is not connected to any
# other node and gets no CPT in this cell -- confirm whether it is needed.
W = bn.add(gum.LabelizedVariable('T4','Test Variable 4',2))

# Edges: Exam -> T2, T2 -> T3, Exam -> T3.
# The first node is bound to `Exam` (the CPT keys below use 'Exam' as well);
# there is no `T1` name in this cell, so the arcs must start from `Exam`.
for link in [(Exam, T2), (T2, T3), (Exam, T3)]:
    bn.addArc(*link)

# Conditional probabilities.
# All of these are placeholders and need to be informed by literature.
bn.cpt(Exam)[:] = [0.2, 0.8]

# P(T2 | Exam): one distribution per state of the parent.
bn.cpt(T2)[{'Exam': 0}] = [0.4, 0.6]
bn.cpt(T2)[{'Exam': 1}] = [0.01, 0.99]

# P(T3 | Exam, T2): every row must be a distribution that sums to 1.
bn.cpt(T3)[{'Exam': 0, 'T2': 0}] = [0.0, 1.0]
# NOTE(review): this row was [0.8, 1.2], which is not a valid distribution;
# [0.8, 0.2] is assumed to be the intended value -- confirm.
bn.cpt(T3)[{'Exam': 0, 'T2': 1}] = [0.8, 0.2]
bn.cpt(T3)[{'Exam': 1, 'T2': 0}] = [0.9, 0.1]
bn.cpt(T3)[{'Exam': 1, 'T2': 1}] = [0.99, 0.01]

# Database generator shipped with pyAgrum.
dbg = gum.BNDatabaseGenerator(bn)

# TODO: the optimal sample size still needs to be determined.
dbg.drawSamples(100)

# Write the sampled records to CSV.
dbg.toCSV('FakeDB.csv')

In [None]:
# Admission example: Examlevel and IQlevel drive Marks, Marks drives
# Admission, and IQlevel separately drives Aptiscore.
#
# gum.fastBN() creates every variable named in the structure string itself
# (binary by default), so declaring LabelizedVariables on a separate BayesNet
# beforehand has no effect once `bn2` is rebound. The network is therefore
# built with fastBN only, and every CPT row below has two entries.

# Seed pyAgrum's random generator so the sampled CSV is reproducible.
gum.initRandom(42)

bn2 = gum.fastBN("Examlevel->Marks<-IQlevel->Aptiscore;Marks->Admission")

# Root nodes.
bn2.cpt('IQlevel')[:] = [0.8, 0.2]
# NOTE(review): this prior was [0.7, 0.36], which sums to 1.06;
# [0.7, 0.3] is assumed to be the intended value -- confirm.
bn2.cpt('Examlevel')[:] = [0.7, 0.3]

# P(Marks | Examlevel, IQlevel): one row per parent combination.
bn2.cpt('Marks')[{'Examlevel': 0, 'IQlevel': 0}] = [0.6, 0.4]
bn2.cpt('Marks')[{'Examlevel': 0, 'IQlevel': 1}] = [0.9, 0.1]
bn2.cpt('Marks')[{'Examlevel': 1, 'IQlevel': 0}] = [0.5, 0.5]
# The last row belongs to (Examlevel=1, IQlevel=1). Assigning (1, 0) a second
# time would overwrite the row above and leave (1, 1) with fastBN's own
# initial values.
bn2.cpt('Marks')[{'Examlevel': 1, 'IQlevel': 1}] = [0.8, 0.2]

# P(Admission | Marks).
bn2.cpt('Admission')[{'Marks': 0}] = [0.6, 0.4]
bn2.cpt('Admission')[{'Marks': 1}] = [0.9, 0.1]

# P(Aptiscore | IQlevel).
bn2.cpt('Aptiscore')[{'IQlevel': 0}] = [0.75, 0.25]
bn2.cpt('Aptiscore')[{'IQlevel': 1}] = [0.4, 0.6]

# Sample a synthetic dataset from the network and write it to CSV.
dbg = gum.BNDatabaseGenerator(bn2)
dbg.drawSamples(100)
dbg.toCSV('Mytest.csv')

In [None]:
# Import packages.
import pyAgrum as gum

# Seed pyAgrum's random generator so the generated dataset is reproducible.
gum.initRandom(42)

# Define the structure of the network "ConvBN". The arc Marks->Admission is
# written as a separate chain after the ';'.
#
# gum.fastBN() creates every variable named in the structure string itself
# (binary by default), so declaring LabelizedVariables on a separate BayesNet
# beforehand has no effect once `ConvBN` is rebound. The network is therefore
# built with fastBN only, and every CPT row below has two entries.
ConvBN = gum.fastBN("Examlevel->Marks<-IQlevel->Aptiscore ; Marks->Admission")

# The top-most independent nodes, "IQlevel" and "Examlevel".
ConvBN.cpt('IQlevel')[:] = [0.8, 0.2]
# NOTE(review): this prior was [0.7, 0.36], which sums to 1.06;
# [0.7, 0.3] is assumed to be the intended value -- confirm.
ConvBN.cpt('Examlevel')[:] = [0.7, 0.3]

# "Marks" as a result of "Examlevel" and "IQlevel": one row per parent
# combination.
ConvBN.cpt('Marks')[{'Examlevel': 0, 'IQlevel': 0}] = [0.6, 0.4]
ConvBN.cpt('Marks')[{'Examlevel': 0, 'IQlevel': 1}] = [0.9, 0.1]
ConvBN.cpt('Marks')[{'Examlevel': 1, 'IQlevel': 0}] = [0.5, 0.5]
# The last row belongs to (Examlevel=1, IQlevel=1). Assigning (1, 0) a second
# time would overwrite the row above and leave (1, 1) with fastBN's own
# initial values.
ConvBN.cpt('Marks')[{'Examlevel': 1, 'IQlevel': 1}] = [0.8, 0.2]

# "Admission" given "Marks".
ConvBN.cpt('Admission')[{'Marks': 0}] = [0.6, 0.4]
ConvBN.cpt('Admission')[{'Marks': 1}] = [0.9, 0.1]

# "Aptiscore" given "IQlevel".
ConvBN.cpt('Aptiscore')[{'IQlevel': 0}] = [0.75, 0.25]
ConvBN.cpt('Aptiscore')[{'IQlevel': 1}] = [0.4, 0.6]

# A very neat feature of the pyAgrum library is database generation.
dbg = gum.BNDatabaseGenerator(ConvBN)

# Number of samples to draw for the dataset.
dbg.drawSamples(1000)

# Write the sampled records to CSV.
dbg.toCSV('GenerateFakedatabasedonnetwork.csv')