Documentation : http://pgmpy.org/models.html

#Bayesian Model

In [1]:
!pip install pgmpy

Collecting pgmpy
[?25l  Downloading https://files.pythonhosted.org/packages/a3/0e/d9fadbfaa35e010c04d43acd3ae9fbefec98897dd7d61a6b7eb5a8b34072/pgmpy-0.1.14-py3-none-any.whl (331kB)
[K     |█                               | 10kB 14.9MB/s eta 0:00:01[K     |██                              | 20kB 19.7MB/s eta 0:00:01[K     |███                             | 30kB 23.2MB/s eta 0:00:01[K     |████                            | 40kB 25.7MB/s eta 0:00:01[K     |█████                           | 51kB 27.6MB/s eta 0:00:01[K     |██████                          | 61kB 29.7MB/s eta 0:00:01[K     |███████                         | 71kB 21.9MB/s eta 0:00:01[K     |████████                        | 81kB 18.9MB/s eta 0:00:01[K     |█████████                       | 92kB 19.3MB/s eta 0:00:01[K     |█████████▉                      | 102kB 20.4MB/s eta 0:00:01[K     |██████████▉                     | 112kB 20.4MB/s eta 0:00:01[K     |███████████▉                    | 122kB 20.4MB/

Bayesian Model

In [2]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete.CPD import TabularCPD

Add CPD (Conditional Probability Distribution) to the Bayesian Model

In [None]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
model = BayesianModel([('A', 'B'), ('B', 'C')])
cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
cpd_b = TabularCPD('B', 2, [[0.3, 0.7], [0.7, 0.3]],
                   evidence=['A'],
                   evidence_card=[2])
cpd_c = TabularCPD('C', 2, [[0.1, 0.9], [0.9, 0.1]],
                   evidence=['B'],
                   evidence_card=[2])
model.add_cpds(cpd_a, cpd_b, cpd_c)
copy_model = model.copy()
copy_model.nodes()

NodeView(('A', 'B', 'C'))

In [None]:
copy_model.edges()

OutEdgeView([('A', 'B'), ('B', 'C')])

In [None]:
len(copy_model.get_cpds())

3

Estimates the CPD for each variable based on a given data set.

In [None]:
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
data = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
model = BayesianModel([('A', 'C'), ('B', 'C')])
model.fit(data)
model.get_cpds()

  import pandas.util.testing as tm


[<TabularCPD representing P(A:2) at 0x7f3c00043e90>,
 <TabularCPD representing P(B:2) at 0x7f3bfffe9ed0>,
 <TabularCPD representing P(C:2 | A:2, B:2) at 0x7f3c0ede8e50>]

Returns the cardinality of the node. Throws an error if the CPD for the queried node hasn’t been added to the network.

In [None]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
cpd_diff = TabularCPD('diff', 2, [[0.6], [0.4]]);
cpd_intel = TabularCPD('intel', 2, [[0.7], [0.3]]);
cpd_grade = TabularCPD('grade', 2, [[0.1, 0.9, 0.2, 0.7],
                                    [0.9, 0.1, 0.8, 0.3]],
                                ['intel', 'diff'], [2, 2])
student.add_cpds(cpd_diff,cpd_intel,cpd_grade)
student.get_cardinality()

defaultdict(int, {'diff': 2, 'grade': 2, 'intel': 2})

Returns the cpd of the node. If node is not specified returns all the CPDs that have been added till now to the graph

In [None]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
cpd = TabularCPD('grade', 2, [[0.1, 0.9, 0.2, 0.7],
                              [0.9, 0.1, 0.8, 0.3]],
                 ['intel', 'diff'], [2, 2])
student.add_cpds(cpd)
student.get_cpds()

[<TabularCPD representing P(grade:2 | intel:2, diff:2) at 0x7f3bfed57610>]

Returns a markov blanket for a random variable. In the case of Bayesian Networks, the markov blanket is the set of node’s parents, its children and its children’s other parents.

In [None]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
G = BayesianModel([('x', 'y'), ('z', 'y'), ('y', 'w'), ('y', 'v'), ('u', 'w'),
                   ('s', 'v'), ('w', 't'), ('w', 'm'), ('v', 'n'), ('v', 'q')])
G.get_markov_blanket('y')

['z', 'x', 'u', 'w', 's', 'v']

In [None]:
G.get_markov_blanket('x')

['z', 'y']

In [None]:
G.get_markov_blanket('z')

['y', 'x']

In [None]:
G.get_markov_blanket('w')

['t', 'm', 'y', 'u']

Predicts states of all the missing variables.

In [None]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                      columns=['A', 'B', 'C', 'D', 'E'])
train_data = values[:800]
predict_data = values[800:]
model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
model.fit(train_data)
predict_data = predict_data.copy()
predict_data.drop('E', axis=1, inplace=True)
y_pred = model.predict(predict_data)
y_pred

100%|██████████| 16/16 [00:04<00:00,  3.71it/s]


Unnamed: 0,E
0,1
1,1
2,0
3,0
4,1
...,...
195,0
196,1
197,0
198,0


Predicts probabilities of all states of the missing variables.

In [None]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
values = pd.DataFrame(np.random.randint(low=0, high=2, size=(100, 5)),
                      columns=['A', 'B', 'C', 'D', 'E'])
train_data = values[:80]
predict_data = values[80:]
model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
model.fit(values)
predict_data = predict_data.copy()
predict_data.drop('B', axis=1, inplace=True)
y_prob = model.predict_probability(predict_data)
y_prob

Unnamed: 0,B_0,B_1
80,0.526221,0.473779
81,0.526221,0.473779
82,0.406,0.594
83,0.526221,0.473779
84,0.526221,0.473779
85,0.354251,0.645749
86,0.526221,0.473779
87,0.406,0.594
88,0.354251,0.645749
89,0.396975,0.603025


Removes the cpds that are provided in the argument.

In [None]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
cpd = TabularCPD('grade', 2, [[0.1, 0.9, 0.2, 0.7],
                              [0.9, 0.1, 0.8, 0.3]],
                 ['intel', 'diff'], [2, 2])
student.add_cpds(cpd)
student.remove_cpds(cpd)

In [None]:
student.get_cpds()

[]

Remove node from the model.

Removing a node also removes all the associated edges, removes the CPD of the node and marginalizes the CPDs of it’s children.

In [None]:
import pandas as pd
import numpy as np
from pgmpy.models import BayesianModel
model = BayesianModel([('A', 'B'), ('B', 'C'),
                       ('A', 'D'), ('D', 'C')])
values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 4)),
                      columns=['A', 'B', 'C', 'D'])
model.fit(values)
print(model.get_cpds())
print()
model.remove_node('A')
model.get_cpds()

[<TabularCPD representing P(A:2) at 0x7f3bfe38c910>, <TabularCPD representing P(B:2 | A:2) at 0x7f3bfecaa610>, <TabularCPD representing P(C:2 | B:2, D:2) at 0x7f3bfe38c590>, <TabularCPD representing P(D:2 | A:2) at 0x7f3bfe48e990>]



[<TabularCPD representing P(B:2) at 0x7f3bfecaa610>,
 <TabularCPD representing P(C:2 | B:2, D:2) at 0x7f3bfe38c590>,
 <TabularCPD representing P(D:2) at 0x7f3bfe48e990>]

Remove multiple nodes from the model.

Removing a node also removes all the associated edges, removes the CPD of the node and marginalizes the CPDs of it’s children.

In [None]:
import pandas as pd
import numpy as np
from pgmpy.models import BayesianModel
model = BayesianModel([('A', 'B'), ('B', 'C'),
                       ('A', 'D'), ('D', 'C')])
values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 4)),
                      columns=['A', 'B', 'C', 'D'])
model.fit(values)
model.get_cpds()

[<TabularCPD representing P(A:2) at 0x7f3bfecaa6d0>,
 <TabularCPD representing P(B:2 | A:2) at 0x7f3bfe3177d0>,
 <TabularCPD representing P(C:2 | B:2, D:2) at 0x7f3bfe38e790>,
 <TabularCPD representing P(D:2 | A:2) at 0x7f3bfe38eed0>]

In [None]:
model.remove_nodes_from(['A', 'B'])
model.get_cpds()

[<TabularCPD representing P(C:2 | D:2) at 0x7f3bfe38e790>,
 <TabularCPD representing P(D:2) at 0x7f3bfe38eed0>]

to_markov_model


Converts bayesian model to markov model. The markov model created would be the moral graph of the bayesian model.

In [None]:
from pgmpy.models import BayesianModel
G = BayesianModel([('diff', 'grade'), ('intel', 'grade'),
                   ('intel', 'SAT'), ('grade', 'letter')])
mm = G.to_markov_model()
print(mm.nodes())

mm.edges()

['diff', 'grade', 'intel', 'letter', 'SAT']


EdgeView([('diff', 'grade'), ('diff', 'intel'), ('grade', 'letter'), ('grade', 'intel'), ('intel', 'SAT')])

#Markov Model

A MarkovModel stores nodes and edges with potentials

MarkovModel holds undirected edges.

Create an empty Markov Model with no nodes and no edges.

In [None]:
from pgmpy.models import MarkovModel
import numpy as np

In [None]:
G = MarkovModel()

##Nodes

G can be grown by adding nodes.

Add one node at a time:

In [None]:
G.add_node('a')

Add the nodes from any container (a list, set or tuple or the nodes from another graph).

In [None]:
G.add_nodes_from(['a', 'b'])

##Edges


G can also be grown by adding edges.



Add one edge,

In [None]:
G.add_edge('a', 'b')

a list of edges,

In [None]:
G.add_edges_from([('a', 'b'), ('b', 'c')])

In [None]:
'a' in G

True

In [None]:
len(G)

3

Add an edge between u and v.

The nodes u and v will be automatically added if they are not already in the graph

In [None]:
from pgmpy.models import MarkovModel
G = MarkovModel()
G.add_nodes_from(['Alice', 'Bob', 'Charles'])
G.add_edge('Alice', 'Bob')

Associate a factor to the graph. See factors class for the order of potential values

In [None]:
from pgmpy.factors.discrete import DiscreteFactor
student = MarkovModel([('Alice', 'Bob'), ('Bob', 'Charles'),
                       ('Charles', 'Debbie'), ('Debbie', 'Alice')])
factor = DiscreteFactor(['Alice', 'Bob'], cardinality=[3, 2],
                values=np.random.rand(6))
student.add_factors(factor)

In [None]:
student.edges()

EdgeView([('Alice', 'Bob'), ('Alice', 'Debbie'), ('Bob', 'Charles'), ('Charles', 'Debbie')])

In [None]:
student.nodes()

NodeView(('Alice', 'Bob', 'Charles', 'Debbie'))

In [None]:
student.get_factors()

[<DiscreteFactor representing phi(Alice:3, Bob:2) at 0x7f1d414adb10>]

In [None]:
student.get_cardinality(node='Alice')

3

In [None]:
student.get_cardinality()

defaultdict(int, {'Alice': 3, 'Bob': 2})

In [None]:
student.get_local_independencies()

(Alice ⟂ Charles | Bob, Debbie)
(Bob ⟂ Debbie | Alice, Charles)
(Charles ⟂ Alice | Bob, Debbie)
(Debbie ⟂ Bob | Charles, Alice)

Convert Markov model into Bayesian

In [None]:
from pgmpy.models import MarkovModel
from pgmpy.factors.discrete import DiscreteFactor
mm = MarkovModel()
mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
                   ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
                   ('x4', 'x7'), ('x5', 'x7')])
phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
mm.add_factors(*phi)
bm = mm.to_bayesian_model()

  S_by_C = {key: S[key] / C[key] for key in S}
