In [2]:
import pgmpy
from pgmpy.models import FactorGraph, BayesianModel
from pgmpy.factors.discrete.CPD import TabularCPD

In [None]:
# Leftover tab-completion probe of `pgmpy.factors.distributions`; the bare
# trailing dot was a SyntaxError, so it is kept only as a comment to let the
# notebook run top to bottom.

In [33]:
# Toy network: the 3-state variable r depends on two binary colour
# indicators, red and blue.
G = BayesianModel()

G.add_nodes_from(['r', 'red', 'blue'])
G.add_edges_from([('red', 'r'), ('blue', 'r')])

# Explicit state labels, shared by every CPD (same pattern as the later
# model that includes node c). The recorded run of predict_probability on
# this model ended in "TypeError: 'NoneType' object is not subscriptable",
# while the model whose CPDs carry state_names works, so they are supplied
# here as well.
state_names = {'r': [0, 1, 2], 'red': [0, 1], 'blue': [0, 1]}

# P(r | red, blue): one row per state of r, one column per parent
# configuration, ordered (red, blue) = (0,0), (0,1), (1,0), (1,1).
r_cpd = TabularCPD('r', 3, [[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 1]],
                   evidence=['red', 'blue'], evidence_card=[2, 2],
                   state_names=state_names)

# Priors of the parentless nodes: TabularCPD wants shape
# (variable_card, 1), i.e. one row per state, not a single row.
red_cpd = TabularCPD('red', 2, [[0.3], [0.7]], state_names=state_names)
blue_cpd = TabularCPD('blue', 2, [[0.5], [0.5]], state_names=state_names)

G.add_cpds(r_cpd, red_cpd, blue_cpd)

In [2]:
# Start from an empty pgmpy FactorGraph.
FG = FactorGraph()

In [3]:
# Register the three variable names as nodes of FG; no factors or edges are
# attached in this cell.
FG.add_nodes_from(['r', 'red', 'blue'])

In [31]:
# Stand-alone table P(r | red, blue): rows are the three states of r, columns
# the four (red, blue) parent configurations.
cpd = TabularCPD(
    variable='r',
    variable_card=3,
    values=[[0, 0, 1, 0],
            [0, 1, 0, 0],
            [1, 0, 0, 1]],
    evidence=['red', 'blue'],
    evidence_card=[2, 2],
)

In [52]:
# Render the CPD of r as a text table: rows are the states of r, columns the
# (red, blue) parent configurations.
print(r_cpd)

╒══════╤════════╤════════╤════════╤════════╕
│ red  │ red_0  │ red_0  │ red_1  │ red_1  │
├──────┼────────┼────────┼────────┼────────┤
│ blue │ blue_0 │ blue_1 │ blue_0 │ blue_1 │
├──────┼────────┼────────┼────────┼────────┤
│ r_0  │ 0.0    │ 0.0    │ 1.0    │ 0.0    │
├──────┼────────┼────────┼────────┼────────┤
│ r_1  │ 0.0    │ 1.0    │ 0.0    │ 0.0    │
├──────┼────────┼────────┼────────┼────────┤
│ r_2  │ 1.0    │ 0.0    │ 0.0    │ 1.0    │
╘══════╧════════╧════════╧════════╧════════╛


In [54]:
# Independence assertions of G; the recorded output lists red and blue as
# independent of each other.
G.get_independencies()

(red _|_ blue)
(blue _|_ red)

In [8]:
import pandas as pd
import numpy as np

In [64]:
# Evidence table with a single observed column r: one row for r=0, one for r=1.
d = dict(r=[0, 1])
df = pd.DataFrame(d)

In [61]:
# Fetch the CPD object attached to node 'red'; the bare expression shows its repr.
G.get_cpds('red')

<TabularCPD representing P(red:2) at 0x7f74aacc8ef0>

In [65]:
# Predicted states of the variables missing from df (blue and red in the
# recorded output), one row per row of df.
print(G.predict(df))
# NOTE(review): the recorded run of this call ended in
# "TypeError: 'NoneType' object is not subscriptable" -- presumably because
# the CPDs of this G were built without state_names; confirm.
G.predict_probability(df)

   blue  red
0     0    1
1     1    0


TypeError: 'NoneType' object is not subscriptable

In [51]:
# Random binary data set: 100 rows over the five variables A..E. A local,
# seeded generator keeps the cell reproducible without touching NumPy's
# global random state.
rng = np.random.RandomState(0)
values = pd.DataFrame(rng.randint(low=0, high=2, size=(100, 5)),
                      columns=['A', 'B', 'C', 'D', 'E'])

# First 80 rows train the model; the last 20 are held out with column B
# removed so that B is the variable to be predicted. drop() returns a new
# frame, so `values` itself is left untouched.
train_data = values[:80]
predict_data = values[80:].drop('B', axis=1)

model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
# Fit on the training split only; fitting on `values` would let the held-out
# rows leak into the estimated CPDs.
model.fit(train_data)

# Per-row state probabilities of the missing variable B.
y_prob = model.predict_probability(predict_data)
y_prob

Unnamed: 0,B_0,B_1
80,0.555556,0.444444
81,0.555556,0.444444
82,0.454545,0.545455
83,0.454545,0.545455
84,0.481481,0.518519
85,0.555556,0.444444
86,0.481481,0.518519
87,0.454545,0.545455
88,0.481481,0.518519
89,0.458333,0.541667


In [70]:
# Pull the fitted CPD of B out of the model and inspect the state labels it
# carries (the recorded output lists labels for all five variables).
c = model.get_cpds('B')
c.state_names

{'A': [0, 1], 'B': [0, 1], 'C': [0, 1], 'D': [0, 1], 'E': [0, 1]}

In [72]:
# Text table of P(B | A, C) as estimated by model.fit.
print(c)

╒══════╤═════════════════════╤═════════════════════╤════════════════════╤════════════════════╕
│ A    │ A(0)                │ A(0)                │ A(1)               │ A(1)               │
├──────┼─────────────────────┼─────────────────────┼────────────────────┼────────────────────┤
│ C    │ C(0)                │ C(1)                │ C(0)               │ C(1)               │
├──────┼─────────────────────┼─────────────────────┼────────────────────┼────────────────────┤
│ B(0) │ 0.45454545454545453 │ 0.48148148148148145 │ 0.5555555555555556 │ 0.4583333333333333 │
├──────┼─────────────────────┼─────────────────────┼────────────────────┼────────────────────┤
│ B(1) │ 0.5454545454545454  │ 0.5185185185185185  │ 0.4444444444444444 │ 0.5416666666666666 │
╘══════╧═════════════════════╧═════════════════════╧════════════════════╧════════════════════╛


In [4]:
import data

In [5]:
# Call data.generate_data(<colour>, 2) for each colour name and key the
# results by colour (the recorded output shows two 3-tuples per colour).
data_dict = {
    color: data.generate_data(color, 2)
    for color in ['red', 'green', 'blue', 'yellow']
}

In [6]:
# Display the generated samples: each colour maps to a list of 3-tuples of
# floats (these look like RGB triples in [0, 1] -- confirm against
# data.generate_data).
data_dict

{'blue': [(0.014873142533296985, 0.095660808080570398, 0.87879563535289651),
  (0.076807118790386875, 0.0053039525051971157, 0.94913488283783187)],
 'green': [(0.026603177501389786, 0.80554215802813534, 0.04294167766748283),
  (0.0074028526061128324, 0.7003192168627983, 0.045685555539905331)],
 'red': [(0.84862318129058856, 0.11480864967409754, 0.096531346981178545),
  (0.83569474366845387, 0.056798580513171708, 0.02193585406980619)],
 'yellow': [(0.8395877710818096, 0.94314033423982102, 0.088615956245045063),
  (0.92837660854875037, 0.97335743215229031, 0.018962754812304704)]}

In [35]:
# Network for the binary variable c, which has three parents: r (3 states)
# and the binary indicators red and blue.
G = BayesianModel()

G.add_nodes_from(['r', 'red', 'blue', 'c'])
G.add_edges_from([('red', 'c'), ('blue', 'c'), ('r', 'c')])

# P(c | r, red, blue) as a 2 x 12 table. Columns follow the evidence order
# (r, red, blue) with the last variable varying fastest, so
# column = 4*r + 2*red + blue. c is 0 with certainty everywhere except in
# column 2 (r=0, red='red', blue=0) and column 5 (r=1, red='not_red',
# blue=1), where c is 1 with certainty.
tbl = np.zeros((2, 12))
tbl[0] = 1
tbl[:, [2, 5]] = [[0], [1]]

state_names = {'c': [0, 1], 'r': [0, 1, 2], 'red': ['not_red', 'red'], 'blue': [0, 1]}

c_cpd = TabularCPD('c', 2, tbl,
                   evidence=['r', 'red', 'blue'], evidence_card=[3, 2, 2],
                   state_names=state_names)

# Priors of the parentless nodes: TabularCPD wants shape
# (variable_card, 1), i.e. one row per state, not a single row.
r_cpd = TabularCPD('r', 3, [[1/3], [1/3], [1/3]], state_names=state_names)
red_cpd = TabularCPD('red', 2, [[0.3], [0.7]], state_names=state_names)
blue_cpd = TabularCPD('blue', 2, [[0.5], [0.5]], state_names=state_names)

G.add_cpds(r_cpd, red_cpd, blue_cpd, c_cpd)

In [21]:
# Text table of P(c | r, red, blue): two rows (states of c) by twelve parent
# configurations.
print(c_cpd)

╒══════╤════════╤════════╤════════╤════════╤════════╤════════╤════════╤════════╤════════╤════════╤════════╤════════╕
│ r    │ r_0    │ r_0    │ r_0    │ r_0    │ r_1    │ r_1    │ r_1    │ r_1    │ r_2    │ r_2    │ r_2    │ r_2    │
├──────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┤
│ red  │ red_0  │ red_0  │ red_1  │ red_1  │ red_0  │ red_0  │ red_1  │ red_1  │ red_0  │ red_0  │ red_1  │ red_1  │
├──────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┤
│ blue │ blue_0 │ blue_1 │ blue_0 │ blue_1 │ blue_0 │ blue_1 │ blue_0 │ blue_1 │ blue_0 │ blue_1 │ blue_0 │ blue_1 │
├──────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┼────────┤
│ c_0  │ 1.0    │ 1.0    │ 0.0    │ 1.0    │ 1.0    │ 0.0    │ 1.0    │ 1.0    │ 1.0    │ 1.0    │ 1.0    │ 1.0    │
├──────┼────────┼────────┼────────┼────────┼────────┼────────┼──

In [36]:
# Evidence table with a single observed column c: one row for c=0, one for c=1.
d = dict(c=[0, 1])
df = pd.DataFrame(d)

In [37]:
# State probabilities of the variables missing from df (blue, r and red), one
# row per observed value of c; output columns are named <variable>_<state>.
G.predict_probability(df)

Unnamed: 0,blue_0,blue_1,r_0,r_1,r_2,red_not_red,red_red
0,0.46,0.54,0.26,0.34,0.4,0.3,0.7
1,0.7,0.3,0.7,0.3,0.0,0.3,0.7


In [38]:
from scipy.stats import multivariate_normal

def normal_pdf(x1, x2):
    """Return the bivariate normal density at the point (x1, x2).

    The distribution has mean [0, 0]; no covariance is passed, so scipy's
    default (cov=1, i.e. the identity matrix) applies.
    """
    return multivariate_normal.pdf((x1, x2), mean=[0, 0])

In [None]:
# Two-node model relating the variable 'red' to a single observation 'o1'.
red_classifier = BayesianModel()

# Build the structure on red_classifier itself; adding these nodes to the
# unrelated model G would leave red_classifier empty and alter G.
red_classifier.add_nodes_from(['red', 'o1'])
red_classifier.add_edge('o1', 'red')

state_names = {'red': ['not_red', 'red'], 'o1': [0, 1]}

# Uniform table over the two states of 'red', shaped (variable_card, 1) as
# TabularCPD expects for a CPD without evidence.
# NOTE(review): the edge above makes 'red' a child of 'o1', yet this CPD has
# no evidence -- either the edge direction or the CPD still needs adjusting
# before it can be attached to red_classifier; confirm intent.
red_cpd = TabularCPD('red', 2, [[0.5], [0.5]], state_names=state_names)
