In [2]:
from pgmpy.independencies import IndependenceAssertion
assertion1 = IndependenceAssertion('X','Y')
assertion1

(X _|_ Y)

In [2]:
assertion2 = IndependenceAssertion('X', 'Y', 'Z')
assertion2

(X _|_ Y | Z)

In [3]:
from pgmpy.factors import JointProbabilityDistribution as Joint
distribution = Joint(['coin1', 'coin2'], [2, 2], [0.25, 0.25, 0.25, 0.25])
print(distribution)

╒═════════╤═════════╤══════════════════╕
│ coin1   │ coin2   │   P(coin1,coin2) │
╞═════════╪═════════╪══════════════════╡
│ coin1_0 │ coin2_0 │           0.2500 │
├─────────┼─────────┼──────────────────┤
│ coin1_0 │ coin2_1 │           0.2500 │
├─────────┼─────────┼──────────────────┤
│ coin1_1 │ coin2_0 │           0.2500 │
├─────────┼─────────┼──────────────────┤
│ coin1_1 │ coin2_1 │           0.2500 │
╘═════════╧═════════╧══════════════════╛


In [6]:
from pgmpy.factors import TabularCPD
quality = TabularCPD(variable = 'Quality',
                    variable_card = 3,
                    values = [[0.3],[0.5],[0.2]])
print(quality)
quality.variables


╒═══════════╤═════╕
│ Quality_0 │ 0.3 │
├───────────┼─────┤
│ Quality_1 │ 0.5 │
├───────────┼─────┤
│ Quality_2 │ 0.2 │
╘═══════════╧═════╛


['Quality']

In [7]:
quality.cardinality

array([3])

In [8]:
quality.values

array([ 0.3,  0.5,  0.2])

In [10]:
location = TabularCPD(variable='Location',
                     variable_card=2,
                     values=[[0.6],[0.4]])
print(location)

╒════════════╤═════╕
│ Location_0 │ 0.6 │
├────────────┼─────┤
│ Location_1 │ 0.4 │
╘════════════╧═════╛


In [28]:
from pgmpy.factors import TabularCPD
cost = TabularCPD(variable='cost',
                  variable_card = 2,
                 values=[[0.8,0.6,0.1,0.6,0.6,0.05],
                        [0.2,0.4,0.9,0.4,0.4,0.95]],
                 evidence=['Q','L'],
                 evidence_card=[3,2])
print(cost)

╒════════╤═════╤═════╤═════╤═════╤═════╤══════╕
│ Q      │ Q_0 │ Q_0 │ Q_1 │ Q_1 │ Q_2 │ Q_2  │
├────────┼─────┼─────┼─────┼─────┼─────┼──────┤
│ L      │ L_0 │ L_1 │ L_0 │ L_1 │ L_0 │ L_1  │
├────────┼─────┼─────┼─────┼─────┼─────┼──────┤
│ cost_0 │ 0.8 │ 0.6 │ 0.1 │ 0.6 │ 0.6 │ 0.05 │
├────────┼─────┼─────┼─────┼─────┼─────┼──────┤
│ cost_1 │ 0.2 │ 0.4 │ 0.9 │ 0.4 │ 0.4 │ 0.95 │
╘════════╧═════╧═════╧═════╧═════╧═════╧══════╛


In [26]:

from pgmpy.models import BayesianModel
model = BayesianModel()
#model.add_nodes_from(['traffic_jam','rain','accident'])
model.add_edge('rain','traffic_jam')
model.add_edge('accident','traffic_jam')
model.nodes()

['traffic_jam', 'accident', 'rain']

In [27]:
model.edges()

[('accident', 'traffic_jam'), ('rain', 'traffic_jam')]

In [29]:
from pgmpy.factors import TabularCPD
cpd_rain = TabularCPD('rain',2,[[0.4],[0.6]])
cpd_accident = TabularCPD('accident',2,[[0.2],[0.8]])
cpd_traffic_jam = TabularCPD('traffic_jam',2,
                            [[0.9,0.6,0.7,0.1],
                            [0.1,0.4,0.3,0.9]],
                            evidence=['rain','accident'],
                            evidence_card=[2,2])
print(cpd_rain)
print(cpd_accident)
print(cpd_traffic_jam)

╒════════╤═════╕
│ rain_0 │ 0.4 │
├────────┼─────┤
│ rain_1 │ 0.6 │
╘════════╧═════╛
╒════════════╤═════╕
│ accident_0 │ 0.2 │
├────────────┼─────┤
│ accident_1 │ 0.8 │
╘════════════╧═════╛
╒═══════════════╤════════════╤════════════╤════════════╤════════════╕
│ rain          │ rain_0     │ rain_0     │ rain_1     │ rain_1     │
├───────────────┼────────────┼────────────┼────────────┼────────────┤
│ accident      │ accident_0 │ accident_1 │ accident_0 │ accident_1 │
├───────────────┼────────────┼────────────┼────────────┼────────────┤
│ traffic_jam_0 │ 0.9        │ 0.6        │ 0.7        │ 0.1        │
├───────────────┼────────────┼────────────┼────────────┼────────────┤
│ traffic_jam_1 │ 0.1        │ 0.4        │ 0.3        │ 0.9        │
╘═══════════════╧════════════╧════════════╧════════════╧════════════╛


In [30]:
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam)
model.get_cpds()

[<TabularCPD representing P(rain:2) at 0x10ba21908>,
 <TabularCPD representing P(accident:2) at 0x10ba219b0>,
 <TabularCPD representing P(traffic_jam:2 | rain:2, accident:2) at 0x10ba21240>]

In [31]:
model.add_node('long_queues')
model.add_edge('traffic_jam','long_queues')
cpd_long_queues = TabularCPD('long_queues', 2,
                            [[0.9, 0.2],
                            [0.1, 0.8]],
                            evidence = ['traffic_jam'],
                            evidence_card = [2])
model.add_cpds(cpd_long_queues)
model.add_nodes_from(['getting_up_late',
                    'late_for_school'])
model.add_edges_from([('getting_up_late', 'late_for_school'),
                     ('traffic_jam', 'late_for_school')])
cpd_getting_up_late = TabularCPD('getting_up_late', 2,
                                [[0.6],[0.4]])
cpd_late_for_school = TabularCPD('late_for_school',2,
                                [[0.9,0.45,0.8,0.1],
                                [0.1,0.55,0.2,0.9]],
                                evidence=['getting_up_late',
                                         'traffic_jam'],
                                evidence_card = [2,2])
model.add_cpds(cpd_getting_up_late, cpd_late_for_school)
model.get_cpds()

[<TabularCPD representing P(rain:2) at 0x10ba21908>,
 <TabularCPD representing P(accident:2) at 0x10ba219b0>,
 <TabularCPD representing P(traffic_jam:2 | rain:2, accident:2) at 0x10ba21240>,
 <TabularCPD representing P(long_queues:2 | traffic_jam:2) at 0x10ba21208>,
 <TabularCPD representing P(getting_up_late:2) at 0x10ba35a20>,
 <TabularCPD representing P(late_for_school:2 | getting_up_late:2, traffic_jam:2) at 0x10ba21358>]

In [55]:
#Parameterizing a Markov Network
from pgmpy.factors import Factor
phi = Factor(['A','B'], [2,3],[1000,1,5,100,20,30])
print(phi)

╒═════╤═════╤════════════╕
│ A   │ B   │   phi(A,B) │
╞═════╪═════╪════════════╡
│ A_0 │ B_0 │  1000.0000 │
├─────┼─────┼────────────┤
│ A_0 │ B_1 │     1.0000 │
├─────┼─────┼────────────┤
│ A_0 │ B_2 │     5.0000 │
├─────┼─────┼────────────┤
│ A_1 │ B_0 │   100.0000 │
├─────┼─────┼────────────┤
│ A_1 │ B_1 │    20.0000 │
├─────┼─────┼────────────┤
│ A_1 │ B_2 │    30.0000 │
╘═════╧═════╧════════════╛


In [56]:
phi_marginalized = phi.marginalize(['B'],inplace=False)
phi_marginalized.scope()
phi.marginalize(['A'])
print(phi)

╒═════╤═══════════╕
│ B   │    phi(B) │
╞═════╪═══════════╡
│ B_0 │ 1100.0000 │
├─────┼───────────┤
│ B_1 │   21.0000 │
├─────┼───────────┤
│ B_2 │   35.0000 │
╘═════╧═══════════╛


In [57]:
phi.scope()

['B']

In [2]:
from pgmpy.factors import Factor
import numpy as np
price = Factor(['price','quality','location'],
              [2,2,2],
              np.arange(8))
price_marginalized = price.marginalize(['quality', 'location'],
                                      inplace = False)
price_marginalized.scope()
print(price_marginalized)

╒═════════╤══════════════╕
│ price   │   phi(price) │
╞═════════╪══════════════╡
│ price_0 │       6.0000 │
├─────────┼──────────────┤
│ price_1 │      22.0000 │
╘═════════╧══════════════╛


In [15]:
phi = Factor(['a','b'],[2,2],[1000,1,5,100])
phi_reduced = phi.reduce([('b',0)],inplace=False)
print(phi_reduced)

╒═════╤═══════════╕
│ a   │    phi(a) │
╞═════╪═══════════╡
│ a_0 │ 1000.0000 │
├─────┼───────────┤
│ a_1 │    5.0000 │
╘═════╧═══════════╛


In [16]:
phi_reduced.scope()

['a']

In [17]:
phi.reduce([('a',1)])
print(phi)

╒═════╤══════════╕
│ b   │   phi(b) │
╞═════╪══════════╡
│ b_0 │   5.0000 │
├─────┼──────────┤
│ b_1 │ 100.0000 │
╘═════╧══════════╛


In [18]:
phi.scope()

['b']

In [19]:
price_reduced = price.reduce([('quality', 0),('location', 0)],
                            inplace=False)
price_reduced.scope()

['price']

In [21]:
phi1 = Factor(['a','b'],[2,2],[1000,1,5,100])
phi2 = Factor(['b','c'],[2,3],[1,100,5,200,3,1000])
phi = phi1 * phi2
phi.scope()

['a', 'b', 'c']

In [22]:
print(phi)

╒═════╤═════╤═════╤══════════════╕
│ a   │ b   │ c   │   phi(a,b,c) │
╞═════╪═════╪═════╪══════════════╡
│ a_0 │ b_0 │ c_0 │    1000.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_0 │ b_0 │ c_1 │  100000.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_0 │ b_0 │ c_2 │    5000.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_0 │ b_1 │ c_0 │     200.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_0 │ b_1 │ c_1 │       3.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_0 │ b_1 │ c_2 │    1000.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_1 │ b_0 │ c_0 │       5.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_1 │ b_0 │ c_1 │     500.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_1 │ b_0 │ c_2 │      25.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_1 │ b_1 │ c_0 │   20000.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_1 │ b_1 │ c_1 │     300.0000 │
├─────┼─────┼─────┼──────────────┤
│ a_1 │ b_1 │ c_2 │  100000.0000 │
╘═════╧═════╧═════╧══════════════╛


In [29]:
from pgmpy.models import MarkovModel
model = MarkovModel([('A','B'),('B','C')])
model.add_node('D')
model.add_edges_from([('C','D'),('D','A')])
from pgmpy.factors import Factor
factor_a_b = Factor(variables=['A','B'],cardinality=[2,2],values=[90,100,1,10])
factor_b_c = Factor(variables=['B','C'],cardinality=[2,2],values=[10,80,70,30])
factor_c_d = Factor(variables=['C','D'],cardinality=[2,2],values=[10,1,100,90])
factor_d_a = Factor(variables=['D','A'],cardinality=[2,2],values=[80,60,20,10])
model.add_factors(factor_a_b,factor_b_c,factor_c_d,factor_d_a)
model.get_factors()

[<Factor representing phi(A:2, B:2) at 0x207e92624e0>,
 <Factor representing phi(B:2, C:2) at 0x207e926a1d0>,
 <Factor representing phi(C:2, D:2) at 0x207e914f8d0>,
 <Factor representing phi(D:2, A:2) at 0x207e926a198>]

In [38]:
from pgmpy.models import MarkovModel
from pgmpy.factors import Factor
import numpy as np
model = MarkovModel()
model.add_nodes_from(['A','B','C','D'])
model.add_edges_from([('A','B'),('B','C'),('C','D'),('D','A')])
phi_A_B = Factor(['A','B'],[2,2],[1,100,100,1])
phi_B_C = Factor(['B','C'],[2,2],[100,1,1,100])
phi_C_D = Factor(['C','D'],[2,2],[1,100,100,1])
phi_D_A = Factor(['D','A'],[2,2],[100,1,1,100])
model.add_factors(phi_A_B,phi_B_C,phi_C_D,phi_D_A)
chordal_graph = model.triangulate()
chordal_graph.edges()

[('A', 'B'), ('A', 'D'), ('B', 'C'), ('B', 'D'), ('D', 'C')]

In [42]:
from pgmpy.models import BayesianModel
from pgmpy.factors import TabularCPD, Factor
from pgmpy.inference import BeliefPropagation
model = BayesianModel([('rain','traffic_jam'),
                      ('accident','traffic_jam'),
                      ('traffic_jam','long_queues'),
                      ('traffic_jam','late_for_school'),
                      ('getting_up_late','late_for_school')])
cpd_rain = TabularCPD('rain', 2,[[0.4],[0.6]])
cpd_accident = TabularCPD('accident',2,[[0.2],[0.8]])
cpd_traffic_jam = TabularCPD('traffic_jam',2,
                            [[0.9,0.6,0.7,0.1],
                            [0.1,0.4,0.3,0.9]],
                            evidence=['rain','accident'],evidence_card=[2,2])
cpd_getting_up_late = TabularCPD('getting_up_late', 2, [[0.6,],[0.4]])
cpd_late_for_school = TabularCPD('late_for_school',2,
                                [[0.9,0.45,0.8,0.1],[0.1,0.55,0.2,0.9]],
                                evidence=['getting_up_late','traffic_jam'],
                                evidence_card=[2,2])
cpd_long_queues = TabularCPD('long_queues',2,
                            [[0.9,0.2],
                            [0.1,0.8]],
                            evidence=['traffic_jam'],
                            evidence_card=[2])
model.add_cpds(cpd_rain,cpd_accident,cpd_traffic_jam, cpd_getting_up_late,cpd_late_for_school,cpd_long_queues)
belief_propagation = BeliefPropagation(model)
belief_propagation.calibrate()
belief_propagation.get_clique_beliefs()

{('traffic_jam',
  'late_for_school',
  'getting_up_late'): <Factor representing phi(traffic_jam:2, late_for_school:2, getting_up_late:2) at 0x207e93e1080>,
 ('traffic_jam',
  'long_queues'): <Factor representing phi(traffic_jam:2, long_queues:2) at 0x207e93e1278>,
 ('traffic_jam',
  'rain',
  'accident'): <Factor representing phi(traffic_jam:2, rain:2, accident:2) at 0x207e93e12b0>}

In [43]:
belief_propagation.get_sepset_beliefs()

{frozenset({('traffic_jam', 'late_for_school', 'getting_up_late'),
            ('traffic_jam',
             'long_queues')}): <Factor representing phi(traffic_jam:2) at 0x207e9119d68>,
 frozenset({('traffic_jam', 'late_for_school', 'getting_up_late'),
            ('traffic_jam',
             'rain',
             'accident')}): <Factor representing phi(traffic_jam:2) at 0x207e93e1240>}

In [47]:
from pgmpy.models import BayesianModel
from pgmpy.factors import TabularCPD
from pgmpy.inference import VariableElimination
model = BayesianModel([('rain','traffic_jam'),
                      ('accident','traffic_jam'),
                      ('traffic_jam','long_queues'),
                      ('traffic_jam','late_for_school'),
                      ('getting_up_late','late_for_school')])
cpd_rain = TabularCPD('rain',2,[[0.4],[0.6]])
cpd_accident = TabularCPD('accident',2,[[0.2],[0.8]])
cpd_traffic_jam = TabularCPD('traffic_jam',2,
                            [[0.9,0.6,0.7,0.1],
                            [0.1,0.4,0.3,0.9]],
                            evidence=['rain','accident'],
                            evidence_card=[2,2])
cpd_getting_up_late = TabularCPD('getting_up_late',2,[[0.6],[0.4]])
cpd_late_for_school = TabularCPD('late_for_school',2,
                                [[0.9,0.45,0.8,0.1],
                                [0.1,0.55,0.2,0.9]],
                                evidence=['getting_up_late','traffic_jam'],
                                evidence_card=[2,2])
cpd_long_queues = TabularCPD('long_queues',2,
                            [[0.9,0.2],[0.1,0.8]],evidence=['traffic_jam'],evidence_card=[2])
model.add_cpds(cpd_rain,cpd_accident,cpd_traffic_jam,cpd_getting_up_late,cpd_late_for_school,cpd_long_queues)
model_inference = VariableElimination(model)
model_inference.max_marginal(variables=['late_for_school'])

0.5714285714285714

In [49]:
model_inference.max_marginal(variables=['late_for_school'],evidence={'traffic_jam':1,'getting_up_late':0})

0.80000000000000004

In [50]:
model_inference.max_marginal(variables=['late_for_school','long_queues'],evidence={'traffic_jam':1,'getting_up_late':0})

0.6399999999999999

In [65]:
#Predictions from the model using pgmpy
import numpy as np
from pgmpy.models import BayesianModel
data = np.random.randint(low=0, high=2, size=(1000,4))
data = data.astype(float)
data

array([[ 0.,  1.,  1.,  1.],
       [ 1.,  1.,  0.,  0.],
       [ 1.,  1.,  0.,  1.],
       ..., 
       [ 1.,  0.,  0.,  1.],
       [ 0.,  0.,  0.,  0.],
       [ 1.,  1.,  0.,  1.]])

In [66]:
import pandas as pd
data = pd.DataFrame(data, columns=['cost','quality','location','no_of_people'])
data

Unnamed: 0,cost,quality,location,no_of_people
0,0.0,1.0,1.0,1.0
1,1.0,1.0,0.0,0.0
2,1.0,1.0,0.0,1.0
3,1.0,1.0,0.0,0.0
4,1.0,0.0,1.0,0.0
5,1.0,1.0,1.0,1.0
6,0.0,0.0,0.0,1.0
7,0.0,0.0,1.0,0.0
8,0.0,1.0,0.0,1.0
9,1.0,1.0,1.0,1.0


In [69]:
train = data[:750]
test = data[750:].drop('no_of_people',axis=1)
test
train

Unnamed: 0,cost,quality,location,no_of_people
0,0.0,1.0,1.0,1.0
1,1.0,1.0,0.0,0.0
2,1.0,1.0,0.0,1.0
3,1.0,1.0,0.0,0.0
4,1.0,0.0,1.0,0.0
5,1.0,1.0,1.0,1.0
6,0.0,0.0,0.0,1.0
7,0.0,0.0,1.0,0.0
8,0.0,1.0,0.0,1.0
9,1.0,1.0,1.0,1.0


In [2]:
import numpy as np
from scipy import optimize
f_to_optimize = lambda x: -np.sin(x)
optimize.fmin_cg(f_to_optimize, x0=[0])

Optimization terminated successfully.
         Current function value: -1.000000
         Iterations: 2
         Function evaluations: 15
         Gradient evaluations: 5


array([ 1.57079632])

In [3]:
import numpy as np
import pandas as pd
from pgmpy.models import MarkovModel
from pgmpy.estimators import MaximumLikelihoodEstimator
raw_data = np.random.randint(low=0, high=2, size=(100,2))
raw_data

array([[1, 1],
       [0, 1],
       [0, 0],
       [0, 1],
       [0, 0],
       [1, 0],
       [0, 1],
       [1, 0],
       [0, 0],
       [0, 0],
       [1, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [0, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [0, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 0],
       [0, 0],
       [1, 1],
       [1, 1],
       [1, 1],
       [0, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 1],
       [0, 1],
       [0, 0],
       [1, 1],
       [1, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 1],
       [0, 1],
       [1, 0],
       [0, 0],
       [0, 1],
       [0, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [0, 0],
       [0, 0],
       [0, 1],
       [1, 1],
       [0, 1],
       [0,

In [5]:
data = pd.DataFrame(raw_data, columns=['A','B'])
data

Unnamed: 0,A,B
0,1,1
1,0,1
2,0,0
3,0,1
4,0,0
5,1,0
6,0,1
7,1,0
8,0,0
9,0,0
