In [1]:
!python --version

Python 3.5.6


In [1]:
import pandas as pd
# Toy dataset: 14 observations of a fruit's kind, its size, and whether it was tasty.
# Each list below is one column; position i across the three lists is observation i.
fruit_column = ["banana", "apple", "banana", "apple", "banana", "apple", "banana",
                "apple", "apple", "apple", "banana", "banana", "apple", "banana"]
tasty_column = ["yes", "no", "yes", "yes", "yes", "yes", "yes",
                "yes", "yes", "yes", "yes", "no", "no", "no"]
size_column = ["large", "large", "large", "small", "large", "large", "large",
               "small", "large", "large", "large", "large", "small", "small"]

# Key order (fruit, tasty, size) is kept as in the original dict literal.
data = pd.DataFrame(data={'fruit': fruit_column,
                          'tasty': tasty_column,
                          'size': size_column})
print(data)

     fruit   size tasty
0   banana  large   yes
1    apple  large    no
2   banana  large   yes
3    apple  small   yes
4   banana  large   yes
5    apple  large   yes
6   banana  large   yes
7    apple  small   yes
8    apple  large   yes
9    apple  large   yes
10  banana  large   yes
11  banana  large    no
12   apple  small    no
13  banana  small    no


In [2]:
from pgmpy.models import BayesianModel

# Both 'fruit' and 'size' point into 'tasty': fruit -> tasty <- size
tasty_parent_edges = [('fruit', 'tasty'), ('size', 'tasty')]
model = BayesianModel(tasty_parent_edges)

In [3]:
from pgmpy.estimators import ParameterEstimator
pe = ParameterEstimator(model, data)
# 'fruit' has no parents in the model, so its counts are unconditional;
# 'tasty' is counted conditional on fruit and size.
for counted_variable in ('fruit', 'tasty'):
    print("\n", pe.state_counts(counted_variable))


         fruit
apple       7
banana      7

 fruit apple       banana      
size  large small  large small
tasty                         
no      1.0   1.0    1.0   1.0
yes     3.0   2.0    5.0   0.0


In [4]:
from pgmpy.estimators import MaximumLikelihoodEstimator
mle = MaximumLikelihoodEstimator(model, data)
# First the unconditional CPD of 'fruit', then the conditional CPD of 'tasty'.
for cpd_variable in ('fruit', 'tasty'):
    print(mle.estimate_cpd(cpd_variable))

╒═══════════════╤═════╕
│ fruit(apple)  │ 0.5 │
├───────────────┼─────┤
│ fruit(banana) │ 0.5 │
╘═══════════════╧═════╛
╒════════════╤══════════════╤════════════════════╤═════════════════════╤═══════════════╕
│ fruit      │ fruit(apple) │ fruit(apple)       │ fruit(banana)       │ fruit(banana) │
├────────────┼──────────────┼────────────────────┼─────────────────────┼───────────────┤
│ size       │ size(large)  │ size(small)        │ size(large)         │ size(small)   │
├────────────┼──────────────┼────────────────────┼─────────────────────┼───────────────┤
│ tasty(no)  │ 0.25         │ 0.3333333333333333 │ 0.16666666666666666 │ 1.0           │
├────────────┼──────────────┼────────────────────┼─────────────────────┼───────────────┤
│ tasty(yes) │ 0.75         │ 0.6666666666666666 │ 0.8333333333333334  │ 0.0           │
╘════════════╧══════════════╧════════════════════╧═════════════════════╧═══════════════╛


In [5]:
from pgmpy.estimators import BayesianEstimator
est = BayesianEstimator(model, data)

# Estimate the CPD of 'tasty' with a BDeu prior and an equivalent sample size of 10.
tasty_cpd = est.estimate_cpd('tasty', prior_type='BDeu', equivalent_sample_size=10)
print(tasty_cpd)

╒════════════╤═════════════════════╤════════════════════╤════════════════════╤═════════════════════╕
│ fruit      │ fruit(apple)        │ fruit(apple)       │ fruit(banana)      │ fruit(banana)       │
├────────────┼─────────────────────┼────────────────────┼────────────────────┼─────────────────────┤
│ size       │ size(large)         │ size(small)        │ size(large)        │ size(small)         │
├────────────┼─────────────────────┼────────────────────┼────────────────────┼─────────────────────┤
│ tasty(no)  │ 0.34615384615384615 │ 0.4090909090909091 │ 0.2647058823529412 │ 0.6428571428571429  │
├────────────┼─────────────────────┼────────────────────┼────────────────────┼─────────────────────┤
│ tasty(yes) │ 0.6538461538461539  │ 0.5909090909090909 │ 0.7352941176470589 │ 0.35714285714285715 │
╘════════════╧═════════════════════╧════════════════════╧════════════════════╧═════════════════════╛


In [6]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator

# generate data: 5000 rows, four columns, every entry drawn from {0, 1}
binary_samples = np.random.randint(low=0, high=2, size=(5000, 4))
data = pd.DataFrame(binary_samples, columns=['A', 'B', 'C', 'D'])

network_edges = [('A', 'B'), ('A', 'C'), ('D', 'C'), ('B', 'D')]
model = BayesianModel(network_edges)

# default equivalent_sample_size=5
model.fit(data, estimator=BayesianEstimator, prior_type="BDeu")

# Print every CPD attached to the fitted model.
for cpd in model.get_cpds():
    print(cpd)

╒══════╤══════════╕
│ A(0) │ 0.493207 │
├──────┼──────────┤
│ A(1) │ 0.506793 │
╘══════╧══════════╛
╒══════╤═════════════════════╤════════════════════╤═══════════════════╤═════════════════════╕
│ A    │ A(0)                │ A(0)               │ A(1)              │ A(1)                │
├──────┼─────────────────────┼────────────────────┼───────────────────┼─────────────────────┤
│ D    │ D(0)                │ D(1)               │ D(0)              │ D(1)                │
├──────┼─────────────────────┼────────────────────┼───────────────────┼─────────────────────┤
│ C(0) │ 0.5337824250955158  │ 0.5097939196082432 │ 0.498841027622175 │ 0.49154759508955526 │
├──────┼─────────────────────┼────────────────────┼───────────────────┼─────────────────────┤
│ C(1) │ 0.46621757490448423 │ 0.4902060803917568 │ 0.501158972377825 │ 0.5084524049104447  │
╘══════╧═════════════════════╧════════════════════╧═══════════════════╧═════════════════════╛
╒══════╤═════════════════════╤═════════════════════╕
│

In [9]:
import pandas as pd
import numpy as np
from pgmpy.estimators import BdeuScore, K2Score, BicScore
from pgmpy.models import BayesianModel

# Random sample over three variables: X and Y are drawn from {0, 1, 2, 3},
# and Z depends on both of them (Z = X + Y).
xy_samples = np.random.randint(0, 4, size=(5000, 2))
data = pd.DataFrame(xy_samples, columns=['X', 'Y'])
data['Z'] = data['X'] + data['Y']

# One scorer of each kind, all built on the same data.
bdeu = BdeuScore(data, equivalent_sample_size=5)
k2 = K2Score(data)
bic = BicScore(data)

# Two candidate structures to compare.
model1 = BayesianModel([('X', 'Z'), ('Y', 'Z')])  # X -> Z <- Y
model2 = BayesianModel([('X', 'Z'), ('X', 'Y')])  # Y <- X -> Z

# Print BDeu, K2 and BIC scores for model1, then the same three for model2.
for candidate in (model1, model2):
    for scorer in (bdeu, k2, bic):
        print(scorer.score(candidate))

-13937.788006429186
-14328.52525469756
-14293.8300674
-20895.741459979847
-20922.58850494716
-20939.7925435


In [10]:
# Local BDeu score of 'Z' for progressively larger parent sets.
for parent_set in ([], ['X'], ['X', 'Y']):
    print(bdeu.local_score('Z', parents=parent_set))

-9160.910787211043
-6988.8789819904105
-57.116761761048565


In [11]:
from pgmpy.estimators import ExhaustiveSearch

# Exhaustive structure search over the current `data`, scored with BIC.
es = ExhaustiveSearch(data, scoring_method=bic)
best_model = es.estimate()
print(best_model.edges())

print("\nAll DAGs by score:")
scored_dags = es.all_scores()
# Walk the score list back to front, printing each DAG's score and edges.
for score, dag in reversed(scored_dags):
    print(score, dag.edges())

[('X', 'Z'), ('Y', 'Z')]

All DAGs by score:
-14293.8300674 [('X', 'Z'), ('Y', 'Z')]
-14324.1239973 [('Z', 'Y'), ('X', 'Z'), ('X', 'Y')]
-14324.1239973 [('Z', 'X'), ('Z', 'Y'), ('Y', 'X')]
-14324.1239973 [('Z', 'X'), ('Y', 'Z'), ('Y', 'X')]
-14324.1239973 [('X', 'Z'), ('Y', 'Z'), ('Y', 'X')]
-14324.1239973 [('Z', 'X'), ('Z', 'Y'), ('X', 'Y')]
-14324.1239973 [('X', 'Z'), ('X', 'Y'), ('Y', 'Z')]
-16464.4037942 [('Z', 'Y'), ('X', 'Y')]
-16465.5924607 [('Z', 'X'), ('Y', 'X')]
-18768.0301502 [('Z', 'X'), ('Y', 'Z')]
-18768.0301502 [('Z', 'Y'), ('X', 'Z')]
-18768.0301502 [('Z', 'X'), ('Z', 'Y')]
-20908.3099472 [('Z', 'Y')]
-20908.3099472 [('Y', 'Z')]
-20909.4986136 [('Z', 'X')]
-20909.4986136 [('X', 'Z')]
-20938.603877 [('X', 'Y'), ('Y', 'Z')]
-20938.603877 [('Z', 'Y'), ('Y', 'X')]
-20938.603877 [('Y', 'Z'), ('Y', 'X')]
-20939.7925435 [('X', 'Z'), ('Y', 'X')]
-20939.7925435 [('Z', 'X'), ('X', 'Y')]
-20939.7925435 [('X', 'Z'), ('X', 'Y')]
-23049.7784106 []
-23080.0723404 [('X', 'Y')]
-23080.0

In [12]:
from pgmpy.estimators import HillClimbSearch

# create some data with dependencies:
# eight columns drawn from {0, 1, 2}, then A absorbs B and C, and H is derived from G and A
raw_samples = np.random.randint(0, 3, size=(2500, 8))
data = pd.DataFrame(raw_samples, columns=list('ABCDEFGH'))
data['A'] = data['A'] + (data['B'] + data['C'])
data['H'] = data['G'] - data['A']

# Hill-climbing structure search scored with BIC on the same data.
bic_scorer = BicScore(data)
hc = HillClimbSearch(data, scoring_method=bic_scorer)
best_model = hc.estimate()
print(best_model.edges())

[('A', 'C'), ('A', 'B'), ('C', 'B'), ('H', 'A'), ('G', 'A'), ('G', 'H')]


In [14]:
from pgmpy.estimators import ConstraintBasedEstimator

# Eight columns drawn from {0, 1, 2}, then dependencies are injected:
# A absorbs B and C, H is derived from G and A, and E is multiplied by F.
raw_samples = np.random.randint(0, 3, size=(2500, 8))
data = pd.DataFrame(raw_samples, columns=list('ABCDEFGH'))
data['A'] = data['A'] + (data['B'] + data['C'])
data['H'] = data['G'] - data['A']
data['E'] = data['E'] * data['F']

est = ConstraintBasedEstimator(data)

# Each entry is the argument tuple for one conditional-independence test;
# the trailing comment is the expected verdict.
ci_queries = [
    ('B', 'H'),          # dependent
    ('B', 'E'),          # independent
    ('B', 'H', ['A']),   # independent
    ('A', 'G'),          # independent
    ('A', 'G', ['H']),   # dependent
]
for query in ci_queries:
    # Unpack so two-argument queries are still called with two arguments.
    print(est.test_conditional_independence(*query))

(713.829722388253, 9.1010828464348721e-134, True)
(10.119393215763498, 0.51968074367018713, True)
(12.825047143811497, 0.99999885613426298, True)
(7.6962975022135991, 0.99369392437360171, True)
(4646.0, 0.0, True)


In [15]:
def is_independent(X, Y, Zs=[], significance_level=0.05):
    return est.test_conditional_independence(X, Y, Zs)[1] >= significance_level

# Variable pairs to check, with an optional conditioning set as the third item.
independence_queries = [
    ('B', 'H'),
    ('B', 'E'),
    ('B', 'H', ['A']),
    ('A', 'G'),
    ('A', 'G', ['H']),
]
for query in independence_queries:
    print(is_independent(*query))

False
True
True
True
False


In [16]:
# Build a DAG in three explicit stages, printing the edges after each one.
# The prints are interleaved with the estimator calls on purpose; keep the order.

# Stage 1: estimate the undirected skeleton and the accompanying sets, at significance level 0.01.
# NOTE(review): 'seperating_sets' is a misspelling of 'separating_sets'; rename once it is
# confirmed that no other cell refers to the current name.
skel, seperating_sets = est.estimate_skeleton(significance_level=0.01)
print("Undirected edges: ", skel.edges())

# Stage 2: turn the skeleton plus those sets into a PDAG.
pdag = est.skeleton_to_pdag(skel, seperating_sets)
print("PDAG edges:       ", pdag.edges())

# Stage 3: turn the PDAG into a DAG. This rebinds the notebook-level `model`.
model = est.pdag_to_dag(pdag)
print("DAG edges:        ", model.edges())

Undirected edges:  [('B', 'A'), ('F', 'E'), ('A', 'C'), ('A', 'H'), ('H', 'G')]
PDAG edges:        [('B', 'A'), ('F', 'E'), ('A', 'H'), ('C', 'A'), ('G', 'H'), ('E', 'F')]
DAG edges:         [('B', 'A'), ('A', 'H'), ('C', 'A'), ('G', 'H'), ('E', 'F')]


In [17]:
# Single-call estimate at significance level 0.01; print the resulting edges.
estimated_model = est.estimate(significance_level=0.01)
print(estimated_model.edges())


[('B', 'A'), ('A', 'H'), ('C', 'A'), ('G', 'H'), ('E', 'F')]


In [18]:
from pgmpy.independencies import Independencies

# Two independence assertions, immediately expanded to their closure.
# The closure is required (!) for faithfulness.
ind = Independencies(
    ['B', 'C'],
    ['A', ['B', 'C'], 'D'],
).closure()

# Estimate a structure over nodes A, B, C, D from the independencies alone.
node_names = "ABCD"
model = ConstraintBasedEstimator.estimate_from_independencies(node_names, ind)

print(model.edges())

[('A', 'D'), ('C', 'D'), ('B', 'D')]
