In [12]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import BayesianEstimator
from pgmpy.sampling import BayesianModelSampling
from pgmpy.inference import CausalInference
from utils import adj_matrix_to_edges, remove_self_child
from learn_bayesian_network import Bayesian_Model_Bandit


In [13]:
# Read the causal-structure matrix and the sample table from the data directory.
causal_structure = np.loadtxt('data/5_node_matrix.txt')
data = pd.read_csv('data/5_nodes.csv')

# The matrix is printed as plain text; the frame is the cell's rich output.
print(causal_structure)
data

[[0. 1. 0. 0. 0.]
 [0. 0. 1. 1. 0.]
 [0. 0. 0. 1. 1.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


Unnamed: 0,x_1,x_2,x_3,x_4,Y
0,1,1,1,0,0
1,0,0,0,0,0
2,1,1,0,0,0
3,0,0,0,0,0
4,0,0,0,0,0
...,...,...,...,...,...
495,0,0,0,0,0
496,1,0,0,0,0
497,1,1,1,0,0
498,0,0,0,0,0


In [14]:
# Convert the matrix into an edge list, build the network from it, and bind
# an estimator to the network and the loaded samples.
edges = adj_matrix_to_edges(causal_structure)
model = BayesianNetwork(ebunch=edges)
estimator = BayesianEstimator(model=model, data=data)


In [15]:
# Keep only the rows in which x_2 equals 1.
filtered_data = data.loc[data['x_2'] == 1]
filtered_data

Unnamed: 0,x_1,x_2,x_3,x_4,Y
0,1,1,1,0,0
2,1,1,0,0,0
8,1,1,1,1,1
13,1,1,1,1,0
19,1,1,1,0,1
...,...,...,...,...,...
478,1,1,1,0,1
485,1,1,0,0,0
487,1,1,1,0,0
489,1,1,1,0,0


In [16]:
# Print the CPD the estimator computes for Y (the recorded table conditions Y on x_3).
print(estimator.estimate_cpd('Y'))

+------+-----------------------+--------------------+
| x_3  | x_3(0)                | x_3(1)             |
+------+-----------------------+--------------------+
| Y(0) | 0.9972914409534128    | 0.6264367816091954 |
+------+-----------------------+--------------------+
| Y(1) | 0.0027085590465872156 | 0.3735632183908046 |
+------+-----------------------+--------------------+


In [17]:
# List the CPDs the estimator produces for the nodes of the model.
estimator.get_parameters()

[<TabularCPD representing P(x_1:2) at 0x7fe295722400>,
 <TabularCPD representing P(x_2:2 | x_1:2) at 0x7fe2962d88e0>,
 <TabularCPD representing P(x_3:2 | x_2:2) at 0x7fe29630cb20>,
 <TabularCPD representing P(x_4:2 | x_2:2, x_3:2) at 0x7fe2962d8df0>,
 <TabularCPD representing P(Y:2 | x_3:2) at 0x7fe2962be3d0>]

In [18]:
# Fit the network's CPDs from the loaded samples using a BDeu prior.
model.fit(data, estimator=BayesianEstimator, prior_type="BDeu")
# Seed the sampler: a later cell picks a specific row (iloc[2]) of this frame
# as evidence, so an unseeded draw makes every downstream result change on
# each Restart & Run All.
simulated_data = model.simulate(seed=42)

  0%|          | 0/5 [00:00<?, ?it/s]

In [19]:
# Remove the Y column by rebinding instead of mutating in place.
# With inplace=True a second run of this cell raised KeyError because 'Y'
# was already gone; errors='ignore' makes the cell safe to re-run.
simulated_data = simulated_data.drop(columns='Y', errors='ignore')

In [20]:
# Display the simulated samples after the Y column has been dropped.
simulated_data

Unnamed: 0,x_4,x_3,x_1,x_2
0,0,0,0,0
1,0,0,0,0
2,0,0,0,0
3,0,0,1,0
4,0,0,0,0
5,0,1,1,1
6,0,0,0,0
7,0,0,1,0
8,0,0,1,0
9,0,0,0,0


In [36]:
# Take the third simulated row as a {column: value} dict.
sample = simulated_data.iloc[2].to_dict()
# remove_self_child is a project helper from utils. Judging by the recorded
# output (only x_1 is kept), it presumably removes x_2 itself and x_2's
# children from the dict — TODO confirm against utils.
modified_sample = remove_self_child(model, 'x_2', sample)

In [37]:
# Inspect the dict returned by remove_self_child.
modified_sample

{'x_1': 0}

In [39]:
# Sample 10000 rows under do(x_2 = 1), conditioning on the filtered evidence.
# The unfiltered `sample` still contains x_2 (the do target) and its children
# x_3 / x_4 at their pre-intervention values, which contradicts the
# intervention; `modified_sample` was computed for exactly this purpose and
# was otherwise unused. The recorded summary (x_3 mean ~0.55) matches
# conditioning on x_1 only.
res = model.simulate(n_samples=10000, do={'x_2': 1}, evidence=modified_sample)

  0%|          | 0/10000 [00:00<?, ?it/s]

In [40]:
# Summary statistics of the interventional samples held in `res`.
res.describe()

Unnamed: 0,Y,x_4,x_3,x_1,x_2
count,10000.0,10000.0,10000.0,10000.0,10000.0
mean,0.2075,0.3249,0.5541,0.0,1.0
std,0.405537,0.468361,0.497089,0.0,0.0
min,0.0,0.0,0.0,0.0,1.0
25%,0.0,0.0,0.0,0.0,1.0
50%,0.0,0.0,1.0,0.0,1.0
75%,0.0,1.0,1.0,0.0,1.0
max,1.0,1.0,1.0,0.0,1.0


In [24]:
# Children of Y in the network (the recorded result is an empty list).
md = model.get_children('Y')

In [25]:
# Display the list of Y's children.
md

[]

In [26]:
# NOTE(review): repeats the earlier `res.describe()` cell. This cell's
# execution count (26) predates the `res` assignment above (39), so the
# recorded output presumably describes an older `res` — re-execute to refresh.
res.describe()

Unnamed: 0,Y,x_4,x_3,x_1,x_2
count,10000.0,10000.0,10000.0,10000.0,10000.0
mean,0.3692,0.4968,1.0,0.0,0.0039
std,0.482612,0.500015,0.0,0.0,0.062331
min,0.0,0.0,1.0,0.0,0.0
25%,0.0,0.0,1.0,0.0,0.0
50%,0.0,0.0,1.0,0.0,0.0
75%,1.0,1.0,1.0,0.0,0.0
max,1.0,1.0,1.0,0.0,1.0


In [27]:
# Children of x_2 in the network.
model.get_children('x_2')

['x_3', 'x_4']

In [28]:
# Print the CPD of Y stored on the fitted model (conditioned on x_3 in the recorded table).
print(model.get_cpds('Y'))

+------+-----------------------+--------------------+
| x_3  | x_3(0)                | x_3(1)             |
+------+-----------------------+--------------------+
| Y(0) | 0.9972914409534128    | 0.6264367816091954 |
+------+-----------------------+--------------------+
| Y(1) | 0.0027085590465872156 | 0.3735632183908046 |
+------+-----------------------+--------------------+


In [29]:
# New frame holding the simulated samples without the x_3 column.
simulated_data_partial = simulated_data.drop(columns="x_3")

# Disabled experiment, kept for reference:
# inference = BayesianModelSampling(model)
# inference.forward_sample(size=10, partial_samples=simulated_data)

In [30]:
# Draw 100 samples under do(x_2 = 1) with x_1 = 1 as evidence and display them.
model.simulate(
    n_samples=100,
    do={'x_2': 1},
    evidence={"x_1": 1},
)

  0%|          | 0/100 [00:00<?, ?it/s]

Unnamed: 0,Y,x_4,x_3,x_1,x_2
0,0,0,0,1,1
1,0,0,1,1,1
2,1,1,1,1,1
3,0,1,0,1,1
4,1,1,1,1,1
...,...,...,...,...,...
95,0,0,1,1,1
96,0,0,0,1,1
97,0,0,1,1,1
98,0,0,1,1,1


In [31]:
# Print the CPD of x_3 stored on the fitted model (conditioned on x_2 in the recorded table).
print(model.get_cpds('x_3'))

+--------+-----------------------+--------------------+
| x_2    | x_2(0)                | x_2(1)             |
+--------+-----------------------+--------------------+
| x_3(0) | 0.9970828471411902    | 0.4477124183006536 |
+--------+-----------------------+--------------------+
| x_3(1) | 0.0029171528588098016 | 0.5522875816993464 |
+--------+-----------------------+--------------------+


In [32]:
# Display the simulated samples with the x_3 column removed.
simulated_data_partial

Unnamed: 0,x_4,x_1,x_2
0,0,0,0
1,0,0,0
2,0,0,0
3,0,1,0
4,0,0,0
5,0,1,1
6,0,0,0
7,0,1,0
8,0,1,0
9,0,0,0


In [33]:
# '' is not a node of the model, so `do` raises ValueError (see the recorded
# error). Catch and print it so the failure stays visible without aborting
# Restart & Run All; the cells after this one were never executed because of it.
try:
    model.do(nodes='')
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Nodes not found in the model: {''}

In [None]:
# NOTE(review): repeats an earlier cell that already displays simulated_data_partial.
simulated_data_partial

In [None]:

# Wrap the fitted network for causal queries and request the backdoor
# adjustment sets for the pair (x_1, Y).
# NOTE(review): `a` is not used by any later cell visible here.
inference = CausalInference(model)
a = inference.get_all_backdoor_adjustment_sets("x_1", "Y")

In [None]:
# Distribution of Y under do(x_3 = 1), given the observed x_1 and x_2.
# Y was previously listed both as the query variable and as evidence (Y = 0),
# which is contradictory: the answer is either degenerate or rejected by the
# inference engine. It is removed from the evidence here.
# NOTE(review): x_2 is a parent of the do target x_3; confirm the installed
# pgmpy version accepts it as evidence alongside its default adjustment set.
dist = inference.query(["Y"], do={"x_3": 1}, evidence={"x_1": 1, "x_2": 1})

In [None]:
# Show the `values` attribute of the query result.
dist.values

In [None]:
# NOTE(review): repeats the previous cell.
dist.values