In [1]:
# 引入库

from typing import *

import numpy as np
import pandas as pd
from pgmpy.estimators import BicScore, HillClimbSearch, K2Score, MaximumLikelihoodEstimator
from pgmpy.inference import BeliefPropagation
from pgmpy.models import BayesianModel

In [2]:
# Build the toy dataset: 6 samples x 4 features (a 6x4 matrix).
# NOTE(review): every column mixes ints with bools. In Python True == 1 and
# False == 0 (and they hash alike), so the bool entries may be merged with the
# 1/0 states when states are enumerated downstream -- confirm the intended encoding.
packet_samples = {
    'Packet11': [0, 1, 2, 4, True, False],
    'Packet12': [0, 1, 5, 0, True, True],
    'Packet13': [0, 1, 2, 4, 5, False],
    'Packet14': [0, 1, True, False, 5, 5],
}

# Auxiliary synthetic variables with Z = X + Y.
# Drawn from a seeded generator so that Restart Kernel -> Run All reproduces
# the same values instead of depending on the global RNG state.
SEED = 0
rng = np.random.default_rng(SEED)
X = rng.random(10)
Y = rng.random(10)
Z = X + Y
data2 = {
    'X': X,
    'Y': Y,
    'Z': Z,
}

# `data` is only ever bound to the DataFrame (the raw dict keeps its own name),
# so re-running later cells never sees it as a plain dict.
data = pd.DataFrame(packet_samples)
print(data)

  Packet11 Packet12 Packet13 Packet14
0        0        0        0        0
1        1        1        1        1
2        2        5        2     True
3        4        0        4    False
4     True     True        5        5
5    False     True    False        5


In [3]:
# Structure learning: hill-climb search over the data, scored with BIC.
bic_scorer = BicScore(data)
hc = HillClimbSearch(data)
best_model = hc.estimate(scoring_method=bic_scorer)
print(best_model.edges())

  0%|          | 0/1000000 [00:00<?, ?it/s]

[('Packet12', 'Packet14')]


In [4]:
# Compute the independencies between the variables by checking d-separation
# on the learned structure (`best_model` comes from the structure-learning cell).
best_model.get_independencies()

(Packet12 ⟂ Packet13, Packet11)
(Packet12 ⟂ Packet11 | Packet13)
(Packet12 ⟂ Packet13, Packet11 | Packet14)
(Packet12 ⟂ Packet13 | Packet11)
(Packet12 ⟂ Packet11 | Packet13, Packet14)
(Packet12 ⟂ Packet13 | Packet11, Packet14)
(Packet13 ⟂ Packet12, Packet14, Packet11)
(Packet13 ⟂ Packet14, Packet11 | Packet12)
(Packet13 ⟂ Packet12, Packet11 | Packet14)
(Packet13 ⟂ Packet12, Packet14 | Packet11)
(Packet13 ⟂ Packet11 | Packet12, Packet14)
(Packet13 ⟂ Packet14 | Packet12, Packet11)
(Packet13 ⟂ Packet12 | Packet11, Packet14)
(Packet14 ⟂ Packet13, Packet11)
(Packet14 ⟂ Packet13, Packet11 | Packet12)
(Packet14 ⟂ Packet11 | Packet13)
(Packet14 ⟂ Packet13 | Packet11)
(Packet14 ⟂ Packet11 | Packet12, Packet13)
(Packet14 ⟂ Packet13 | Packet12, Packet11)
(Packet11 ⟂ Packet12, Packet13, Packet14)
(Packet11 ⟂ Packet13, Packet14 | Packet12)
(Packet11 ⟂ Packet12, Packet14 | Packet13)
(Packet11 ⟂ Packet12, Packet13 | Packet14)
(Packet11 ⟂ Packet14 | Packet12, Packet13)
(Packet11 ⟂ Packet13 | Packet12,

In [5]:

# Build the graph.
# Takes into account the sequential causality between packets and their spatial correlation.
def construct_base_edges(n, m) -> List[tuple]:
    """Return the directed edges of an n x m grid of packet nodes.

    Node (i, j) is named ``Packet{i}{j}``, with 1-based flow index ``i`` and
    1-based packet index ``j``. Each node receives an edge from the same packet
    position in the previous flow (when ``i > 1``), followed by an edge from
    the previous packet of the same flow (when ``j > 1``). ``Packet11``
    therefore has no incoming edge.

    Note: the two indices are concatenated without a separator, so node names
    are only unambiguous while both indices are single digits.

    Args:
        n: number of flows.
        m: number of packets in each flow.

    Returns:
        A list of ``(parent_name, child_name)`` tuples, ordered flow by flow
        and, inside a flow, packet by packet.
    """
    grid_edges = []
    for flow in range(1, n + 1):
        for pkt in range(1, m + 1):
            child = f"Packet{flow}{pkt}"
            # Cross-flow edge first, then the in-flow edge, matching the
            # order in which edges are emitted for interior nodes.
            if flow > 1:
                grid_edges.append((f"Packet{flow - 1}{pkt}", child))
            if pkt > 1:
                grid_edges.append((f"Packet{flow}{pkt - 1}", child))
    return grid_edges

[('Packet11', 'Packet12'), ('Packet12', 'Packet13'), ('Packet11', 'Packet21'), ('Packet12', 'Packet22'), ('Packet21', 'Packet22'), ('Packet13', 'Packet23'), ('Packet22', 'Packet23'), ('Packet21', 'Packet31'), ('Packet22', 'Packet32'), ('Packet31', 'Packet32'), ('Packet23', 'Packet33'), ('Packet32', 'Packet33'), ('Packet31', 'Packet41'), ('Packet32', 'Packet42'), ('Packet41', 'Packet42'), ('Packet33', 'Packet43'), ('Packet42', 'Packet43')]


In [6]:
###### MLE parameter learning #########
# Learn the model parameters with MaximumLikelihoodEstimator.
# (The estimator is imported in the notebook's import cell, not here, so all
# dependencies are visible in one place.)

# A single flow of 4 packets: Packet11 -> Packet12 -> Packet13 -> Packet14.
base_edges = construct_base_edges(1, 4)
print(base_edges)

# Fail early with a readable message if the graph mentions a node that has no
# matching column in the training data.
missing_columns = {node for edge in base_edges for node in edge} - set(data.columns)
assert not missing_columns, f"nodes without a data column: {sorted(missing_columns)}"

model = BayesianModel(base_edges)  # input: edge list, output: model
model.fit(data, estimator=MaximumLikelihoodEstimator)  # maximum-likelihood parameter fit


[('Packet11', 'Packet12'), ('Packet12', 'Packet13'), ('Packet13', 'Packet14')]




In [10]:
###### Belief propagation inference #########

# Run a MAP query for Packet14 with BeliefPropagation, given observed
# values for Packet13 and Packet12.
observed = {'Packet13': 1, 'Packet12': 5}
inference_bp = BeliefPropagation(model)
query_bp = inference_bp.map_query(variables=['Packet14'], evidence=observed)
print("信念传播算法的推断结果：", query_bp)
# At this point we have the initial causal relationships.



信念传播算法的推断结果： {'Packet14': 1}
