In [12]:
from itertools import chain
from random import choice, randint, seed, uniform

import networkx as nx
import numpy as np
import pandas as pd

In [13]:
# Parameter definitions
num_dags = 1  # number of DAGs to generate
sample_sizes = [100, 200, 500]  # number of rows in each simulated dataset
noise_levels = [0.1, 0.5, 1.0]  # noise strength (passed as the scale of the Gaussian noise)

# Seed both random sources used by this notebook (`random.uniform` for the edge
# weights, `np.random.normal` for the noise) so that re-running the cell
# reproduces the same weights and the same data.
SEED = 42
seed(SEED)
np.random.seed(SEED)

# Result store: one record per generated dataset
dag_data = []

# Build each DAG: the structure is fixed, only the edge weights are random
for i in range(num_dags):
    num_nodes = 4

    # Weighted adjacency matrix (row = source node, column = target node):
    #   0, 0, a, b
    #   0, 0, 0, 0
    #   0, d, 0, c
    #   0, e, 0, 0
    # Weights are drawn in row-major order of the edges.
    adj_matrix = np.zeros((num_nodes, num_nodes))
    for src, dst in [(0, 2), (0, 3), (2, 1), (2, 3), (3, 1)]:
        adj_matrix[src, dst] = uniform(0.1, 1.0)

    # Mirror every positive matrix entry as a weighted edge of the graph.
    # `.tolist()` turns the indices into plain Python ints so the edge
    # endpoints are the same objects kind as the nodes added above.
    G = nx.DiGraph()
    G.add_nodes_from(range(num_nodes))
    for src, dst in np.argwhere(adj_matrix > 0).tolist():
        G.add_edge(src, dst, weight=adj_matrix[src, dst])

    # Show the weighted adjacency matrix
    print(f"\nDAG {i} Adjacency Matrix with Weights:\n", adj_matrix)

    # Intervention (treatment) variable and outcome variable
    intervention_var = 2
    outcome_var = 1


    # 识别后门调整集
    def find_backdoor_adjustment_set(G, X, Y):
        """Choose a small set of nodes that blocks the fork-shaped backdoor paths from X to Y.

        A fork-shaped backdoor path has the form ``X <- ... <- A -> ... -> Y``
        where ``A`` is a common ancestor of X and Y and the two directed arms
        meet only at ``A``. For every such path the nodes that may legally be
        adjusted for are collected, and a greedy cover over those sets is
        returned.

        Args:
            G: directed acyclic graph (``nx.DiGraph``).
            X: intervention (treatment) node.
            Y: outcome node.

        Returns:
            list of nodes to adjust for; empty when X and Y have no common
            ancestor.

        NOTE(review): only fork-shaped paths are enumerated. Backdoor paths
        containing colliders are not inspected, so this function does not
        verify that the returned nodes leave such paths closed -- confirm this
        is acceptable for the graphs used.
        """
        common_ancestors = nx.ancestors(G, X) & nx.ancestors(G, Y)

        # Backdoor criterion: the adjustment set must not contain X, Y or any
        # descendant of X (mediators / post-treatment variables).
        forbidden = nx.descendants(G, X) | {X, Y}

        # One candidate set per backdoor path
        backdoor_paths_nodes = []

        for ancestor in common_ancestors:
            paths_to_X = list(nx.all_simple_paths(G, source=ancestor, target=X))
            paths_to_Y = list(nx.all_simple_paths(G, source=ancestor, target=Y))

            for path_X in paths_to_X:
                arm_X = set(path_X[1:])
                for path_Y in paths_to_Y:
                    # The two arms may only meet at the fork. If they share any
                    # other node (this includes an arm to Y that runs through
                    # X), the combination is not a simple path; the genuine
                    # path is enumerated when the last shared node is the fork.
                    if not arm_X.isdisjoint(path_Y[1:]):
                        continue
                    # Fork + interior of both arms, minus the forbidden nodes.
                    candidates = set(path_X[:-1] + path_Y[1:-1]) - forbidden
                    if candidates:
                        backdoor_paths_nodes.append(candidates)

        # Pick a small set of nodes that hits every collected path
        return minimum_cover(backdoor_paths_nodes)

    # 求最小覆盖集
    def minimum_cover(sets):
        """Greedily pick elements so that every non-empty set contains a picked element.

        Each round picks the element that occurs in the largest number of
        still-uncovered sets, then drops the sets it covers. This is the usual
        greedy heuristic, so the result is small but not guaranteed to be the
        smallest possible cover.

        Args:
            sets: iterable of sets. Empty sets are ignored because no element
                can cover them (previously they caused a ``ValueError``).

        Returns:
            list of picked elements, in the order they were picked.
        """
        # Work on a private list so the caller's collection is never modified.
        remaining = [s for s in sets if s]
        cover = []

        while remaining:
            # Count, in one pass, how many uncovered sets each element occurs in.
            counts = {}
            for element in chain.from_iterable(remaining):
                counts[element] = counts.get(element, 0) + 1

            most_common = max(counts, key=counts.get)
            cover.append(most_common)

            # Drop every set that is now covered.
            remaining = [s for s in remaining if most_common not in s]

        return cover

    # Adjustment set for the (intervention_var -> outcome_var) effect in this DAG
    backdoor_set = find_backdoor_adjustment_set(G, intervention_var, outcome_var)

    # 计算从干预变量到结果变量的总体因果效应（ACE）
    def calculate_ace(G, adj_matrix, X, Y):
        """Return the total effect of X on Y as the sum of path products.

        Every directed simple path from X to Y contributes the product of the
        ``adj_matrix`` coefficients along its edges; the contributions of all
        paths are added up. Returns 0 when there is no directed path.
        """
        total_effect = 0
        for route in nx.all_simple_paths(G, source=X, target=Y):
            # In series: multiply the coefficients of consecutive edges
            coefficients = [adj_matrix[tail, head] for tail, head in zip(route, route[1:])]
            # In parallel: add up the contributions of the individual paths
            total_effect += np.prod(coefficients)
        return total_effect

    # Total effect computed from the edge weights; stored with every dataset generated below
    ace = calculate_ace(G, adj_matrix, intervention_var, outcome_var)
    print(f"ACE for DAG {i} from {intervention_var} to {outcome_var}:", ace)

    # Simulate one dataset per (sample size, noise level) combination.
    # The topological order and the parent lists depend only on G, so they are
    # computed once instead of once per sample row.
    topo_order = list(nx.topological_sort(G))
    parents_of = {node: list(G.predecessors(node)) for node in topo_order}
    column_names = [f'X{j}' for j in range(num_nodes)]

    for sample_size in sample_sizes:
        for noise_level in noise_levels:
            # One Gaussian noise term per (sample, node); column k belongs to
            # the k-th node in topological order.
            noise = np.random.normal(0, noise_level, size=(sample_size, len(topo_order)))

            data = np.zeros((sample_size, num_nodes))
            for k, node in enumerate(topo_order):
                # Linear structural equation, evaluated for all samples at once:
                # node = sum(weight * parent) + noise. Parents are already
                # filled in because nodes are visited in topological order.
                # For root nodes the sum is 0, leaving pure noise.
                parent_values = sum(adj_matrix[parent, node] * data[:, parent] for parent in parents_of[node])
                data[:, node] = parent_values + noise[:, k]

            # Store the dataset together with the DAG and its causal effect
            dag_data.append({
                'dag_id': i,
                'sample_size': sample_size,
                'noise_level': noise_level,
                'weight_matrix': adj_matrix,
                'intervention_var': intervention_var,
                'outcome_var': outcome_var,
                'backdoor_set': backdoor_set,
                'ace': ace,
                'data': pd.DataFrame(data, columns=column_names)
            })


DAG 0 Adjacency Matrix with Weights:
 [[0.         0.         0.52769133 0.28787633]
 [0.         0.         0.         0.        ]
 [0.         0.56317917 0.         0.1927067 ]
 [0.         0.61161314 0.         0.        ]]
ACE for DAG 0 from 2 to 1: 0.6810411163840985


In [14]:

# Example: print the first generated record
example_dag = dag_data[0]

# (label, record key) pairs, printed in this order
summary_fields = [
    ("DAG ID:", 'dag_id'),
    ("Sample Size:", 'sample_size'),
    ("Noise Level:", 'noise_level'),
    ("Intervention Variable:", 'intervention_var'),
    ("Outcome Variable:", 'outcome_var'),
    ("Backdoor Adjustment Set:", 'backdoor_set'),
    ("ACE (Total Effect):", 'ace'),
]
for label, key in summary_fields:
    print(label, example_dag[key])

# Only the first rows of the simulated data are shown
print("Sample Data:\n", example_dag['data'].head())


DAG ID: 0
Sample Size: 100
Noise Level: 0.1
Intervention Variable: 2
Outcome Variable: 1
Backdoor Adjustment Set: [0]
ACE (Total Effect): 0.6810411163840985
Sample Data:
          X0        X1        X2        X3
0  0.084078 -0.179204 -0.045616 -0.084854
1  0.137080  0.137361  0.083443  0.054450
2 -0.087752 -0.040261 -0.025439  0.010901
3 -0.008461 -0.106731 -0.063482  0.016096
4  0.044499  0.135395  0.041432  0.099266


此时，ACE的计算已经完成，下面需要确认后门路径的调整集能否准确识别。
以上问题都已修改完成。