In [1]:
import numpy as np
import pandas as pd
import networkx as nx
from random import choice, randint, uniform

In [2]:
# 参数定义
num_dags = 100  # DAG数量
num_nodes_range = (5, 15)  # 每个DAG节点数量范围
sample_sizes = [100, 200, 500]  # 数据集规模
noise_levels = [0.1, 0.5, 1.0]  # 噪声强度

# 存储结果
dag_data = []

# 生成随机DAG，满足马尔可夫条件和忠实性
for i in range(num_dags):
    # 生成随机节点数量的DAG
    num_nodes = randint(*num_nodes_range)
    G = nx.DiGraph()
    G.add_nodes_from(range(num_nodes))
    
    # 创建邻接矩阵，并随机生成有向边
    adj_matrix = np.zeros((num_nodes, num_nodes))
    for u in range(num_nodes):
        for v in range(u + 1, num_nodes):
            if np.random.rand() > 0.7:  # 30%几率添加边
                weight = uniform(0.1, 1.0)
                adj_matrix[u, v] = weight
                G.add_edge(u, v, weight=weight)

    # 打印邻接矩阵
    print(f"\nDAG {i} Adjacency Matrix with Weights:\n", adj_matrix)

    # 选择干预和结果变量
    intervention_var = choice(list(G.nodes))
    outcome_var = choice([n for n in G.nodes if n != intervention_var])

    # 识别后门调整集
    def find_backdoor_adjustment_set(G, X, Y):
        backdoor_set = set()
        for node in G.nodes():
            if node != X and node != Y and not nx.has_path(G, X, node):  # 排除X的后代
                if any(nx.has_path(G, Z, Y) for Z in G.predecessors(node)):  # 是否能阻断路径
                    backdoor_set.add(node)
        return list(backdoor_set)

    backdoor_set = find_backdoor_adjustment_set(G, intervention_var, outcome_var)

    # 计算从干预变量到结果变量的总体因果效应（ACE）
    def calculate_ace(G, adj_matrix, X, Y):
        paths = list(nx.all_simple_paths(G, source=X, target=Y))
        ace = 0
        for path in paths:
            # 串联：每条路径上的系数相乘
            path_weight = np.prod([adj_matrix[path[i], path[i+1]] for i in range(len(path) - 1)])
            # 并联：每条路径的效应相加
            ace += path_weight
        return ace

    ace = calculate_ace(G, adj_matrix, intervention_var, outcome_var)
    print(f"ACE for DAG {i} from {intervention_var} to {outcome_var}:", ace)

    # 生成不同规模的数据及噪声
    for sample_size in sample_sizes:
        for noise_level in noise_levels:
            data = np.zeros((sample_size, num_nodes))
            for t in range(sample_size):
                # 初始化根节点的值
                for node in nx.topological_sort(G):
                    noise = np.random.normal(0, noise_level)
                    parent_values = sum(adj_matrix[parent, node] * data[t, parent] for parent in G.predecessors(node))
                    data[t, node] = parent_values + noise
            
            # 存储DAG数据及因果效应
            dag_data.append({
                'dag_id': i,
                'sample_size': sample_size,
                'noise_level': noise_level,
                'weight_matrix': adj_matrix,
                'intervention_var': intervention_var,
                'outcome_var': outcome_var,
                'backdoor_set': backdoor_set,
                'ace': ace,
                'data': pd.DataFrame(data, columns=[f'X{j}' for j in range(num_nodes)])
            })


DAG 0 Adjacency Matrix with Weights:
 [[0.         0.         0.         0.10909059 0.         0.        ]
 [0.         0.         0.         0.53585579 0.         0.        ]
 [0.         0.         0.         0.         0.87983708 0.        ]
 [0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.70655426]
 [0.         0.         0.         0.         0.         0.        ]]
ACE for DAG 0 from 4 to 3: 0

DAG 1 Adjacency Matrix with Weights:
 [[0.         0.         0.         0.         0.29313307 0.
  0.37235199 0.         0.         0.         0.         0.99915702
  0.2231921  0.17866667]
 [0.         0.         0.         0.         0.         0.27925164
  0.         0.         0.         0.55422594 0.         0.40147777
  0.55558047 0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.268989   0.         0.63641384 0.         0.
  0.         0.        ]
 [0.         0.    

In [4]:

# 示例：打印生成的第一个DAG的结果
example_dag = dag_data[0]
print("DAG ID:", example_dag['dag_id'])
print("Sample Size:", example_dag['sample_size'])
print("Noise Level:", example_dag['noise_level'])
print("Intervention Variable:", example_dag['intervention_var'])
print("Outcome Variable:", example_dag['outcome_var'])
print("Backdoor Adjustment Set:", example_dag['backdoor_set'])
print("ACE (Total Effect):", example_dag['ace'])
print("Sample Data:\n", example_dag['data'].head())


DAG ID: 0
Sample Size: 100
Noise Level: 0.1
Intervention Variable: 4
Outcome Variable: 3
Backdoor Adjustment Set: []
ACE (Total Effect): 0
Sample Data:
          X0        X1        X2        X3        X4        X5
0 -0.100054 -0.044989 -0.017614  0.084934  0.032705  0.032623
1 -0.032168  0.017023 -0.026431 -0.131576  0.037789 -0.011116
2  0.068356  0.028411 -0.029544  0.090132  0.118172 -0.088562
3  0.009085 -0.078175  0.005107 -0.195060 -0.098088 -0.031489
4 -0.156150  0.060080  0.058027  0.080600  0.088626  0.192559
