In [1]:
from random import choice, randint, seed, uniform

import networkx as nx
import numpy as np
import pandas as pd

In [3]:
# Parameters
num_dags = 100  # number of DAGs to generate
num_nodes_range = (5, 15)  # inclusive range for the node count of each DAG
sample_sizes = [100, 200, 500]  # dataset sizes generated for every DAG
noise_levels = [0.1, 0.5, 1.0]  # noise strengths generated for every DAG
edge_probability = 0.3  # chance of adding each candidate edge u -> v (u < v)
weight_range = (0.1, 1.0)  # edge weights are drawn uniformly from this range
random_seed = 42  # fixed seed so a fresh "Restart & Run All" reproduces the same results

# Seed both generators used by this notebook: the stdlib `random` functions
# (randint / uniform / choice) and NumPy's global RNG (np.random.*).
seed(random_seed)
np.random.seed(random_seed)

# Collected results
dag_data = []

# Generate random DAGs. Edges only run from a lower to a higher node index,
# which guarantees acyclicity. The intent is that the resulting linear model
# satisfies the Markov condition and faithfulness (this is not verified here).
for i in range(num_dags):
    # Build a DAG with a random number of nodes
    num_nodes = randint(*num_nodes_range)
    G = nx.DiGraph()
    G.add_nodes_from(range(num_nodes))

    # Fill the weighted adjacency matrix and mirror every edge into G
    adj_matrix = np.zeros((num_nodes, num_nodes))
    for u in range(num_nodes):
        for v in range(u + 1, num_nodes):
            if np.random.rand() < edge_probability:
                weight = uniform(*weight_range)
                adj_matrix[u, v] = weight
                G.add_edge(u, v, weight=weight)

    # Print the adjacency matrix
    print(f"\nDAG {i} Adjacency Matrix with Weights:\n", adj_matrix)

    # Pick the intervention variable and a different outcome variable
    intervention_var = choice(list(G.nodes))
    outcome_var = choice([n for n in G.nodes if n != intervention_var])
    # Identify a backdoor adjustment set
    def find_backdoor_adjustment_set(G, X, Y):
        """Return a backdoor adjustment set for the effect of X on Y.

        The set returned is the parents of X. Every backdoor path leaves X
        through an edge pointing into X, i.e. through one of its parents, and
        in a DAG no parent of X is a descendant of X, so conditioning on all
        parents blocks every backdoor path without opening new ones.

        The previous rule kept a node only when one of *its* parents could
        reach Y, so a root confounder (Z -> X, Z -> Y) was never selected.

        Args:
            G: directed acyclic graph exposing ``predecessors(node)``.
            X: intervention node.
            Y: outcome node.

        Returns:
            Sorted list of the parents of X, excluding Y. When Y is itself a
            parent of X the direct edge Y -> X cannot be blocked by any
            adjustment set; Y is simply left out of the result (X then has no
            causal effect on Y in a DAG).
        """
        return sorted(parent for parent in G.predecessors(X) if parent != Y)

    # Compute the adjustment set for this DAG's chosen (intervention, outcome)
    # pair; it is stored with every dataset record generated below.
    backdoor_set = find_backdoor_adjustment_set(G, intervention_var, outcome_var)
    
    # Total causal effect of the intervention variable on the outcome variable
    def calculate_ace(adj_matrix, X, Y):
        """Return the total linear causal effect of node X on node Y.

        The effect is the sum, over every directed path from X to Y, of the
        product of the edge weights along that path. It is computed from
        ``adj_matrix`` alone (no dependence on an outer graph object) by
        pushing a unit mass at X through the weight matrix one edge at a time:
        after k steps ``reach[v]`` is the summed weight of all k-edge paths
        from X to v.

        Args:
            adj_matrix: square array where ``adj_matrix[u, v]`` is the weight
                of edge u -> v and 0 means "no edge". Must describe an acyclic
                graph; with cycles, walks would be counted instead of paths.
            X: index of the intervention node.
            Y: index of the outcome node.

        Returns:
            The total effect as a float; 0.0 when no directed path exists.
        """
        weights = np.asarray(adj_matrix, dtype=float)
        n = weights.shape[0]

        reach = np.zeros(n)
        reach[X] = 1.0
        total = 0.0
        # A path in a DAG with n nodes has at most n - 1 edges.
        for _ in range(n - 1):
            reach = reach @ weights
            if not reach.any():
                break  # no longer paths exist, nothing more to add
            total += reach[Y]
        return float(total)

    ace = calculate_ace(adj_matrix, intervention_var, outcome_var)
    print(f"ACE for DAG {i} from {intervention_var} to {outcome_var}:", ace)

    # The topological order and column labels depend only on the DAG, so
    # compute them once here instead of once per generated sample.
    topological_order = list(nx.topological_sort(G))
    column_names = [f'X{j}' for j in range(num_nodes)]

    # Generate one dataset per (sample size, noise level) combination
    for sample_size in sample_sizes:
        for noise_level in noise_levels:
            data = np.zeros((sample_size, num_nodes))
            # Fill the columns in topological order so every parent column is
            # complete before a child column is computed.
            for node in topological_order:
                noise = np.random.normal(0, noise_level, size=sample_size)
                # adj_matrix[:, node] is zero for every non-parent, so this
                # product is the weighted sum of the parent columns for all
                # samples at once (and 0 for root nodes).
                data[:, node] = data @ adj_matrix[:, node] + noise

            # Store the dataset together with the DAG and its causal effect
            dag_data.append({
                'dag_id': i,
                'sample_size': sample_size,
                'noise_level': noise_level,
                'weights': adj_matrix,
                'intervention_var': intervention_var,
                'outcome_var': outcome_var,
                'backdoor_set': backdoor_set,
                'ace': ace,
                'data': pd.DataFrame(data, columns=column_names)
            })


DAG 0 Adjacency Matrix with Weights:
 [[0.         0.         0.         0.74192475 0.         0.18434556
  0.        ]
 [0.         0.         0.         0.         0.         0.18095475
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.50899978]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]]
ACE for DAG 0 from 2 to 4: 0

DAG 1 Adjacency Matrix with Weights:
 [[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.15193619 0.94366313]
 [0.         0.         0.68538574 0.         0.         0.
  0.         0.         0.9253799  0.85725554 0.86604473]
 [0.         0.         0.         0.         0.         0.33682211
  0.         0.         0.33538141 0.         0.      

In [17]:

# Example: print the record stored at position 873 of dag_data
# (requires dag_data to hold at least 874 records).
example_dag = dag_data[873]

# (label, record key) pairs, printed in this order
summary_fields = [
    ("DAG ID:", 'dag_id'),
    ("Sample Size:", 'sample_size'),
    ("Noise Level:", 'noise_level'),
    ("Intervention Variable:", 'intervention_var'),
    ("Outcome Variable:", 'outcome_var'),
    ("Backdoor Adjustment Set:", 'backdoor_set'),
    ("ACE (Total Effect):", 'ace'),
]
for label, key in summary_fields:
    print(label, example_dag[key])

# Only the first rows of the generated data are shown
print("Sample Data:\n", example_dag['data'].head())


DAG ID: 97
Sample Size: 100
Noise Level: 0.1
Intervention Variable: 0
Outcome Variable: 3
Backdoor Adjustment Set: []
ACE (Total Effect): 0.10157747085349404
Sample Data:
          X0        X1        X2        X3        X4        X5        X6  \
0  0.100606 -0.226680 -0.085711 -0.018140 -0.156670 -0.001986 -0.088050   
1 -0.121657 -0.154485  0.109538 -0.104870 -0.424729 -0.176918 -0.036102   
2 -0.043216 -0.043474 -0.146690 -0.095768 -0.099978 -0.183639 -0.026585   
3 -0.037563 -0.041941 -0.026121  0.027301 -0.103073  0.120706  0.146330   
4  0.056036 -0.005226 -0.169523  0.190767  0.075606  0.224816  0.092660   

         X7        X8        X9       X10       X11       X12       X13  
0  0.175228 -0.095685  0.056693 -0.047030 -0.037561 -0.056818 -0.092703  
1 -0.113459 -0.057432 -0.282965 -0.119188 -0.062519 -0.371846 -0.494028  
2 -0.077141 -0.012369 -0.208051 -0.155589 -0.202201  0.074015 -0.642188  
3  0.150875  0.141800  0.098421 -0.097121 -0.055523  0.148959 -0.072826  
4  0.25