In [1]:
import numpy as np
import pandas as pd
import networkx as nx
from random import choice, randint, uniform
from scipy.stats import norm

In [2]:

# 参数定义
num_dags = 100  # DAG数量
num_nodes_range = (5, 15)  # 每个DAG节点数量范围
sample_sizes = [100, 200, 500]  # 数据集规模
noise_levels = [0.1, 0.5, 1.0]  # 噪声强度

# 存储结果
dag_data = []

# 生成随机DAG，满足马尔可夫条件和忠实性
for i in range(num_dags):
    # 生成随机节点数量的DAG
    num_nodes = randint(*num_nodes_range)
    G = nx.DiGraph()
    G.add_nodes_from(range(num_nodes))
    
    # 随机生成有向边，确保DAG无环
    while not nx.is_directed_acyclic_graph(G):
        G.clear_edges()
        for n in range(num_nodes):
            for m in range(n+1, num_nodes):
                if np.random.rand() > 0.7:  # 30%几率添加边
                    G.add_edge(n, m)
    
    # 赋予每条边随机权重
    for (u, v) in G.edges():
        G[u][v]['weight'] = uniform(0.1, 1.0)

    # 打印DAG的权重信息
    print(f"DAG {i} Edge Weights:", [(u, v, G[u][v]['weight']) for u, v in G.edges()])
    
    # 选择干预和结果变量
    intervention_var = choice(list(G.nodes))
    outcome_var = choice([n for n in G.nodes if n != intervention_var])

    # 识别后门调整集
    def find_backdoor_adjustment_set(G, X, Y):
        backdoor_set = set()
        for node in G.nodes():
            if node != X and node != Y and not nx.has_path(G, X, node):  # 排除X的后代
                if any(nx.has_path(G, Z, Y) for Z in G.predecessors(node)):  # 是否能阻断路径
                    backdoor_set.add(node)
        return list(backdoor_set)

    backdoor_set = find_backdoor_adjustment_set(G, intervention_var, outcome_var)
    
    # 计算从干预变量到结果变量的总体因果效应
    def calculate_ace(G, X, Y):
        paths = list(nx.all_simple_paths(G, source=X, target=Y))
        ace = 0
        for path in paths:
            path_weight = np.prod([G[path[i]][path[i+1]]['weight'] for i in range(len(path) - 1)])
            ace += path_weight
        return ace

    ace = calculate_ace(G, intervention_var, outcome_var)

    # 生成不同规模的数据及噪声
    for sample_size in sample_sizes:
        for noise_level in noise_levels:
            data = np.zeros((sample_size, num_nodes))
            for t in range(sample_size):
                # 初始化根节点的值
                for node in nx.topological_sort(G):
                    noise = np.random.normal(0, noise_level)
                    parent_values = sum(G[parent][node]['weight'] * data[t, parent] for parent in G.predecessors(node))
                    data[t, node] = parent_values + noise
            
            # 存储DAG数据及因果效应
            dag_data.append({
                'dag_id': i,
                'sample_size': sample_size,
                'noise_level': noise_level,
                'intervention_var': intervention_var,
                'outcome_var': outcome_var,
                'backdoor_set': backdoor_set,
                'ace': ace,
                'data': pd.DataFrame(data, columns=[f'X{j}' for j in range(num_nodes)])
            })

DAG 0 Edge Weights: []
DAG 1 Edge Weights: []
DAG 2 Edge Weights: []
DAG 3 Edge Weights: []
DAG 4 Edge Weights: []
DAG 5 Edge Weights: []
DAG 6 Edge Weights: []
DAG 7 Edge Weights: []
DAG 8 Edge Weights: []
DAG 9 Edge Weights: []
DAG 10 Edge Weights: []
DAG 11 Edge Weights: []
DAG 12 Edge Weights: []
DAG 13 Edge Weights: []
DAG 14 Edge Weights: []
DAG 15 Edge Weights: []
DAG 16 Edge Weights: []
DAG 17 Edge Weights: []
DAG 18 Edge Weights: []
DAG 19 Edge Weights: []
DAG 20 Edge Weights: []
DAG 21 Edge Weights: []
DAG 22 Edge Weights: []
DAG 23 Edge Weights: []
DAG 24 Edge Weights: []
DAG 25 Edge Weights: []
DAG 26 Edge Weights: []
DAG 27 Edge Weights: []
DAG 28 Edge Weights: []
DAG 29 Edge Weights: []
DAG 30 Edge Weights: []
DAG 31 Edge Weights: []
DAG 32 Edge Weights: []
DAG 33 Edge Weights: []
DAG 34 Edge Weights: []
DAG 35 Edge Weights: []
DAG 36 Edge Weights: []
DAG 37 Edge Weights: []
DAG 38 Edge Weights: []
DAG 39 Edge Weights: []
DAG 40 Edge Weights: []
DAG 41 Edge Weights: []
DA

In [4]:
# 示例：打印生成的第一个DAG的结果
example_dag = dag_data[3]
print("DAG ID:", example_dag['dag_id'])
print("Sample Size:", example_dag['sample_size'])
print("Noise Level:", example_dag['noise_level'])
print("Intervention Variable:", example_dag['intervention_var'])
print("Outcome Variable:", example_dag['outcome_var'])
print("Backdoor Adjustment Set:", example_dag['backdoor_set'])
print("ACE (Total Effect):", example_dag['ace'])
print("Sample Data:\n", example_dag['data'].head())


DAG ID: 0
Sample Size: 200
Noise Level: 0.1
Intervention Variable: 7
Outcome Variable: 5
Backdoor Adjustment Set: []
ACE (Total Effect): 0
Sample Data:
          X0        X1        X2        X3        X4        X5        X6  \
0 -0.113303  0.154077  0.014913 -0.125249 -0.008977  0.142130 -0.029938   
1 -0.036755  0.050531  0.106317 -0.147961 -0.105722 -0.272185  0.212597   
2  0.019136  0.110601 -0.079766  0.047552 -0.157385  0.053101 -0.042881   
3  0.089958 -0.142836  0.098507  0.056075  0.053552  0.075211  0.143027   
4  0.130260 -0.047563  0.140496  0.045777  0.052920  0.069476 -0.072910   

         X7        X8        X9       X10       X11       X12       X13  \
0 -0.104033 -0.032791 -0.042416  0.019029 -0.045673  0.138184 -0.031090   
1  0.003255 -0.074363 -0.036454 -0.348379 -0.069265  0.079532  0.132787   
2 -0.118265 -0.121318  0.032327  0.318586 -0.061038 -0.113890 -0.060128   
3 -0.020745  0.031372  0.091024  0.217627 -0.018732  0.074014 -0.011658   
4  0.111821  0.011566