In [10]:
%cd /root/autodl-tmp/tigramite


/root/autodl-tmp/tigramite


In [11]:
import os

import numpy as np
from sklearn.metrics import roc_auc_score

from tigramite import data_processing as pp
from tigramite.independence_tests.parcorr import ParCorr
from tigramite.pcmci import PCMCI

# Analysis parameters shared by the functions in this cell.
lag = 5  # used as tau_max (the maximum time lag) when PCMCI is run
seed = 981  # random seed -- NOTE(review): not referenced by any code visible in this notebook
tau_to_plot = 1  # lag index tau = 1: the p_matrix slice that the AUROC is computed on


# Run PCMCI on one dataset and score the outcome against the ground truth.
def analyze_data(X, GC, var_names, tau_max=None, pc_alpha=0.05, tau=None, verbosity=1):
    """Run PCMCI with a ParCorr test on ``X`` and compute an AUROC against ``GC``.

    Parameters
    ----------
    X : array
        Time-series data wrapped in a Tigramite ``DataFrame``.
        Presumably shaped (time steps, variables) -- TODO confirm.
    GC : array
        Ground-truth structure, forwarded unchanged to ``compute_auroc``.
    var_names : list of str
        Variable names handed to the Tigramite ``DataFrame``.
    tau_max : int, optional
        Maximum lag passed to ``run_pcmci``. Defaults to the module-level
        ``lag``, which is what this function always used before.
    pc_alpha : float, optional
        Value passed to ``run_pcmci`` as ``pc_alpha``. Defaults to 0.05.
    tau : int, optional
        Lag index of the p-value slice that is scored. Defaults to the
        module-level ``tau_to_plot``.
    verbosity : int, optional
        Verbosity passed to ``PCMCI``. Defaults to 1; a lower value can be
        used to shorten the log printed for every file.

    Returns
    -------
    tuple
        ``(auroc, results)``: the value returned by ``compute_auroc`` and the
        object returned by ``run_pcmci``.
    """
    # Fall back to the notebook-level settings so existing three-argument
    # calls behave exactly as before.
    if tau_max is None:
        tau_max = lag
    if tau is None:
        tau = tau_to_plot

    # Wrap the raw array in a Tigramite data frame.
    dataframe = pp.DataFrame(X, var_names=var_names)

    # Partial-correlation independence test with analytic significance.
    parcorr = ParCorr(significance='analytic')

    pcmci = PCMCI(
        dataframe=dataframe,
        cond_ind_test=parcorr,
        verbosity=verbosity
    )

    # Run PCMCI to estimate the lagged dependencies.
    results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=pc_alpha)

    # Score the p-values of the selected lag against the ground truth.
    p_matrix = results['p_matrix']
    auroc = compute_auroc(GC, p_matrix, tau)

    return auroc, results


# Score one lag slice of the p-value matrix against the ground-truth graph.
def compute_auroc(GC, p_matrix, tau):
    """Return the AUROC of the lag-``tau`` p-values measured against ``GC``.

    ``GC`` and ``p_matrix[:, :, tau]`` are each flattened to one dimension and
    passed to ``roc_auc_score`` with ``GC`` as the labels. The p-values are
    negated to form the scores, so a smaller p-value ranks as stronger
    evidence of a link.

    NOTE(review): the flattened arrays are compared element by element, so
    this presumably expects ``GC`` to have the same shape and the same
    (row, column) orientation as ``p_matrix[:, :, tau]`` -- confirm against
    the dataset's convention for which axis holds the cause.
    """
    truth = GC.flatten()
    lag_slice = p_matrix[:, :, tau]
    scores = -lag_slice.flatten()
    return roc_auc_score(truth, scores)


# Analyse every listed .npz file and save the results under the output folder.
def process_files(input_files, output_base_folder, input_root='datasets'):
    """Run ``analyze_data`` on each input file and store the outcome as ``.npz``.

    Parameters
    ----------
    input_files : iterable of str
        Paths of ``.npz`` files that contain the keys ``'X'`` and ``'GC'``.
    output_base_folder : str
        Root directory for the result files. Each input's directory layout
        relative to ``input_root`` is recreated below it.
    input_root : str, optional
        Directory the input paths are made relative to. Defaults to
        ``'datasets'``, the value that used to be hard-coded.

    Each result file is named ``<input stem>_results.npz`` and holds the
    entries ``results`` and ``auroc``.
    """
    for file_path in input_files:
        print(f"Processing {file_path}...")

        # np.load returns an NpzFile that keeps the archive open; the with
        # block closes it once both arrays have been read, so handles do not
        # accumulate across a long batch.
        with np.load(file_path) as data:
            X = data['X']
            GC = data['GC']

        # One name per column of X.
        var_names = [f'X{i}' for i in range(X.shape[1])]

        auroc, results = analyze_data(X, GC, var_names)
        print(f"AUROC for {file_path}: {auroc}")

        # Mirror the input's sub-directory (relative to input_root) under the
        # output root.
        relative_path = os.path.relpath(file_path, start=input_root)
        output_folder = os.path.join(output_base_folder, os.path.dirname(relative_path))
        os.makedirs(output_folder, exist_ok=True)

        # Swap only the trailing extension; str.replace would also rewrite a
        # '.npz' that appears earlier in the file name.
        stem, _ = os.path.splitext(os.path.basename(file_path))
        results_filename = os.path.join(output_folder, f"{stem}_results.npz")

        # NOTE(review): `results` is presumably a dict rather than an array,
        # so NumPy stores it as a pickled object; reading it back would then
        # need np.load(..., allow_pickle=True).
        np.savez(results_filename, results=results, auroc=auroc)
        print(f"Saved results to {results_filename}")


# Dataset folders to scan for .npz files (edit this list to match your data).
input_files_folder = [
    'datasets/lorenz/F10/time200',
    'datasets/lorenz/F10/time500',
    'datasets/lorenz/F10/time1000',
    'datasets/lorenz/F20/time200',
    'datasets/lorenz/F20/time500',
    'datasets/lorenz/F20/time1000',
    'datasets/var/lag2-sp2/time200',
    'datasets/var/lag2-sp2/time500',
    'datasets/var/lag2-sp2/time1000',
    'datasets/var/lag3-sp2/time200',
    'datasets/var/lag3-sp2/time500',
    'datasets/var/lag3-sp2/time1000',
    'datasets/var/lag3-sp3/time200',
    'datasets/var/lag3-sp3/time500',
    'datasets/var/lag3-sp3/time1000',
    'datasets/var/lag3-sp4/time200',
    'datasets/var/lag3-sp4/time500',
    'datasets/var/lag3-sp4/time1000'
]

# Root directory that the result files are written under.
output_base_folder = 'output/results'

# Collect the .npz files found in each folder.
input_files = []
for folder in input_files_folder:
    if not os.path.isdir(folder):
        print(f"Folder does not exist: {folder}")
        continue
    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order (and therefore the printed log) reproducible.
    npz_files = [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.endswith('.npz')
    ]
    input_files.extend(npz_files)

# Process every collected file.
process_files(input_files, output_base_folder)



Processing datasets/lorenz/F10/time200/lorenz-584-F10-200.npz...

##
## Step 1: PC1 algorithm for selecting lagged conditions
##

Parameters:
independence test = par_corr
tau_min = 1
tau_max = 5
pc_alpha = [0.05]
max_conds_dim = None
max_combinations = 1


## Resulting lagged parent (super)sets:

    Variable X0 has 7 link(s):
        (X0 -1): max_pval = 0.00000, |min_val| =  0.772
        (X0 -2): max_pval = 0.00000, |min_val| =  0.334
        (X0 -4): max_pval = 0.00008, |min_val| =  0.284
        (X7 -5): max_pval = 0.00036, |min_val| =  0.260
        (X6 -2): max_pval = 0.00696, |min_val| =  0.196
        (X2 -2): max_pval = 0.01172, |min_val| =  0.185
        (X8 -1): max_pval = 0.04583, |min_val| =  0.147

    Variable X1 has 5 link(s):
        (X1 -1): max_pval = 0.00000, |min_val| =  0.846
        (X1 -2): max_pval = 0.00000, |min_val| =  0.550
        (X1 -3): max_pval = 0.00003, |min_val| =  0.295
        (X8 -5): max_pval = 0.00048, |min_val| =  0.253
        (X1 -4): max_pv

In [12]:
import os

# Print the current working directory of the kernel process.
print(os.getcwd())


/root/autodl-tmp/tigramite
