In [1]:
# 导入模块
import numpy as np      
from scipy import stats     # 计算相关
from sklearn.tree import DecisionTreeRegressor      # 回归决策树
from tqdm import trange     # 显示循环进度
from sklearn.metrics import mean_squared_error      # 计算MSE

In [2]:
# Load the input data.
# The Windows paths are written as raw strings: in a normal literal, sequences
# such as "\P", "\h" or "\G" are invalid escapes that only work by accident and
# raise a SyntaxWarning/DeprecationWarning on recent Python versions.
fc_path = r"D:\PLY\honesty\GRETNA_workspace\Power_FC\GretnaSFCMatrixZ"       # directory holding the functional-connectivity files
behavior_path = r"D:\PLY\honesty\Behavior_text\lierate.txt"      # path of the behavioral data file
behavior = np.loadtxt(behavior_path)

In [3]:
# Read every subject's functional-connectivity matrix and stack the results.
excluded_subjects = (105, 117)      # subject IDs skipped by this analysis
joint_fc = []
for i in range(102, 133):       # subject IDs 102..132 (the end of range is exclusive)
    if i in excluded_subjects:
        continue
    # Raw string: '\z' is not a valid escape sequence in a normal literal.
    file_path = fc_path + r'\zSub_' + str(i) + '.txt'
    raw_fc = np.loadtxt(file_path)     # one matrix per text file; check raw_fc.shape if in doubt
    # Keep the strict upper triangle (k=1 drops the diagonal) as a flat 1-D vector.
    # Indexing with raw_fc.shape[0] assumes the matrix is square.
    clear_fc = raw_fc[np.triu_indices(raw_fc.shape[0], k=1)]
    joint_fc.append(clear_fc)       # collect each subject's vector in a list
final_fc = np.array(joint_fc)       # list -> array: one row per subject, one column per connection
# Inspect final_fc.shape to confirm the row/column counts are as expected.
# Avoid keeping more intermediate variables than necessary.

In [4]:
# Leave-one-subject-out cross-validation of a regression-tree model.
sub = final_fc.shape[0]     # number of subjects (rows of final_fc)
num_fc = final_fc.shape[1]      # number of functional connections (columns of final_fc)
behavior = behavior.reshape((sub, 1))        # column vector, one row per subject
r_all = []      # one array of Spearman R values per fold (i.e. per held-out subject)
p_all = []      # one array of the matching P values per fold
fc_frequency = np.zeros(num_fc)     # number of folds that selected each connection (used for the survival rate)
predict_behavior = []        # model prediction for each held-out subject
for i in trange(sub):       # trange displays loop progress
    # Split the data: subject i is held out, all other subjects form the training set.
    train_fc = np.delete(final_fc, i, axis=0)
    test_fc = final_fc[i]
    train_behavior = np.delete(behavior, i, axis=0)
    test_behavior = behavior[i]
    # Correlate every connection with behavior using the training subjects only.
    # The arrays are allocated per fold so that the entries stored in
    # r_all / p_all are not overwritten by later folds.
    r_corr = np.zeros(num_fc)
    p_corr = np.zeros(num_fc)
    for j in range(num_fc):
        r_corr[j], p_corr[j] = stats.spearmanr(train_fc[:, j], train_behavior)
    # Record exactly one array per fold; appending inside the feature loop
    # would store num_fc references to the same array for every fold.
    r_all.append(r_corr)
    p_all.append(p_corr)
    # Feature selection: keep the connections whose R exceeds the 95th percentile
    # (the top 5% most positive correlations).
    selection = r_corr > np.percentile(r_corr, 95)
    fc_frequency += selection       # count how often each connection is selected
    # Build the training and test sets from the selected features.
    train_set = train_fc[:, selection]
    test_set = test_fc[selection].reshape((1, -1))      # a single sample as a 2-D row
    # Train the regression tree. random_state is fixed because the tree breaks
    # ties between equally good splits randomly, which makes runs irreproducible.
    regression = DecisionTreeRegressor(max_depth=5, random_state=0)
    regression.fit(train_set, train_behavior)
    predict_result = regression.predict(test_set)       # prediction for the held-out subject
    predict_behavior.append(predict_result)     # collect the per-subject predictions
# Evaluate the model with the mean squared error; arguments are (y_true, y_pred).
model_mse = mean_squared_error(behavior.ravel(), np.concatenate(predict_behavior))
print(model_mse)

100%|██████████| 29/29 [11:00<00:00, 22.79s/it]0.026113693950707836



In [None]:
# Survival rate: fraction of leave-one-out folds in which each connection was selected.
n_folds = sub       # one fold per subject, taken from the data rather than a hard-coded count
survival_rate = fc_frequency / n_folds
robust_mask = survival_rate > 0.95
fc_robust = fc_frequency[robust_mask]       # selection counts of the robust connections
fc_robust_idx = np.flatnonzero(robust_mask)     # positions of those connections along the columns of final_fc