利用 LOOCV（留一法交叉验证）策略，每次留出一个样本并用其余样本训练一个弹性网回归模型，保存各特征的系数，得到一个系数矩阵（每一行对应一个特征，每一列对应一个弹性网模型得到的系数），并据此对每个特征计算系数的 2.5 百分位数和 97.5 百分位数。

In [None]:
# Hyper-parameter search grid for the elastic net.
alpha_range = [0.1,1,10,50]
l1_ratio_range = np.arange(0, 1, 0.05)

corr = 0.2
tissue_list = ['Blood'] # ['Skin','Blood','Brain','Lung']


def _select_elastic_net_params(train_x, train_y, alphas, l1_ratios):
    """Grid-search (alpha, l1_ratio) by 5-fold cross-validated mean MSE.

    Args:
        train_x: Feature table; rows are selected positionally with ``.iloc``.
        train_y: Target values aligned row-for-row with ``train_x``.
        alphas: Candidate ``alpha`` values, tried in order.
        l1_ratios: Candidate ``l1_ratio`` values, tried in order for each alpha.

    Returns:
        ``(best_alpha, best_l1_ratio, best_mean_mse)``. Ties keep the first
        combination encountered (strict ``<`` comparison). If no combination
        scores below infinity the result is ``(None, None, inf)``.
    """
    k_folds = KFold(n_splits=5, shuffle=True, random_state=2024)
    # The integer random_state makes every split() call yield the same folds,
    # so slice the data once here instead of once per grid point.
    folds = [
        (
            train_x.iloc[fit_idx],
            train_y.iloc[fit_idx],
            train_x.iloc[val_idx],
            train_y.iloc[val_idx],
        )
        for fit_idx, val_idx in k_folds.split(train_x)
    ]

    best_alpha = None
    best_l1_ratio = None
    best_mean_mse = float('inf')

    for alpha in alphas:
        for l1_ratio in l1_ratios:
            # Per-fold validation MSE for this (alpha, l1_ratio) pair.
            fold_mse = []
            for x_fit, y_fit, x_val, y_val in folds:
                model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=2024)
                model.fit(x_fit, y_fit)
                fold_mse.append(mean_squared_error(y_val, model.predict(x_val)))

            mean_mse = np.mean(fold_mse)

            # Keep the combination with the smallest mean MSE so far.
            if mean_mse < best_mean_mse:
                best_mean_mse = mean_mse
                best_alpha = alpha
                best_l1_ratio = l1_ratio

    return best_alpha, best_l1_ratio, best_mean_mse


for tissue in tissue_list:
    model_data = pd.read_csv(f'../../train_data/bootstrap_{tissue}_{corr}_add.csv',index_col=0)

    # The last column is the regression target; every other column is a feature.
    features = model_data.iloc[:, :-1]
    target = model_data.iloc[:, -1]

    loo = LeaveOneOut()
    # Not populated in this cell; retained in case other cells reference it.
    age_pred_list = []
    print('Now is running LOOCV')
    # Start timestamp; not read in this cell.
    t1 = time.time()

    # One coefficient vector per leave-one-out model.
    coef = []
    for train, _held_out in loo.split(model_data):
        # Only the coefficients are kept, so the held-out sample is never used.
        train_x = features.iloc[train]
        train_y = target.iloc[train]

        # Tune the hyper-parameters on the training samples only.
        best_alpha, best_l1_ratio, best_mean_mse = _select_elastic_net_params(
            train_x, train_y, alpha_range, l1_ratio_range
        )
        print(f"Best alpha: {best_alpha}, Best l1_ratio: {best_l1_ratio}, Best mean MSE: {best_mean_mse}")

        # Refit on all training samples with the selected hyper-parameters.
        age_model = ElasticNet(alpha=best_alpha, l1_ratio=best_l1_ratio, random_state=2024)
        age_model.fit(train_x,train_y)
        coef.append(age_model.coef_)

    # Transpose so that row i holds feature i (in the column order of
    # model_data, target excluded) and each column is one leave-one-out model.
    coef = pd.DataFrame(np.array(coef).T)

    # 2.5th / 97.5th percentiles of each feature's coefficient across the
    # leave-one-out models (the bounds of the 95% interval).
    ci_lower = np.percentile(coef,2.5,axis = 1)
    ci_upper = np.percentile(coef,97.5,axis = 1)

    ci = pd.DataFrame({'ci_lower':ci_lower,'ci_upper':ci_upper})
    # Coefficient columns first, then the two interval columns; rows are
    # indexed positionally, not by feature name.
    df = pd.concat([coef,ci],axis = 1)
    df.to_csv(f'{tissue}_coef.csv')