Use a leave-one-out cross-validation (LOOCV) strategy: for each left-out sample, train an Elastic Net regression model on the remaining samples and save its feature coefficients. Collect the results in a table with one row per feature and one column per Elastic Net model, then compute the 2.5th and 97.5th percentiles of each feature's coefficients across the models.

In [None]:
# Hyperparameter grid searched for every leave-one-out training set.
alpha_range = [0.1, 1, 10, 50]
# 0.00, 0.05, ..., 0.95 (the stop value 1.0 is excluded by np.arange).
# NOTE(review): scikit-learn's ElasticNet documentation describes very small
# l1_ratio values (<= 0.01) as unreliable; 0 is kept so the searched grid is
# unchanged -- confirm whether it should stay.
l1_ratio_range = np.arange(0, 1, 0.05)

corr = 0.2
tissue_list = ['Blood']  # ['Skin','Blood','Brain','Lung']

# Inner cross-validation splitter. With an integer random_state every call to
# split() yields the same shuffled folds, so a single instance can be shared
# by all grid points and all leave-one-out iterations.
k_folds = KFold(n_splits=5, shuffle=True, random_state=2024)


def _select_elastic_net_params(folds, alphas, l1_ratios):
    """Grid-search ``alpha`` and ``l1_ratio`` by mean K-fold MSE.

    Args:
        folds: sequence of ``(X_train, y_train, X_test, y_test)`` tuples,
            one per cross-validation fold.
        alphas: iterable of candidate ``alpha`` values.
        l1_ratios: iterable of candidate ``l1_ratio`` values.

    Returns:
        ``(best_alpha, best_l1_ratio, best_mean_mse)`` for the grid point
        with the lowest mean squared error averaged over the folds.
    """
    best_alpha = None
    best_l1_ratio = None
    best_mean_mse = float('inf')

    for alpha in alphas:
        for l1_ratio in l1_ratios:
            fold_mse = []
            for X_train, y_train, X_test, y_test in folds:
                model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=2024)
                model.fit(X_train, y_train)
                fold_mse.append(mean_squared_error(y_test, model.predict(X_test)))

            mean_mse = np.mean(fold_mse)

            # Strict '<' keeps the first grid point encountered on ties.
            if mean_mse < best_mean_mse:
                best_mean_mse = mean_mse
                best_alpha = alpha
                best_l1_ratio = l1_ratio

    return best_alpha, best_l1_ratio, best_mean_mse


for tissue in tissue_list:
    model_data = pd.read_csv(f'../../train_data/bootstrap_{tissue}_{corr}_add.csv', index_col=0)

    # The last column is the regression target; every column before it is a
    # feature.
    features = model_data.iloc[:, :-1]
    target = model_data.iloc[:, -1]

    loo = LeaveOneOut()
    # Not filled in this cell; kept because later notebook cells may use it.
    age_pred_list = []
    print('Now is running LOOCV')
    # Start time of the LOOCV run; not read in this cell.
    t1 = time.time()
    # One coefficient vector per leave-one-out model.
    coef = []
    # Only the training indices are needed: the held-out sample is never
    # predicted here, the goal is the spread of the coefficients.
    for train, _held_out in loo.split(model_data):
        train_x = features.iloc[train]
        train_y = target.iloc[train]

        # Slice the inner folds once per training set instead of once per
        # (alpha, l1_ratio) pair; the fold data does not depend on the grid.
        folds = [
            (
                train_x.iloc[train_index],
                train_y.iloc[train_index],
                train_x.iloc[test_index],
                train_y.iloc[test_index],
            )
            for train_index, test_index in k_folds.split(train_x)
        ]

        best_alpha, best_l1_ratio, best_mean_mse = _select_elastic_net_params(
            folds, alpha_range, l1_ratio_range
        )

        # Output optimal parameter
        print(f"Best alpha: {best_alpha}, Best l1_ratio: {best_l1_ratio}, Best mean MSE: {best_mean_mse}")

        # Refit on the full leave-one-out training set with the selected
        # hyperparameters and keep the fitted coefficients.
        age_model = ElasticNet(alpha=best_alpha, l1_ratio=best_l1_ratio, random_state=2024)
        age_model.fit(train_x, train_y)
        coef.append(age_model.coef_)

    # Transpose so that rows are features (in the order of
    # model_data.columns[:-1]) and columns are the leave-one-out models.
    coef = pd.DataFrame(np.array(coef).T)

    # 2.5th / 97.5th percentile of each feature's coefficient across models.
    ci_lower = np.percentile(coef, 2.5, axis=1)
    ci_upper = np.percentile(coef, 97.5, axis=1)

    ci = pd.DataFrame({'ci_lower': ci_lower, 'ci_upper': ci_upper})
    df = pd.concat([coef, ci], axis=1)
    df.to_csv(f'{tissue}_coef.csv')