In [None]:
import pandas as pd
import numpy as np
import anndata as ann
import scanpy as sc
import os, sys
from scipy.stats import pearsonr as pr
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score as f1
from sklearn.metrics import precision_recall_curve as prc
from sklearn.metrics import silhouette_score as sil
from sklearn.metrics import auc
from sklearn.metrics import roc_curve

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, average_precision_score


In [None]:
sc.set_figure_params(dpi=400)

# Load Anndata

In [None]:
# load data
adata = sc.read('METMAP500.h5ad')
adata

  utils.warn_names_duplicates("obs")


AnnData object with n_obs × n_vars = 259 × 1512
    obs: 'Tissue', 'prim_or_metas', 'metas_site'
    var: 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm'
    uns: 'hvg', 'log1p'

In [None]:
# reformat the tissue as just the name
from collections import Counter
all_tissues = adata.obs['Tissue'].unique()
new_tissues = []
for tissue in adata.obs['Tissue']:
    if tissue.startswith('CNS/BRAIN'):
        new_tissues.append('BRAIN')
    else:
        new_tissues.append(tissue.split('/')[0].split('_')[0])
adata.obs['new_tissue'] = new_tissues
adata.obs['new_tissue'].unique()

array(['BOWEL', 'SKIN', 'LUNG', 'BLADDER', 'BRAIN', 'PANCREAS', 'OVARY',
       'KIDNEY', 'HEAD AND NECK', 'UTERUS', 'LIVER', 'BREAST',
       'ESOPHAGUS'], dtype=object)

# One hot encoding

In [6]:
# one hot encoding the labels
def one_hot_encode(labels, num_classes):
    unique_labels = list(set(labels))
    
    one_hot_encoded = np.zeros(shape=(len(labels), num_classes))

    # Encode labels by setting the corresponding index to 1 in each row
    for i, label in enumerate(labels):
        index = unique_labels.index(label)
        one_hot_encoded[i, index] = 1

    return one_hot_encoded, unique_labels

In [7]:
one_hot_labels_tissue,_ = one_hot_encode(adata.obs['new_tissue'], len(adata.obs['new_tissue'].unique()))

In [8]:
one_hot_labels_site, uniqe_labels_site = one_hot_encode(adata.obs['metas_site'], len(adata.obs['metas_site'].unique()))

## One vs. All

In [9]:
one_hot_labels_site_one_vs_all = dict()
all_sites = adata.obs.metas_site.value_counts().index
for tissue in all_sites:
    print(tissue)
    tissues_to_learn = tissue
    one_vs_all_labels = []
    for i in adata.obs.metas_site:
        if  i == tissues_to_learn:
            one_vs_all_labels.append(1)
        else:
            one_vs_all_labels.append(0)
    adata.obs[tissue+'.1va'] = one_vs_all_labels
    one_hot_labels_site_one_vs_all[tissue], _ = one_hot_encode(adata.obs[tissue+'.1va'], len(adata.obs[tissue+'.1va'].unique()))

Neg
kidney
liver
bone
lung
brain


In [10]:
adata.obs

Unnamed: 0,Tissue,prim_or_metas,metas_site,new_tissue,Neg.1va,kidney.1va,liver.1va,bone.1va,lung.1va,brain.1va
ACH-000007,BOWEL,Primary,Neg,BOWEL,1,0,0,0,0,0
ACH-000008,SKIN,Primary,liver,SKIN,0,0,1,0,0,0
ACH-000012,LUNG,Primary,bone,LUNG,0,0,0,1,0,0
ACH-000015,LUNG,Primary,liver,LUNG,0,0,1,0,0,0
ACH-000018,BLADDER/URINARY TRACT,Primary,brain,BLADDER,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
ACH-000996,UTERUS,Primary,bone,UTERUS,0,0,0,1,0,0
ACH-000997,BOWEL,Primary,lung,BOWEL,0,0,0,0,1,0
ACH-001016,CNS/BRAIN,Primary,kidney,BRAIN,0,1,0,0,0,0
ACH-001113,LUNG,Primary,brain,LUNG,0,0,0,0,0,1


# Dataloader

In [11]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.data import random_split

In [12]:
class GeneExpressionDataset(torch.utils.data.Dataset):
    def __init__(self, data, tumors, metas_sites):
        self.data = data  # gene expression data (shape: N x num_genes)
        self.tumors = tumors  # primary tumor type
        self.site = metas_sites # metastasis sites

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        expression = self.data[idx]  # single gene expression vector
        tumor = self.tumors[idx]  
        site = self.site[idx]
        return expression, tumor, site

In [13]:
# min max normalization
df_normalized = adata.to_df().apply(lambda x: (x - x.min()) / (x.max() - x.min()), axis=1)
df_normalized

Unnamed: 0,ENSG00000000971,ENSG00000001036,ENSG00000001084,ENSG00000001617,ENSG00000001626,ENSG00000002586,ENSG00000002726,ENSG00000002822,ENSG00000003147,ENSG00000003402,...,ENSG00000259207,ENSG00000261052,ENSG00000264424,ENSG00000271503,ENSG00000275385,ENSG00000275410,ENSG00000275718,ENSG00000275896,ENSG00000277443,ENSG00000277586
ACH-000007,0.001362,0.418544,0.369698,0.106475,0.408904,0.474120,0.458151,0.440864,0.464034,0.260111,...,0.001478,0.279958,0.017318,0.068780,0.000000,0.219731,0.493214,0.421571,0.273935,0.049319
ACH-000008,0.006430,0.477888,0.343918,0.017110,0.016921,0.478341,0.010205,0.480758,0.066012,0.335006,...,0.545344,0.276866,0.001148,0.127957,0.065129,0.003036,0.007540,0.011576,0.222305,0.393855
ACH-000012,0.191250,0.456729,0.308974,0.020991,0.001036,0.381573,0.035220,0.272343,0.308789,0.430792,...,0.112426,0.328580,0.000000,0.033411,0.000000,0.325642,0.004284,0.000000,0.372882,0.002864
ACH-000015,0.257558,0.451050,0.317799,0.103040,0.004082,0.413896,0.000636,0.338382,0.172822,0.322058,...,0.024053,0.230060,0.000000,0.024011,0.000000,0.002155,0.005483,0.000000,0.280227,0.057089
ACH-000018,0.153213,0.479226,0.263913,0.387343,0.000931,0.644546,0.009856,0.399677,0.156312,0.350153,...,0.242461,0.310473,0.000000,0.144101,0.000000,0.274338,0.000000,0.088571,0.421349,0.004805
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ACH-000996,0.074882,0.440487,0.295306,0.185086,0.048008,0.288487,0.006353,0.286848,0.340713,0.236046,...,0.164890,0.289611,0.000000,0.085934,0.000000,0.233327,0.004805,0.080224,0.190480,0.000000
ACH-000997,0.003535,0.544124,0.340695,0.161163,0.027014,0.531133,0.003096,0.303902,0.374838,0.413348,...,0.011084,0.287474,0.000000,0.000000,0.000000,0.218204,0.030232,0.003117,0.249225,0.006531
ACH-001016,0.034719,0.436016,0.333444,0.196966,0.012406,0.571435,0.000000,0.319017,0.000000,0.367459,...,0.217110,0.375106,0.000000,0.005349,0.000000,0.000000,0.000000,0.000000,0.490580,0.000862
ACH-001113,0.086371,0.221148,0.484695,0.234437,0.000000,0.387764,0.007274,0.227263,0.124448,0.373009,...,0.076591,0.368189,0.000000,0.019027,0.000000,0.000000,0.000000,0.000000,0.509043,0.410519


# Stratified Five fold CV

In [14]:
all_data = GeneExpressionDataset(torch.tensor(df_normalized.to_numpy(), dtype=torch.float32), 
                                 torch.tensor(one_hot_labels_tissue, dtype=torch.float32),
                                 torch.tensor(one_hot_labels_site, dtype=torch.float32))

In [15]:
from sklearn.model_selection import StratifiedKFold
# import numpy as np


# Convert to class indices
tissue_labels = np.argmax(one_hot_labels_tissue, axis=1)  # Convert one-hot to class indices
site_labels = np.argmax(one_hot_labels_site, axis=1)  # Convert one-hot to class indices

# Prepare StratifiedKFold
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)  # Use a fixed random_state for repeatability

# Store seeds and indices
cv_splits = []

batch_size = 8

# Iterate through the splits
for fold, (train_idx, val_idx) in enumerate(skf.split(df_normalized.to_numpy(), site_labels)):
    # print(train_idx)
    # print(val_idx)
    print(f'Fold {fold+1}')
    
    # Extract the training and validation data based on the indices
    train_data = torch.utils.data.Subset(all_data, train_idx)
    val_data = torch.utils.data.Subset(all_data, val_idx)

    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
    
    cv_splits.append({'train_idx': train_idx, 'val_idx': val_idx})
    
    # Check stratification by printing unique labels in the validation set
    val_tissue_labels = np.argmax(one_hot_labels_tissue[val_idx], axis=1)
    val_site_labels = np.argmax(one_hot_labels_site[val_idx], axis=1)
    
    print('Validation Tissue Labels:', np.unique(val_tissue_labels, return_counts=True), len(np.unique(val_tissue_labels)))
    print('Validation Site Labels:', np.unique(val_site_labels, return_counts=True), len(np.unique(val_site_labels)))

Fold 1
Validation Tissue Labels: (array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 11, 12]), array([4, 3, 1, 6, 3, 5, 5, 6, 5, 4, 3, 7])) 12
Validation Site Labels: (array([0, 1, 2, 3, 4, 5]), array([ 5, 11, 15,  7,  8,  6])) 6
Fold 2
Validation Tissue Labels: (array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]), array([ 5,  6,  2,  3,  1, 13,  3,  2,  2,  8,  2,  3,  2])) 13
Validation Site Labels: (array([0, 1, 2, 3, 4, 5]), array([ 5, 11, 15,  7,  8,  6])) 6
Fold 3
Validation Tissue Labels: (array([ 0,  1,  2,  3,  4,  5,  7,  8,  9, 10, 11, 12]), array([ 7,  4,  3,  2,  2,  9,  2,  2, 10,  2,  5,  4])) 12
Validation Site Labels: (array([0, 1, 2, 3, 4, 5]), array([ 5, 11, 14,  8,  8,  6])) 6
Fold 4
Validation Tissue Labels: (array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]), array([2, 5, 5, 3, 1, 9, 7, 2, 2, 5, 3, 4, 4])) 13
Validation Site Labels: (array([0, 1, 2, 3, 4, 5]), array([ 5, 11, 14,  8,  8,  6])) 6
Fold 5
Validation Tissue Labels: (array([ 0,  1,  2,  3, 

# PreMet

## Model

In [16]:
def initialize_weights_xavier(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight)  # Xavier initialization
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.Conv2d):
        nn.init.xavier_normal_(m.weight)  # Xavier initialization
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm1d) or isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
        

In [17]:
class Encoder(nn.Module):
    def __init__(self, gene_num, latent_size, hidden_dim=128):
        super(Encoder, self).__init__()
        self.fc1 = nn.Linear(gene_num, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.fc2_mean = nn.Linear(hidden_dim, latent_size)
        self.fc2_logvar = nn.Linear(hidden_dim, latent_size)
        # Apply Xavier initialization
        self.apply(initialize_weights_xavier)  

        
    def forward(self, x):
        # switch to leaky RELU
        h = F.leaky_relu(self.bn1(self.fc1(x)), negative_slope=0.01)
        mean = self.fc2_mean(h)
        logvar = self.fc2_logvar(h)
        return mean, logvar
    

class Decoder(nn.Module):
    def __init__(self, latent_size, gene_num, hidden_dim=128):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(latent_size, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, gene_num)
        # Apply Xavier initialization
        self.apply(initialize_weights_xavier)  
        
    def forward(self, z):
        h = F.leaky_relu(self.bn1(self.fc1(z)))
        reconstructed_x = self.fc2(h)  
        return reconstructed_x



class Primary_tumor_DNN(nn.Module):
    def __init__(self, num_latent, num_classes):
        super(Primary_tumor_DNN, self).__init__()
        self.fc1 = nn.Linear(num_latent, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, num_classes)
        # self.bn2 = nn.BatchNorm1d(128)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.fc2(x)
        #  x = self.fc3(x)  
        return self.softmax(x)


class Metas_site_DNN(nn.Module):
    def __init__(self, num_latent, num_classes):
        super(Metas_site_DNN, self).__init__()
        self.fc1 = nn.Linear(num_latent, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, num_classes)
        # self.bn2 = nn.BatchNorm1d(128)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.fc2(x)
        #  x = self.fc3(x)  
        return self.softmax(x)
    
# Define the VAE

class PreMet(nn.Module):
    def __init__(self, gene_num, latent_size, num_tissues, num_metas_sites, hidden_size):
        super(PreMet, self).__init__()
        self.encoder = Encoder(gene_num, latent_size, hidden_dim=hidden_size)
        self.decoder = Decoder(latent_size, gene_num, hidden_dim=hidden_size)
        self.tissue_nn = Primary_tumor_DNN(latent_size, num_tissues)
        self.site_nn = Metas_site_DNN(latent_size, num_metas_sites)
        
    def reparameterize(self, mean, logvar):
        if self.training:
            std = torch.exp(0.5 * logvar)
            eps = torch.randn_like(std)
            return mean + eps * std
        else:
            return mean
    
    def forward(self, x):
        mean, logvar = self.encoder(x)
        
        z = self.reparameterize(mean, logvar)
        reconstructed_x = self.decoder(z)
        
        # predict tumor type from latent
        tissue_output = self.tissue_nn(z)
        
        # predict metastasis site from latent
        site_output = self.site_nn(z)
        
        return reconstructed_x, mean, logvar, tissue_output, site_output

In [18]:
def premet_predict(model, data_loader):
    model.eval()
    all_labels = []
    with torch.no_grad():
        for data in data_loader:
            # print(data)
            data = data[0].to(next(model.parameters()).device)  # Ensure data is on the same device as the model
            mean, logvar = model.encoder(data)
            z = model.reparameterize(mean, logvar)
            labels = model.site_nn(z)
            # print(labels)
            all_labels.append(labels)
            
    all_labels = torch.cat(all_labels, dim=0).cpu().numpy()
    return all_labels

# Load best model

In [19]:
best_alpha = 0.25
best_gamma = 4
best_model_dir = '/depot/natallah/data/Luopin/Metestasis/5fold_CV_model_focal_loss_alpha'+str(best_alpha)+'_gamma'+str(best_gamma)+'/'
all_best_models = os.listdir(best_model_dir)
all_best_models.sort()

all_metrics = set()

for m in all_best_models:
    all_metrics.add(m.split('.')[-2])
all_metrics = list(all_metrics)
all_metrics.sort()
all_metrics

['accuracy', 'auc', 'auprc', 'avg', 'f1_score']

### PreMet-VAE (last row in Table 3.1)

In [20]:
final_prediction_metric_dict = dict()
prior_prediction_metric_dict = dict()

all_ground_truth_dfs = dict()
all_prediction_value_dfs = dict()

for metric_idx in range(len(all_metrics)):
    final_prediction_metric_dict[all_metrics[metric_idx]] = dict()
    prior_prediction_metric_dict[all_metrics[metric_idx]] = dict()
    all_ground_truth_dfs[all_metrics[metric_idx]] = dict()
    all_prediction_value_dfs[all_metrics[metric_idx]] = dict()
    
    fold = 1
    prior_accuracies = []
    prior_f1_scores = []
    prior_aurocs = []
    prior_auprcs = []

    accuracies = []
    f1_scores = []
    aurocs = []
    auprcs = []

    for split in cv_splits:
        # print('*' * 10, fold, '*' * 10)
        
        all_ground_truth_dfs[all_metrics[metric_idx]][fold] = dict()
        all_prediction_value_dfs[all_metrics[metric_idx]][fold] = dict()

        all_data = GeneExpressionDataset(torch.tensor(df_normalized.to_numpy(), dtype=torch.float32), 
                                        torch.tensor(one_hot_labels_tissue, dtype=torch.float32),
                                        torch.tensor(one_hot_labels_site, dtype=torch.float32))
        # Extract the training and validation data based on the indices
        val_data = torch.utils.data.Subset(all_data, split['val_idx'])
        val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=False)        

        # load the best model for different metrics

        for metric in [all_metrics[metric_idx]]:
            # print('+' * 10, metric, '+' * 10)
            best_models = dict()
            # load the model
            for site in all_sites:
                # print('$' * 10, site, '$' * 10)
                model = PreMet(gene_num=adata.n_vars, latent_size=32, 
                                num_tissues=len(adata.obs['new_tissue'].unique()),
                                num_metas_sites=len(adata.obs[tissue + '.1va'].unique()),
                                hidden_size=64)

                model.load_state_dict(torch.load(best_model_dir + site + '.split_' + str(fold) + '.' + metric + '.pth'))
                best_models[site] = model
                """
                for name, param in model.named_parameters():
                    print(f"Parameter: {name}, Size: {param.size()}")
                    print(param.data)
                    break
                """

            predictions = dict()
            ground_truth_dfs = adata.obs.iloc[split['val_idx']]
            for site in all_sites:
                preds = premet_predict(best_models[site], val_dataloader)
                # the second column indicates sample predicted to be this label
                predictions[site] = preds[:, 1]
            prediction_df = pd.DataFrame.from_dict(predictions)
            new_names = [s + '.1va' for s in all_sites]
            ground_truth_dfs = ground_truth_dfs[new_names]
            
            # save the predictions and ground truth for confusionmatrix
            all_ground_truth_dfs[all_metrics[metric_idx]][fold] = ground_truth_dfs
            all_prediction_value_dfs[all_metrics[metric_idx]][fold] = prediction_df

            # Convert the DataFrames to numpy arrays
            true_labels = ground_truth_dfs.to_numpy()
            pred_probs = prediction_df.to_numpy()

            # Calculate priors

            # Convert one-hot encoded labels to class indices
            true_labels_val = np.argmax(true_labels, axis=1)

            # Calculate the class distribution
            class_counts = np.bincount(true_labels_val)
            total_samples = len(true_labels_val)
            most_frequent_class = class_counts.max()

            # Prior Accuracy: The frequency of the most common class
            prior_accuracy = most_frequent_class / total_samples

            # Prior F1 Score: Macro F1 score assuming all samples are predicted as the most frequent class
            dummy_predictions = np.full_like(true_labels_val, fill_value=np.argmax(class_counts))  # Predicting the most frequent class for all
            prior_f1_score = f1(true_labels_val, dummy_predictions, average='macro')

            # Prior AUROC: This will be 0.5 for random guessing in multi-class problems (you can calculate class-wise AUROC if needed)
            prior_auroc = 0.5

            # Prior AUPRC: Class-wise AUPRC (One-vs-Rest)
            prior_auprc = average_precision_score(np.eye(len(class_counts))[true_labels_val], np.eye(len(class_counts))[dummy_predictions], average='macro')

            # Store prior metrics
            prior_accuracies.append(prior_accuracy)
            prior_f1_scores.append(prior_f1_score)
            prior_aurocs.append(prior_auroc)
            prior_auprcs.append(prior_auprc)

            # Print prior metrics
            """
            print(f'Prior Accuracy: {prior_accuracy:.4f}')
            print(f'Prior F1 Score: {prior_f1_score:.4f}')
            print(f'Prior AUROC: {prior_auroc:.4f}')
            print(f'Prior AUPRC: {prior_auprc:.4f}')
            print('#' * 10)
            """

            # Calculate metrics
            accuracy = accuracy_score(np.argmax(true_labels, axis=1), np.argmax(pred_probs, axis=1))
            f1_score = f1(np.argmax(true_labels, axis=1), np.argmax(pred_probs, axis=1), average='macro')
            auroc = roc_auc_score(true_labels, pred_probs, multi_class='ovr')

            # Calculate AUPRC for each class and average
            precision = dict()
            recall = dict()
            auprc = []
            for i in range(true_labels.shape[1]):
                precision[i], recall[i], _ = prc(true_labels[:, i], pred_probs[:, i])
                auprc.append(auc(recall[i], precision[i]))
            avg_auprc = np.mean(auprc)

            # Store metrics
            accuracies.append(accuracy)
            f1_scores.append(f1_score)
            aurocs.append(auroc)
            auprcs.append(avg_auprc)

        fold += 1

    prior_prediction_metric_dict[all_metrics[metric_idx]]['f1'] = prior_f1_scores
    prior_prediction_metric_dict[all_metrics[metric_idx]]['accuracy'] = prior_accuracies
    prior_prediction_metric_dict[all_metrics[metric_idx]]['auroc'] = prior_aurocs
    prior_prediction_metric_dict[all_metrics[metric_idx]]['auprc'] = prior_auprcs
        
    final_prediction_metric_dict[all_metrics[metric_idx]]['f1'] = f1_scores
    final_prediction_metric_dict[all_metrics[metric_idx]]['accuracy'] = accuracies
    final_prediction_metric_dict[all_metrics[metric_idx]]['auroc'] = aurocs
    final_prediction_metric_dict[all_metrics[metric_idx]]['auprc'] = auprcs


In [21]:
final_prediction_metric_df = pd.DataFrame.from_dict(final_prediction_metric_dict)
final_prediction_metric_df
final_prediction_metric_avg = []
for idx in final_prediction_metric_df.index:
    tmp = []
    for col in final_prediction_metric_df.columns:
        tmp.append(np.mean(final_prediction_metric_df[col].loc[idx]))
    final_prediction_metric_avg.append(tmp)

final_prediction_metric_avg_df = pd.DataFrame(final_prediction_metric_avg)
final_prediction_metric_avg_df.columns =  final_prediction_metric_df.columns
final_prediction_metric_avg_df.index =  final_prediction_metric_df.index
final_prediction_metric_avg_df

Unnamed: 0,accuracy,auc,auprc,avg,f1_score
f1,0.482891,0.409846,0.377033,0.503269,0.502824
accuracy,0.501885,0.459502,0.401735,0.501885,0.521569
auroc,0.70756,0.791326,0.734091,0.766492,0.722608
auprc,0.451378,0.443024,0.549126,0.508891,0.42614


In [22]:
# we use "avg" as the final score across all models, the mean score between f1, acc, roc and prc
# this is also what's reported in the paper
best_metric = 'avg'
best_final_prediction_metrid_df = pd.DataFrame.from_dict(final_prediction_metric_dict[best_metric])
display(best_final_prediction_metrid_df)
for col in best_final_prediction_metrid_df.columns:
    # print(idx)
    # tmp = []
    tmp = best_final_prediction_metrid_df[col]
    # final_prediction_metric_avg.append(tmp)
    print(f'{col} ${np.mean(tmp):.2f}\pm{np.std(tmp):.2f}$')

Unnamed: 0,f1,accuracy,auroc,auprc
0,0.536508,0.519231,0.793021,0.580597
1,0.558558,0.557692,0.78391,0.572512
2,0.463477,0.480769,0.745099,0.467925
3,0.46375,0.461538,0.778282,0.448059
4,0.494052,0.490196,0.732146,0.475363


f1 $0.50\pm0.04$
accuracy $0.50\pm0.03$
auroc $0.77\pm0.02$
auprc $0.51\pm0.06$
