In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import (accuracy_score, average_precision_score, f1_score,
                             roc_auc_score)

In [6]:
def get_result(true, pred, data, n_folds=5):
    """Summarise per-fold classification metrics as "mean (± std)" strings.

    For every fold label ``0 .. n_folds - 1`` the matching row of ``true`` and
    ``pred`` is selected with ``.loc``, NaN entries are dropped from each row
    independently, and the predictions are rounded with ``np.round`` to get
    hard labels. ACC, AUROC, AUPR and F1 are computed per fold and then
    aggregated across folds.

    Parameters
    ----------
    true : pandas.DataFrame
        Ground-truth labels, one row per fold, indexed by fold number.
        NOTE(review): the NaN entries are presumably padding for folds of
        unequal length -- confirm against whatever writes the CSV files.
    pred : pandas.DataFrame
        Predicted values in the same layout as ``true``.
        NOTE(review): assumed to be scores in [0, 1] so that rounding yields
        0/1 labels -- confirm.
    data : str
        Dataset name; its upper-cased form becomes the column header.
    n_folds : int, default 5
        Number of folds to evaluate. Rows ``0 .. n_folds - 1`` must exist in
        both frames.

    Returns
    -------
    pandas.DataFrame
        A single column named ``data.upper()``, indexed by metric name
        (ACC, AUROC, AUPR, F1), whose cells are strings formatted as
        ``"mean (± std)"`` with three decimals.
    """
    # Collect one record per fold and build the frame once, rather than
    # re-concatenating a growing DataFrame on every iteration.
    fold_metrics = []
    for fold in range(n_folds):
        true_labels = true.loc[fold].dropna()
        pred_values = pred.loc[fold].dropna()
        # np.round rounds halves to the nearest even value, so a prediction of
        # exactly 0.5 becomes label 0.
        pred_labels = np.round(pred_values)
        fold_metrics.append({
            'ACC': accuracy_score(true_labels, pred_labels),
            'AUROC': roc_auc_score(true_labels, pred_values),
            'AUPR': average_precision_score(true_labels, pred_values),
            'F1': f1_score(true_labels, pred_labels)
        })

    res = pd.DataFrame(fold_metrics)
    means = res.mean()
    # Standard deviation across folds (pandas default, ddof=1) -- not the variance.
    stds = res.std()

    # Format each metric as "mean (± std)".
    formatted = means.map("{:.3f}".format) + " (± " + stds.map("{:.3f}".format) + ")"

    # One-column table: metric names as the index, dataset name as the header.
    return formatted.to_frame(name=data.upper())

In [7]:
# Load the NCI ground-truth and prediction tables; the first CSV column is the index.
# NOTE(review): the loop cell further down re-reads these same files for 'nci',
# so this cell looks redundant -- confirm before removing it.
true, pred = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("true_nci.csv", "pred_nci.csv")
)

In [8]:
# Evaluate each dataset and gather one summary column per dataset.
# The per-dataset tables are collected in a list and concatenated once,
# instead of growing `res` with pd.concat on every iteration.
tables = []
for dataset in ['nci', 'gdsc1', 'gdsc2', 'ctrp']:
    true = pd.read_csv(f'true_{dataset}.csv', index_col=0)
    pred = pd.read_csv(f'pred_{dataset}.csv', index_col=0)
    tables.append(get_result(true, pred, dataset))

# Rows are metric names, columns are the upper-cased dataset names.
res = pd.concat(tables, axis=1)

In [9]:
# Display the summary with one row per dataset and one column per metric.
res.transpose()

Unnamed: 0,ACC,AUROC,AUPR,F1
NCI,0.780 (± 0.006),0.864 (± 0.005),0.867 (± 0.005),0.769 (± 0.007)
GDSC1,0.881 (± 0.002),0.977 (± 0.001),0.978 (± 0.000),0.868 (± 0.003)
GDSC2,0.878 (± 0.005),0.984 (± 0.001),0.984 (± 0.001),0.864 (± 0.006)
CTRP,0.863 (± 0.002),0.944 (± 0.001),0.946 (± 0.001),0.856 (± 0.002)
