In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    average_precision_score,
    f1_score,
    roc_auc_score,
)

In [2]:
def get_result(true, pred, data, n_folds=5):
    """Summarise per-fold classification metrics as "mean (± std)" strings.

    For every fold label ``0 .. n_folds - 1`` the matching entry of ``true``
    and ``pred`` is selected with ``.loc``, NaN values are dropped, and four
    metrics (ACC, AUROC, AUPR, F1) are computed. The per-fold metrics are then
    reduced to their mean and standard deviation across folds.

    Parameters
    ----------
    true : pandas.DataFrame
        Ground-truth labels. Must contain the index labels
        ``0 .. n_folds - 1``. Presumably one NaN-padded row per fold --
        TODO confirm against the code that writes ``true_*.csv``.
    pred : pandas.DataFrame
        Predicted scores, indexed the same way as ``true``.
    data : str
        Dataset name; it is upper-cased and used as the output column header.
    n_folds : int, default 5
        Number of folds to evaluate.

    Returns
    -------
    pandas.DataFrame
        A single column named ``data.upper()``, indexed by metric name, whose
        values are strings of the form ``"0.123 (± 0.004)"``.
    """
    fold_metrics = []
    for fold in range(n_folds):
        # NaNs are dropped from labels and scores independently; this assumes
        # both have NaNs in the same positions -- TODO confirm.
        true_labels = true.loc[fold].dropna()
        pred_values = pred.loc[fold].dropna()
        # np.round rounds halves to the nearest even value, so a score of
        # exactly 0.5 becomes label 0.
        pred_labels = np.round(pred_values)
        fold_metrics.append(
            {
                "ACC": accuracy_score(true_labels, pred_labels),
                "AUROC": roc_auc_score(true_labels, pred_values),
                "AUPR": average_precision_score(true_labels, pred_values),
                "F1": f1_score(true_labels, pred_labels),
            }
        )

    # Build the per-fold table once instead of concatenating inside the loop.
    res = pd.DataFrame(fold_metrics)

    means = res.mean()
    stds = res.std()  # pandas default: sample standard deviation (ddof=1)

    # Format each metric as "mean (± standard deviation)".
    formatted = means.map("{:.3f}".format) + " (± " + stds.map("{:.3f}".format) + ")"

    # One column named after the dataset, indexed by metric name.
    result_table = formatted.to_frame(name=data.upper())
    return result_table

In [3]:
# Evaluate every dataset and place the per-dataset summary columns side by side.
# The tables are collected first and concatenated once, rather than growing a
# DataFrame inside the loop starting from an empty frame.
dataset_tables = []
for dataset in ["nci", "gdsc1", "gdsc2", "ctrp"]:
    # The first CSV column is used as the index that get_result looks up with .loc.
    true = pd.read_csv(f"true_{dataset}.csv", index_col=0)
    pred = pd.read_csv(f"pred_{dataset}.csv", index_col=0)
    dataset_tables.append(get_result(true, pred, dataset))

res = pd.concat(dataset_tables, axis=1)

In [4]:
# Transpose for display: one row per dataset, one column per metric.
res.T

Unnamed: 0,ACC,AUROC,AUPR,F1
NCI,0.570 (± 0.005),0.606 (± 0.008),0.600 (± 0.011),0.495 (± 0.031)
GDSC1,0.815 (± 0.002),0.937 (± 0.002),0.941 (± 0.002),0.783 (± 0.003)
GDSC2,0.806 (± 0.003),0.934 (± 0.009),0.941 (± 0.007),0.765 (± 0.005)
CTRP,0.815 (± 0.002),0.896 (± 0.002),0.897 (± 0.002),0.800 (± 0.002)
