In [3]:
from lxh_prediction.exp_utils import get_cv_preds
from lxh_prediction.compare_auc_delong_xu import delong_roc_test
import itertools
import numpy as np
import pandas as pd


def compare_p_values(compare_set, n_folds=5):
    """Build a pairwise table of per-fold ROC-comparison p-values.

    For every entry of ``compare_set`` the cross-validated predictions are
    fetched with ``get_cv_preds`` (``update=False``, ``resample_train=False``).
    Each unordered pair of entries is then compared fold by fold with
    ``delong_roc_test`` and the per-fold p-values are summarised in one cell
    as ``"median[min, max]"``.

    Args:
        compare_set: Sequence of ``(model_name, feat_collection)`` pairs. The
            same sequence is used as row and column labels of the result.
        n_folds: Number of leading CV folds to compare for each pair. The
            default of 5 matches the previously hard-coded value.

    Returns:
        pandas.DataFrame: Square, symmetric table of strings labelled by
        ``compare_set`` on both axes. Diagonal cells are empty strings.

    Raises:
        ValueError: If ``n_folds`` is smaller than 1, or if two compared
            entries do not share identical ground-truth labels in a fold.
    """
    if n_folds < 1:
        raise ValueError(f"n_folds must be at least 1, got {n_folds}")

    n_models = len(compare_set)

    # Inference with CV. Predictions are kept in a list aligned with
    # ``compare_set`` so entries do not need to be hashable or unique.
    cv_preds = [
        get_cv_preds(
            model_name=model_name,
            feat_collection=feat_collection,
            update=False,
            resample_train=False,
        )
        for model_name, feat_collection in compare_set
    ]

    # Compare p-values: pvalues[i, j, k] is the p-value of entry i vs. entry j
    # on fold k. The diagonal is never written and stays at zero.
    pvalues = np.zeros((n_models, n_models, n_folds), dtype=float)
    for i, j in itertools.combinations(range(n_models), 2):
        for k in range(n_folds):
            # Each fold is unpacked as a (labels, predicted scores) pair.
            y1, prob1 = cv_preds[i][k]
            y2, prob2 = cv_preds[j][k]
            y = np.asarray(y1).reshape(-1)
            # The paired test is only meaningful when both entries were
            # evaluated on exactly the same samples in this fold.
            if not np.array_equal(y, np.asarray(y2).reshape(-1)):
                raise ValueError(
                    f"Ground-truth labels differ between {compare_set[i]} "
                    f"and {compare_set[j]} in fold {k}"
                )
            prob1 = np.asarray(prob1).reshape(-1)
            prob2 = np.asarray(prob2).reshape(-1)
            # The test result is treated as log10(p); convert it back once
            # and mirror it, since the comparison is symmetric.
            log10p = delong_roc_test(y, prob1, prob2)
            pvalue = np.power(10, log10p).reshape(-1)[0]
            pvalues[i, j, k] = pvalue
            pvalues[j, i, k] = pvalue

    # format: one "median[min, max]" string per pair, mirrored across the diagonal
    pvalues_disp = [[""] * n_models for _ in range(n_models)]
    for i, j in itertools.combinations(range(n_models), 2):
        ps = pvalues[i, j]
        text = f"{np.median(ps):.2g}[{ps.min():.2g}, {ps.max():.2g}]"
        pvalues_disp[i][j] = text
        pvalues_disp[j][i] = text
    pvalues_disp = pd.DataFrame(pvalues_disp, index=compare_set, columns=compare_set)

    return pvalues_disp


In [4]:
# Pairwise comparison of five model types, all on the "full_non_lab" feature collection.
compare_set = [
    (model_name, "full_non_lab")
    for model_name in (
        "LightGBMModel",
        "ANNModel",
        "RandomForestModel",
        "SVMModel",
        "LogisticRegressionModel",
    )
]
compare_p_values(compare_set)


Unnamed: 0,"(LightGBMModel, full_non_lab)","(ANNModel, full_non_lab)","(RandomForestModel, full_non_lab)","(SVMModel, full_non_lab)","(LogisticRegressionModel, full_non_lab)"
"(LightGBMModel, full_non_lab)",,"0.63[0.34, 0.74]","2e-06[1.7e-08, 9.9e-06]","3e-08[4.6e-11, 0.00066]","3.4e-08[2.4e-09, 0.00037]"
"(ANNModel, full_non_lab)","0.63[0.34, 0.74]",,"4.2e-06[2.4e-09, 0.00035]","8.3e-08[4.7e-11, 0.0031]","1.8e-08[1.1e-10, 0.0017]"
"(RandomForestModel, full_non_lab)","2e-06[1.7e-08, 9.9e-06]","4.2e-06[2.4e-09, 0.00035]",,"0.76[0.7, 0.8]","0.68[0.61, 0.95]"
"(SVMModel, full_non_lab)","3e-08[4.6e-11, 0.00066]","8.3e-08[4.7e-11, 0.0031]","0.76[0.7, 0.8]",,"0.7[0.092, 0.89]"
"(LogisticRegressionModel, full_non_lab)","3.4e-08[2.4e-09, 0.00037]","1.8e-08[1.1e-10, 0.0017]","0.68[0.61, 0.95]","0.7[0.092, 0.89]",


In [5]:
# LightGBM on "top20_non_lab" against CHModel, ADAModel, and LightGBM on each
# of the FPG / 2hPG / HbA1c feature collections.
compare_set = [
    ("LightGBMModel", "top20_non_lab"),
    ("CHModel", "CH"),
    ("ADAModel", "ADA"),
    *(("LightGBMModel", feat_collection) for feat_collection in ("FPG", "2hPG", "HbA1c")),
]
compare_p_values(compare_set)


Unnamed: 0,"(LightGBMModel, top20_non_lab)","(CHModel, CH)","(ADAModel, ADA)","(LightGBMModel, FPG)","(LightGBMModel, 2hPG)","(LightGBMModel, HbA1c)"
"(LightGBMModel, top20_non_lab)",,"4.9e-05[1e-05, 0.0019]","7.1e-07[5.7e-08, 0.0067]","2.9e-20[1.5e-26, 4.9e-19]","2.2e-31[2.4e-32, 2.3e-20]","1.9e-10[2.1e-13, 1.6e-07]"
"(CHModel, CH)","4.9e-05[1e-05, 0.0019]",,"0.067[0.0066, 0.96]","7.5e-36[8.1e-38, 9.4e-31]","3.4e-38[7.7e-45, 2.2e-33]","2e-17[1.2e-24, 3e-14]"
"(ADAModel, ADA)","7.1e-07[5.7e-08, 0.0067]","0.067[0.0066, 0.96]",,"1.1e-37[4.5e-44, 2.1e-32]","7.8e-42[9.3e-53, 1.5e-34]","2.8e-19[2.3e-26, 1.2e-12]"
"(LightGBMModel, FPG)","2.9e-20[1.5e-26, 4.9e-19]","7.5e-36[8.1e-38, 9.4e-31]","1.1e-37[4.5e-44, 2.1e-32]",,"0.55[0.031, 0.84]","5.9e-07[4.4e-11, 0.0013]"
"(LightGBMModel, 2hPG)","2.2e-31[2.4e-32, 2.3e-20]","3.4e-38[7.7e-45, 2.2e-33]","7.8e-42[9.3e-53, 1.5e-34]","0.55[0.031, 0.84]",,"1.9e-08[5.4e-11, 7.9e-07]"
"(LightGBMModel, HbA1c)","1.9e-10[2.1e-13, 1.6e-07]","2e-17[1.2e-24, 3e-14]","2.8e-19[2.3e-26, 1.2e-12]","5.9e-07[4.4e-11, 0.0013]","1.9e-08[5.4e-11, 7.9e-07]",


In [6]:
# CHModel on the CH_* feature collections against LightGBM on the
# FPG / 2hPG / HbA1c feature collections.
compare_set = list(
    zip(
        ["CHModel"] * 3 + ["LightGBMModel"] * 3,
        ["CH_FPG", "CH_2hPG", "CH_HbA1c", "FPG", "2hPG", "HbA1c"],
    )
)
compare_p_values(compare_set)

Unnamed: 0,"(CHModel, CH_FPG)","(CHModel, CH_2hPG)","(CHModel, CH_HbA1c)","(LightGBMModel, FPG)","(LightGBMModel, 2hPG)","(LightGBMModel, HbA1c)"
"(CHModel, CH_FPG)",,"0.55[0.026, 0.87]","1.8e-14[2.1e-19, 3.5e-12]","0.0096[0.0052, 0.028]","0.24[0.00079, 0.52]","0.009[1.5e-05, 0.41]"
"(CHModel, CH_2hPG)","0.55[0.026, 0.87]",,"1.2e-18[1.4e-21, 1.6e-16]","0.37[0.16, 0.49]","0.031[0.0061, 0.12]","9e-05[1.8e-05, 0.00048]"
"(CHModel, CH_HbA1c)","1.8e-14[2.1e-19, 3.5e-12]","1.2e-18[1.4e-21, 1.6e-16]",,"4.4e-19[1.5e-21, 2e-14]","8.1e-20[6.8e-24, 9.3e-18]","2.2e-08[1.1e-11, 5e-06]"
"(LightGBMModel, FPG)","0.0096[0.0052, 0.028]","0.37[0.16, 0.49]","4.4e-19[1.5e-21, 2e-14]",,"0.55[0.031, 0.84]","5.9e-07[4.4e-11, 0.0013]"
"(LightGBMModel, 2hPG)","0.24[0.00079, 0.52]","0.031[0.0061, 0.12]","8.1e-20[6.8e-24, 9.3e-18]","0.55[0.031, 0.84]",,"1.9e-08[5.4e-11, 7.9e-07]"
"(LightGBMModel, HbA1c)","0.009[1.5e-05, 0.41]","9e-05[1.8e-05, 0.00048]","2.2e-08[1.1e-11, 5e-06]","5.9e-07[4.4e-11, 0.0013]","1.9e-08[5.4e-11, 7.9e-07]",


In [12]:
# Three LightGBM-related model variants on the "top20_non_lab" feature collection.
compare_set = [
    (model_name, "top20_non_lab")
    for model_name in ("LightGBMModel", "AutoLightGBMModel2", "EnsembleModel")
]
compare_p_values(compare_set)

Unnamed: 0,"(LightGBMModel, top20_non_lab)","(AutoLightGBMModel2, top20_non_lab)","(EnsembleModel, top20_non_lab)"
"(LightGBMModel, top20_non_lab)",,"0.8[0.46, 0.93]","0.44[0.078, 0.99]"
"(AutoLightGBMModel2, top20_non_lab)","0.8[0.46, 0.93]",,"0.27[0.091, 0.85]"
"(EnsembleModel, top20_non_lab)","0.44[0.078, 0.99]","0.27[0.091, 0.85]",


In [13]:
# The same three model variants, this time on the "FPG" feature collection.
compare_set = [
    (model_name, "FPG")
    for model_name in ("LightGBMModel", "AutoLightGBMModel2", "EnsembleModel")
]
compare_p_values(compare_set)


Unnamed: 0,"(LightGBMModel, FPG)","(AutoLightGBMModel2, FPG)","(EnsembleModel, FPG)"
"(LightGBMModel, FPG)",,"0.47[0.25, 0.79]","0.3[0.075, 0.66]"
"(AutoLightGBMModel2, FPG)","0.47[0.25, 0.79]",,"0.39[0.14, 0.97]"
"(EnsembleModel, FPG)","0.3[0.075, 0.66]","0.39[0.14, 0.97]",
