In [None]:
from lxh_prediction.plot import plot_curve, plt
from lxh_prediction.curves_auc import auROCNonLab, auPRNonLab


In [None]:
# Figure 3a (ROC): compare model families on the "full_non_lab" feature set.

# retrain=False is passed through to the experiment; presumably it reuses
# previously trained models instead of refitting -- confirm in curves_auc.
exp = auROCNonLab(retrain=False)

# (legend label, model class name) pairs, registered in plotting order.
for curve_label, model_cls in (
    ("LGBM Model", "LightGBMModel"),
    ("ANN Model", "ANNModel"),
    ("RF Model", "RandomForestModel"),
    ("SVM Model", "SVMModel"),
    ("LR Model", "LogisticRegressionModel"),
):
    exp.run(curve_label, model_cls, "full_non_lab")

exp.plot()

# Dashed navy reference curve labelled "Random", drawn over the model curves
# (the two (0, 1) tuples are presumably the x and y end points of the diagonal).
plot_curve(
    (0, 1),
    (0, 1),
    ylim=(0, 1),
    xlabel="1-Specificity",
    ylabel="Sensitivity",
    color="navy",
    lw=2,
    linestyle="--",
    name="Random",
)

exp.save("figure3a_auc")


In [None]:
# Figure 3a (precision-recall): compare model families on "full_non_lab".

# retrain=False is passed through to the experiment; presumably it reuses
# previously trained models instead of refitting -- confirm in curves_auc.
exp = auPRNonLab(retrain=False)

# (legend label, model class name) pairs, registered in plotting order.
for curve_label, model_cls in (
    ("LGBM Model", "LightGBMModel"),
    ("ANN Model", "ANNModel"),
    ("RF Model", "RandomForestModel"),
    ("SVM Model", "SVMModel"),
    ("LR Model", "LogisticRegressionModel"),
):
    exp.run(curve_label, model_cls, "full_non_lab")

exp.plot()
# Re-issue the legend so it sits in the upper-right corner of the plot.
plt.legend(loc="upper right")

exp.save("figure3a_auPR")


In [None]:
# Figure 3b (ROC): LightGBM on the full feature set vs. the top-N subsets.

# retrain=False is passed through to the experiment; presumably it reuses
# previously trained models instead of refitting -- confirm in curves_auc.
exp = auROCNonLab(retrain=False)

# (legend label, feature collection) pairs; the model class is always LightGBM.
for curve_label, feature_set in (
    ("Full Model", "full_non_lab"),
    ("Top-25 Model", "top25_non_lab"),
    ("Top-20 Model", "top20_non_lab"),
    ("Top-15 Model", "top15_non_lab"),
    ("Top-10 Model", "top10_non_lab"),
    ("Top-5 Model", "top5_non_lab"),
):
    exp.run(curve_label, "LightGBMModel", feature_set)

exp.plot()

# Dashed navy reference curve labelled "Random", drawn over the model curves
# (the two (0, 1) tuples are presumably the x and y end points of the diagonal).
plot_curve(
    (0, 1),
    (0, 1),
    ylim=(0, 1),
    xlabel="1-Specificity",
    ylabel="Sensitivity",
    color="navy",
    lw=2,
    linestyle="--",
    name="Random",
)

exp.save("figure3b_auc")


In [None]:
# Figure 3b (precision-recall): LightGBM on the full set vs. top-N subsets.

# retrain=False is passed through to the experiment; presumably it reuses
# previously trained models instead of refitting -- confirm in curves_auc.
exp = auPRNonLab(retrain=False)

# (legend label, feature collection) pairs; the model class is always LightGBM.
for curve_label, feature_set in (
    ("Full Model", "full_non_lab"),
    ("Top-25 Model", "top25_non_lab"),
    ("Top-20 Model", "top20_non_lab"),
    ("Top-15 Model", "top15_non_lab"),
    ("Top-10 Model", "top10_non_lab"),
    ("Top-5 Model", "top5_non_lab"),
):
    exp.run(curve_label, "LightGBMModel", feature_set)

exp.plot()
# Re-issue the legend so it sits in the upper-right corner of the plot.
plt.legend(loc="upper right")

exp.save("figure3b_auPR")


### Figure 3c: Feature ranking using SHAP

In [None]:
from matplotlib import pyplot as plt
import numpy as np
import shap

from lxh_prediction.explain_model import explain_with_shape_lgbm


# Build the explainer on the full non-lab feature collection; the helper
# returns the explainer, the feature matrix X and the feature names.
feat_collection = "full_non_lab"
explainer, X, feature_names = explain_with_shape_lgbm(feat_collection)

# Index 1 picks the second entry of the explainer outputs -- presumably the
# positive (diabetes) class of a binary classifier.
# NOTE(review): some SHAP releases return a single array (not a per-class
# list) for binary LightGBM models, in which case `[1]` would select a row
# instead -- confirm against the pinned shap version.
shap_values = explainer.shap_values(X)[1]
# Not used by the plot below; kept as notebook state.
expected_value = explainer.expected_value[1]

# NOTE(review): shap.summary_plot may resize the current figure itself
# (its default plot_size is "auto"), so figsize here might not take effect
# while dpi does -- confirm against the rendered output.
fig = plt.figure(figsize=(4, 4), dpi=300)
# Bar-type summary limited to the 20 highest-ranked features.
shap.summary_plot(
    shap_values, X, max_display=20, plot_type="bar", feature_names=feature_names,
)


### Figure 3d–f: Relative-risk dependence plots (RPR, Age, WHR)

In [None]:
import pandas as pd

# Reload data for Top20 features only.
# This rebinds feat_collection, explainer, X, feature_names, shap_values and
# expected_value, so every cell below works on the top-20 collection rather
# than the full one used for Figure 3c.
feat_collection = "top20_non_lab"
explainer, X, feature_names = explain_with_shape_lgbm(feat_collection)

# Index 1 picks the second entry of the explainer outputs -- presumably the
# positive (diabetes) class.
# NOTE(review): confirm the installed shap version returns per-class lists
# for this model; otherwise `[1]` would index a row instead of a class.
shap_values = explainer.shap_values(X)[1]
expected_value = explainer.expected_value[1]


def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise.

    ``x`` may be anything ``np.exp`` and ordinary arithmetic accept; in this
    notebook it is called with both a scalar and a pandas DataFrame.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)


# SHAP values as a DataFrame that shares X's column labels
# (one column per feature).
shap_v = pd.DataFrame(shap_values, columns=X.columns)
# Explainer base value; presumably on the log-odds scale, which is why it is
# passed through sigmoid below -- confirm the model's output space.
phi0 = expected_value

# Relative risk per sample and feature: sigmoid of (base value + that single
# feature's SHAP value), divided by sigmoid of the base value alone.
baseline_risk = sigmoid(phi0)
RR = sigmoid(shap_v + phi0) / baseline_risk


In [None]:
# Dependence plot of the relative risk against the "RPR" feature.
name = "RPR"
fig = plt.figure(figsize=(5, 5), dpi=100)
ax = fig.gca()
# Fix the x-range before plotting so the scatter is shown on 40-140 only.
ax.set_xlim(40, 140)
shap.dependence_plot(
    name,
    RR.values,  # relative-risk matrix passed in place of raw SHAP values
    X,
    ax=ax,
    feature_names=feature_names,
    display_features=X,
    interaction_index=None,  # no interaction feature
    show=False,  # do not show yet; the y-label is set afterwards
)
ax.set_ylabel("Relative Risk for diabetes")


In [None]:
# Dependence plot of the relative risk against the "Age" feature.
name = "Age"
fig = plt.figure(figsize=(5, 5), dpi=100)
ax = fig.gca()
shap.dependence_plot(
    name,
    RR.values,  # relative-risk matrix passed in place of raw SHAP values
    X,
    ax=ax,
    feature_names=feature_names,
    display_features=X,
    interaction_index=None,  # no interaction feature
    show=False,  # do not show yet; the y-label is set afterwards
)
ax.set_ylabel("Relative Risk for diabetes")


In [None]:
# Dependence plot of the relative risk against the "WHR" feature.
name = "WHR"
fig = plt.figure(figsize=(5, 5), dpi=100)
ax = fig.gca()
shap.dependence_plot(
    name,
    RR.values,  # relative-risk matrix passed in place of raw SHAP values
    X,
    ax=ax,
    feature_names=feature_names,
    display_features=X,
    interaction_index=None,  # no interaction feature
    show=False,  # do not show yet; the y-label is set afterwards
)
ax.set_ylabel("Relative Risk for diabetes")
