O SHAP será executado para o modelo que apresentou o melhor desempenho em cada desfecho.

- Óbito: LogisticRegression - Literatura

- Permanência (30 dias): ExtraTrees - Boruta

- Reinternação: KNeighbors - Literatura

In [None]:
import pandas as pd
import numpy as np
import shap
from shap import Explanation
import pickle
from plotly import express as ex
import matplotlib.pyplot as plt
import plotly.io as pio
import os
from scipy.special import expit


import warnings
warnings.filterwarnings('ignore')

# 1. Óbito: LogisticRegression

In [None]:
# Load the "liter" (literature) feature matrices and label vectors for the
# death (óbito) outcome. Features come from CSV, labels from NumPy .npy files.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/obito_pickles/X_train_liter_OBITO.csv'),
    pd.read_csv('../PREPROCESSED_FILES/obito_pickles/X_test_liter_OBITO.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/obito_pickles/y_train_liter_OBITO.npy'),
    np.load('../PREPROCESSED_FILES/obito_pickles/y_test_liter_OBITO.npy'),
)

In [None]:
# Deserialize the pickled LogisticRegression model for the death outcome.
# NOTE(review): pickle.load executes arbitrary code from the file — only load
# artifacts produced by this project.
path = '../PREDICTION/OBITO/OBITO_PICKLES/LogisticRegression_literature_obito.pkl'

with open(path, mode='rb') as model_file:
    classifier = pickle.load(model_file)

In [None]:
# Background rows that KernelExplainer uses as its reference distribution.
# A fixed seed makes the SHAP values reproducible between runs, and the sample
# size is capped so the call cannot fail on a training set with < 50 rows.
background = X_train.sample(n=min(50, len(X_train)), random_state=42)


def f(x):
    """Return column 1 of ``classifier.predict_proba`` for every row of ``x``."""
    return classifier.predict_proba(x)[:, 1]


# Model-agnostic explainer built from the probability function above.
explainer = shap.KernelExplainer(f, background)

# One row of SHAP values per test sample.
shap_values = explainer.shap_values(X_test)

In [None]:
# Wrap the raw KernelExplainer output in an Explanation object so that it can
# be passed to shap.plots.beeswarm.
shap_exp = Explanation(
    values=shap_values,
    data=X_test.values,
    feature_names=list(X_test.columns),
)

# Portuguese column name -> English label shown on the plot. Columns that are
# not listed keep their original name. (The duplicated
# "cid_secundario_internacao_vec_117" entry was removed.)
traducao = {
    "idade_internacao": "age_at_admission",
    "cid_primario_internacao_vec_147": "primary_diagnosis_admission_vec_147",
    "primeiro_ureia": "first_urea",
    "cid_secundario_internacao_vec_117": "secondary_diagnosis_admission_vec_117",
    "primeiro_albumina": "first_albumin",
    "cid_primario_internacao_vec_184": "primary_diagnosis_admission_vec_184",
    "cid_primario_internacao_vec_185": "primary_diagnosis_admission_vec_185",
    "cid_secundario_internacao_vec_65": "secondary_diagnosis_admission_vec_65",
    "cid_primario_internacao_vec_41": "primary_diagnosis_admission_vec_41",
    "cid_primario_internacao_vec_67": "primary_diagnosis_admission_vec_67",
    "cid_primario_internacao_vec_223": "primary_diagnosis_admission_vec_223",
    "cid_primario_internacao_vec_222": "primary_diagnosis_admission_vec_222",
}

shap_exp.feature_names = [
    traducao.get(name, name) for name in shap_exp.feature_names
]

output_svg = "beeswarm_obito.svg"

# Start from a fresh figure so the beeswarm is never drawn on top of a figure
# left open by earlier plotting code.
plt.figure()
shap.plots.beeswarm(shap_exp, show=False)
fig = plt.gcf()
fig.savefig(output_svg, format='svg', bbox_inches='tight')

print(f"Beeswarm salvo em: {os.path.abspath(output_svg)}")

In [None]:
# Collapse every embedding ("<name>_vec_<i>" columns) into a single feature by
# summing the per-sample SHAP values of its components, then plot the mean
# absolute SHAP value of each resulting feature.
columns = list(X_test.columns)
column_position = {column: position for position, column in enumerate(columns)}

# Group embedding columns by the exact text before "_vec_". Matching on the
# exact prefix (rather than startswith) keeps unrelated columns that merely
# share a prefix out of the group; insertion order keeps the result
# deterministic between runs.
vector_groups = {}
non_vec_features = []
for column in columns:
    if '_vec_' in column:
        vector_groups.setdefault(column.split('_vec_')[0], []).append(column)
    else:
        non_vec_features.append(column)

all_features = [*non_vec_features, *vector_groups]
n_features = len(all_features)

# Source columns feeding each aggregated feature, in the order of all_features.
member_columns = [[column] for column in non_vec_features] + list(vector_groups.values())

shap_values_new = np.zeros(shape=(X_test.shape[0], n_features))
for new_feature_index, members in enumerate(member_columns):
    old_feature_indices = [column_position[column] for column in members]
    shap_values_new[:, new_feature_index] = shap_exp.values[:, old_feature_indices].sum(axis=1)

# Mean |SHAP| per aggregated feature; features with zero contribution are
# dropped and the rest sorted ascending so the largest bar ends up on top.
df_features = (
    pd.DataFrame({'feature': all_features, 'shap_values': np.abs(shap_values_new).mean(axis=0)})
    .query('shap_values > 0')
    .sort_values(by='shap_values', ascending=True)
    .rename(columns={'shap_values': 'mean|shap_values|'})
)

fig = ex.bar(df_features, x='mean|shap_values|', y='feature', orientation='h')

fig.update_layout(
    plot_bgcolor="white",
    paper_bgcolor="white",
    width=2000,
    height=1200,
)

fig.write_image("shap_obito.png", scale=3)

# 2. Permanência (30 dias): ExtraTrees

In [None]:
# Load the Boruta feature matrices and labels for the 30-day length-of-stay
# outcome.
X_train = pd.read_csv('../PREPROCESSED_FILES/tempo_perm/30_pickles/X_train_30TEMPOPER_Boruta.csv')
X_test = pd.read_csv('../PREPROCESSED_FILES/tempo_perm/30_pickles/X_test_30TEMPOPER_Boruta.csv')

# Drop columns that are entirely NaN in the training data, then give the test
# matrix exactly the same columns in the same order. Dropping independently on
# each frame could leave train and test with different feature sets.
# Raises KeyError if the test file lacks a column that the training file kept.
X_train = X_train.dropna(axis=1, how="all")
X_test = X_test.loc[:, X_train.columns]

y_train = np.load('../PREPROCESSED_FILES/tempo_perm/30_pickles/y_train_30TEMPOPER_Boruta.npy')
y_test = np.load('../PREPROCESSED_FILES/tempo_perm/30_pickles/y_test_30TEMPOPER_Boruta.npy')

In [None]:
# Deserialize the pickled ExtraTreesClassifier for the 30-day stay outcome.
# NOTE(review): pickle.load executes arbitrary code from the file — only load
# artifacts produced by this project.
path = '../PREDICTION/PERM30/PERM_PICKLES/ExtraTreesClassifier_boruta_30perm.pkl'

with open(path, mode='rb') as model_file:
    classifier = pickle.load(model_file)

In [None]:
# Tree-specific explainer; calling it on the test matrix yields the object
# whose feature names and values are used below.
explainer = shap.TreeExplainer(classifier)
shap_values = explainer(X_test)

# Portuguese column name -> English label shown on the plot. Columns that are
# not listed keep their original name.
traducao = {
    "especialidade_internacao_vec_3": "admission_specialty_vec_3",
    "sexo": "sex",
    "FC__anamnese_enfermagem_NORMOCARDICO": "heart_rate_nursing_record_normocardic",
    "especialidade_internacao_vec_156": "admission_specialty_vec_156",
    "especialidade_internacao_vec_209": "admission_specialty_vec_209",
    "especialidade_internacao_vec_68": "admission_specialty_vec_68",
    "especialidade_internacao_vec_215": "admission_specialty_vec_215",
    "especialidade_internacao_vec_122": "admission_specialty_vec_122",
    "especialidade_internacao_vec_65": "admission_specialty_vec_65",
    "cid_secundario_internacao_vec_65": "secondary_diagnosis_admission_vec_65",
}

translated_names = []
for name in shap_values.feature_names:
    translated_names.append(traducao.get(name, name))
shap_values.feature_names = translated_names

output_svg = "beeswarm_perm.svg"

# Plot only index 1 of the last axis of the SHAP values.
# NOTE(review): presumably the positive class of a binary classifier — confirm.
plt.figure(figsize=(8, 6))
shap.plots.beeswarm(shap_values[..., 1], show=False)

fig = plt.gcf()
fig.savefig(output_svg, format='svg', bbox_inches='tight')

print(f"✅ Beeswarm salvo em: {os.path.abspath(output_svg)}")

In [None]:
# Collapse every embedding ("<name>_vec_<i>" columns) into a single feature by
# summing the per-sample SHAP values of its components, then plot the mean
# absolute SHAP value of each resulting feature.
columns = list(X_test.columns)
column_position = {column: position for position, column in enumerate(columns)}

# Group embedding columns by the exact text before "_vec_". Matching on the
# exact prefix (rather than startswith) keeps unrelated columns that merely
# share a prefix out of the group; insertion order keeps the result
# deterministic between runs.
vector_groups = {}
non_vec_features = []
for column in columns:
    if '_vec_' in column:
        vector_groups.setdefault(column.split('_vec_')[0], []).append(column)
    else:
        non_vec_features.append(column)

all_features = [*non_vec_features, *vector_groups]
n_features = len(all_features)

# Source columns feeding each aggregated feature, in the order of all_features.
member_columns = [[column] for column in non_vec_features] + list(vector_groups.values())

# SHAP values at index 1 of the last axis, laid out as (samples, features).
class_one_values = shap_values.values[:, :, 1]

shap_values_new = np.zeros(shape=(X_test.shape[0], n_features))
for new_feature_index, members in enumerate(member_columns):
    old_feature_indices = [column_position[column] for column in members]
    shap_values_new[:, new_feature_index] = class_one_values[:, old_feature_indices].sum(axis=1)

# Mean |SHAP| per aggregated feature; features with zero contribution are
# dropped and the rest sorted ascending so the largest bar ends up on top.
df_features = (
    pd.DataFrame({'feature': all_features, 'shap_values': np.abs(shap_values_new).mean(axis=0)})
    .query('shap_values > 0')
    .sort_values(by='shap_values', ascending=True)
    .rename(columns={'shap_values': 'mean|shap_values|'})
)

fig = ex.bar(df_features, x='mean|shap_values|', y='feature', orientation='h')

fig.update_layout(
    plot_bgcolor="white",  # white plotting area
    paper_bgcolor="white",  # white area surrounding the plot
)

fig.write_image("shap_permanencia.svg")

# 3. Reinternação: KNeighborsClassifier

In [None]:
# Load the "liter" (literature) feature matrices and label vectors for the
# 30-day readmission outcome. Features come from CSV, labels from .npy files.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/reinternacao_pickles/X_train_liter_REINT30.csv'),
    pd.read_csv('../PREPROCESSED_FILES/reinternacao_pickles/X_test_liter_REINT30.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/reinternacao_pickles/y_train_liter_REINT30.npy'),
    np.load('../PREPROCESSED_FILES/reinternacao_pickles/y_test_liter_REINT30.npy'),
)

In [None]:
# Deserialize the pickled KNeighborsClassifier for the readmission outcome.
# NOTE(review): pickle.load executes arbitrary code from the file — only load
# artifacts produced by this project.
path = '../PREDICTION/REINT/REINT_PICKLES/KNeighborsClassifier_literature_reint.pkl'

with open(path, mode='rb') as model_file:
    classifier = pickle.load(model_file)

# Predicted labels for the test set.
y_pred = classifier.predict(X_test)

# Column 1 of the predicted class probabilities for the test set.
y_proba = classifier.predict_proba(X_test)[:, 1]

In [None]:
# Background rows that KernelExplainer uses as its reference distribution.
# A fixed seed makes the SHAP values reproducible between runs, and the sample
# size is capped so the call cannot fail on a training set with < 50 rows.
background = X_train.sample(n=min(50, len(X_train)), random_state=42)


def f(x):
    """Return column 1 of ``classifier.predict_proba`` for every row of ``x``."""
    return classifier.predict_proba(x)[:, 1]


# Model-agnostic explainer built from the probability function above.
explainer = shap.KernelExplainer(f, background)

# One row of SHAP values per test sample.
shap_values = explainer.shap_values(X_test)

In [None]:
# Wrap the raw KernelExplainer output in an Explanation object so that it can
# be passed to shap.plots.beeswarm.
shap_exp = Explanation(
    values=shap_values,
    data=X_test.values,
    feature_names=list(X_test.columns),
)

# Portuguese column name -> English label shown on the plot. Columns that are
# not listed keep their original name. Keys must match the data columns
# exactly, so their spelling is left untouched; only the English label for the
# bradycardia column was corrected.
traducao = {
    "primeiro_hemoglobina": "first_hemoglobin",
    "idade_internacao": "age_at_admission",
    "cid_primario_internacao_vec_80": "primary_diagnosis_admission_vec_80",
    "cid_primario_internacao_vec_119": "primary_diagnosis_admission_vec_119",
    "sexo": "sex",
    "FC__anamnese_enfermagem_BRAQUICARDIA": "heart_rate_nursing_record_bradycardia",
    "cid_primario_internacao_vec_101": "primary_diagnosis_admission_vec_101",
    "cid_primario_internacao_vec_245": "primary_diagnosis_admission_vec_245",
    "cid_primario_internacao_vec_192": "primary_diagnosis_admission_vec_192",
    "especialidade_internacao_vec_9": "admission_specialty_vec_9",
    "especialidade_internacao_vec_284": "admission_specialty_vec_284",
    "cid_primario_internacao_vec_41": "primary_diagnosis_admission_vec_41",
    "primeiro_leococitos_hemograma": "first_leukocytes_blood_count",
    "cid_primario_internacao_vec_79": "primary_diagnosis_admission_vec_79",
}

shap_exp.feature_names = [
    traducao.get(name, name) for name in shap_exp.feature_names
]

output_svg = "beeswarm_reint.svg"

# Start from a fresh figure so the beeswarm is never drawn on top of a figure
# left open by earlier plotting code.
plt.figure()
shap.plots.beeswarm(shap_exp, show=False)
fig = plt.gcf()
fig.savefig(output_svg, format='svg', bbox_inches='tight')

print(f"Beeswarm salvo em: {os.path.abspath(output_svg)}")

In [None]:
# Collapse every embedding ("<name>_vec_<i>" columns) into a single feature by
# summing the per-sample SHAP values of its components, then plot the mean
# absolute SHAP value of each resulting feature.
columns = list(X_test.columns)
column_position = {column: position for position, column in enumerate(columns)}

# Group embedding columns by the exact text before "_vec_". Matching on the
# exact prefix (rather than startswith) keeps unrelated columns that merely
# share a prefix out of the group; insertion order keeps the result
# deterministic between runs.
vector_groups = {}
non_vec_features = []
for column in columns:
    if '_vec_' in column:
        vector_groups.setdefault(column.split('_vec_')[0], []).append(column)
    else:
        non_vec_features.append(column)

all_features = [*non_vec_features, *vector_groups]
n_features = len(all_features)

# Source columns feeding each aggregated feature, in the order of all_features.
member_columns = [[column] for column in non_vec_features] + list(vector_groups.values())

shap_values_new = np.zeros(shape=(X_test.shape[0], n_features))
for new_feature_index, members in enumerate(member_columns):
    old_feature_indices = [column_position[column] for column in members]
    shap_values_new[:, new_feature_index] = shap_exp.values[:, old_feature_indices].sum(axis=1)

# Mean |SHAP| per aggregated feature; features with zero contribution are
# dropped and the rest sorted ascending so the largest bar ends up on top.
df_features = (
    pd.DataFrame({'feature': all_features, 'shap_values': np.abs(shap_values_new).mean(axis=0)})
    .query('shap_values > 0')
    .sort_values(by='shap_values', ascending=True)
    .rename(columns={'shap_values': 'mean|shap_values|'})
)

fig = ex.bar(df_features, x='mean|shap_values|', y='feature', orientation='h')

fig.update_layout(
    plot_bgcolor="white",
    paper_bgcolor="white",
    width=2000,
    height=1200,
)

fig.write_image("shap_reint.png", scale=3)