In [None]:
import pandas as pd
import numpy as np
import shap
from shap import Explanation
import pickle
from plotly import express as ex
import matplotlib.pyplot as plt
import plotly.io as pio
from sklearn.metrics import roc_curve, auc
import warnings

# Suppress every Python warning for the remainder of the kernel session.
# NOTE(review): this is a blanket filter, so warnings raised while loading the
# pickled models or scoring them are hidden as well; consider narrowing it.
warnings.filterwarnings('ignore')

In [None]:
# OBITO outcome, full feature set: feature tables are stored as CSV and the
# matching label vectors as .npy files. Load order is train then test.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/obito_pickles/X_train_OBITO.csv'),
    pd.read_csv('../PREPROCESSED_FILES/obito_pickles/X_test_OBITO.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/obito_pickles/y_train_OBITO.npy'),
    np.load('../PREPROCESSED_FILES/obito_pickles/y_test_OBITO.npy'),
)

In [None]:
# Pickled classifiers for the OBITO outcome ("allfeatures" variant), keyed by
# the label that appears in the ROC legend.
model_paths = {
    'SGDClassifier': '../PREDICTION/OBITO/OBITO_PICKLES/SGDClassifier_allfeatures_obito.pkl',
    'Perceptron': '../PREDICTION/OBITO/OBITO_PICKLES/Perceptron_allfeatures_obito.pkl',
    'LinearDiscriminantAnalysis': '../PREDICTION/OBITO/OBITO_PICKLES/LinearDiscriminantAnalysis_allfeatures_obito.pkl',
    'PassiveAggressiveClassifier': '../PREDICTION/OBITO/OBITO_PICKLES/PassiveAggressiveClassifier_allfeatures_obito.pkl',
    'LogisticRegression': '../PREDICTION/OBITO/OBITO_PICKLES/LogisticRegression_allfeatures_obito.pkl'
}

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this cell on model files from a trusted source.
models = {}

for name, path in model_paths.items():
    with open(path, 'rb') as f:
        models[name] = pickle.load(f)

plt.figure(figsize=(8, 6))

for name, model in models.items():
    # roc_curve only needs a ranking score, so either the probability of the
    # second class or the raw decision value is acceptable.
    # Assumes a binary target whose positive class is column 1 — TODO confirm.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        raise ValueError(f"Modelo {name} não possui método para calcular probabilidades.")

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# Diagonal reference line (random classifier)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', lw=1)

# Both axes are rates in [0, 1], not raw counts.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC Curve: Death - All features')
plt.legend(loc='lower right')
plt.grid(alpha=0.3)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure.
plt.savefig('roc_allfeatures_obito.png', dpi=300)
plt.show()

In [None]:
# OBITO outcome, Boruta feature subset: feature tables are stored as CSV and
# the matching label vectors as .npy files. Load order is train then test.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/obito_pickles/X_train_Boruta_OBITO.csv'),
    pd.read_csv('../PREPROCESSED_FILES/obito_pickles/X_test_Boruta_OBITO.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/obito_pickles/y_train_Boruta_OBITO.npy'),
    np.load('../PREPROCESSED_FILES/obito_pickles/y_test_Boruta_OBITO.npy'),
)

In [None]:
# Pickled classifiers for the OBITO outcome ("boruta" variant), keyed by the
# label that appears in the ROC legend.
model_paths = {
    'LinearDiscriminantAnalysis': '../PREDICTION/OBITO/OBITO_PICKLES/LinearDiscriminantAnalysis_boruta_obito.pkl',
    'LinearSVC': '../PREDICTION/OBITO/OBITO_PICKLES/LinearSVC_boruta_obito.pkl',
    'CalibratedClassifierCV': '../PREDICTION/OBITO/OBITO_PICKLES/CalibratedClassifierCV_boruta_obito.pkl',
    'LogisticRegression': '../PREDICTION/OBITO/OBITO_PICKLES/LogisticRegression_boruta_obito.pkl',
    'RidgeClassifierCV': '../PREDICTION/OBITO/OBITO_PICKLES/RidgeClassifierCV_boruta_obito.pkl'
}

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this cell on model files from a trusted source.
models = {}

for name, path in model_paths.items():
    with open(path, 'rb') as f:
        models[name] = pickle.load(f)

plt.figure(figsize=(8, 6))

for name, model in models.items():
    # roc_curve only needs a ranking score, so either the probability of the
    # second class or the raw decision value is acceptable.
    # Assumes a binary target whose positive class is column 1 — TODO confirm.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        raise ValueError(f"Modelo {name} não possui método para calcular probabilidades.")

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# Diagonal reference line (random classifier)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', lw=1)

# Both axes are rates in [0, 1], not raw counts.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC Curve: Death - Boruta features')
plt.legend(loc='lower right')
plt.grid(alpha=0.3)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure.
plt.savefig('roc_boruta_obito.png', dpi=300)
plt.show()

In [None]:
# OBITO outcome, "liter" (literature) feature subset: feature tables are
# stored as CSV and the matching label vectors as .npy files.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/obito_pickles/X_train_liter_OBITO.csv'),
    pd.read_csv('../PREPROCESSED_FILES/obito_pickles/X_test_liter_OBITO.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/obito_pickles/y_train_liter_OBITO.npy'),
    np.load('../PREPROCESSED_FILES/obito_pickles/y_test_liter_OBITO.npy'),
)

In [None]:
# Pickled classifiers for the OBITO outcome ("literature" variant), keyed by
# the label that appears in the ROC legend.
model_paths = {
    'GradientBoosting': '../PREDICTION/OBITO/OBITO_PICKLES/GradientBoosting_literature_obito.pkl',
    'LinearDiscriminantAnalysis': '../PREDICTION/OBITO/OBITO_PICKLES/LinearDiscriminantAnalysis_literature_obito.pkl',
    'LinearSVC': '../PREDICTION/OBITO/OBITO_PICKLES/LinearSVC_literature_obito.pkl',
    'LogisticRegression': '../PREDICTION/OBITO/OBITO_PICKLES/LogisticRegression_literature_obito.pkl',
    'SGDClassifier': '../PREDICTION/OBITO/OBITO_PICKLES/SGDClassifier_literature_obito.pkl'
}

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this cell on model files from a trusted source.
models = {}

for name, path in model_paths.items():
    with open(path, 'rb') as f:
        models[name] = pickle.load(f)

plt.figure(figsize=(8, 6))

for name, model in models.items():
    # roc_curve only needs a ranking score, so either the probability of the
    # second class or the raw decision value is acceptable.
    # Assumes a binary target whose positive class is column 1 — TODO confirm.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        raise ValueError(f"Modelo {name} não possui método para calcular probabilidades.")

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# Diagonal reference line (random classifier)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', lw=1)

# Both axes are rates in [0, 1], not raw counts.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC Curve: Death - Literature features')
plt.legend(loc='lower right')
plt.grid(alpha=0.3)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure.
plt.savefig('roc_literature_obito.png', dpi=300)
plt.show()

### - All Features

In [None]:
# 30TEMPOPER outcome, full feature set: feature tables are stored as CSV and
# the matching label vectors as .npy files. Load order is train then test.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/tempo_perm/30_pickles/X_train_30TEMPOPER.csv'),
    pd.read_csv('../PREPROCESSED_FILES/tempo_perm/30_pickles/X_test_30TEMPOPER.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/tempo_perm/30_pickles/y_train_30TEMPOPER.npy'),
    np.load('../PREPROCESSED_FILES/tempo_perm/30_pickles/y_test_30TEMPOPER.npy'),
)

In [None]:
# Pickled classifiers for the PERM30 outcome ("allfeatures" variant), keyed by
# the label that appears in the ROC legend.
model_paths = {
    'CatBoost': '../PREDICTION/PERM30/PERM_PICKLES/CatBoost_allfeatures_30perm.pkl',
    'ExtraTreesClassifier': '../PREDICTION/PERM30/PERM_PICKLES/ExtraTreesClassifier_allfeatures_30perm.pkl',
    'RidgeClassifierCV': '../PREDICTION/PERM30/PERM_PICKLES/RidgeClassifierCV_allfeatures_30perm.pkl',
    'SVC': '../PREDICTION/PERM30/PERM_PICKLES/SVC_allfeatures_30perm.pkl',
    'RandomForestClassifier': '../PREDICTION/PERM30/PERM_PICKLES/RandomForestClassifier_allfeatures_30perm.pkl'
}

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this cell on model files from a trusted source.
models = {}

for name, path in model_paths.items():
    with open(path, 'rb') as f:
        models[name] = pickle.load(f)

plt.figure(figsize=(8, 6))

for name, model in models.items():
    # roc_curve only needs a ranking score, so either the probability of the
    # second class or the raw decision value is acceptable.
    # Assumes a binary target whose positive class is column 1 — TODO confirm.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        raise ValueError(f"Modelo {name} não possui método para calcular probabilidades.")

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# Diagonal reference line (random classifier)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', lw=1)

# Both axes are rates in [0, 1], not raw counts.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC Curve: Length of Stay - All features')
plt.legend(loc='lower right')
plt.grid(alpha=0.3)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure.
plt.savefig('roc_allfeatures_perm.png', dpi=300)
plt.show()

### - Boruta

In [None]:
# 30TEMPOPER outcome, Boruta feature subset: feature tables are stored as CSV
# and the matching label vectors as .npy files. Load order is train then test.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/tempo_perm/30_pickles/X_train_30TEMPOPER_Boruta.csv'),
    pd.read_csv('../PREPROCESSED_FILES/tempo_perm/30_pickles/X_test_30TEMPOPER_Boruta.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/tempo_perm/30_pickles/y_train_30TEMPOPER_Boruta.npy'),
    np.load('../PREPROCESSED_FILES/tempo_perm/30_pickles/y_test_30TEMPOPER_Boruta.npy'),
)

In [None]:
# Pickled classifiers for the PERM30 outcome ("boruta" variant), keyed by the
# label that appears in the ROC legend.
model_paths = {
    'CalibratedClassifierCV': '../PREDICTION/PERM30/PERM_PICKLES/CalibratedClassifierCV_boruta_30perm.pkl',
    'ExtraTreesClassifier': '../PREDICTION/PERM30/PERM_PICKLES/ExtraTreesClassifier_boruta_30perm.pkl',
    'LinearDiscriminantAnalysis': '../PREDICTION/PERM30/PERM_PICKLES/LinearDiscriminantAnalysis_boruta_30perm.pkl',
    'RandomForestClassifier': '../PREDICTION/PERM30/PERM_PICKLES/RandomForestClassifier_boruta_30perm.pkl',
    'SVC': '../PREDICTION/PERM30/PERM_PICKLES/SVC_boruta_30perm.pkl'
}

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this cell on model files from a trusted source.
models = {}

for name, path in model_paths.items():
    with open(path, 'rb') as f:
        models[name] = pickle.load(f)

plt.figure(figsize=(8, 6))

for name, model in models.items():
    # roc_curve only needs a ranking score, so either the probability of the
    # second class or the raw decision value is acceptable.
    # Assumes a binary target whose positive class is column 1 — TODO confirm.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        raise ValueError(f"Modelo {name} não possui método para calcular probabilidades.")

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# Diagonal reference line (random classifier)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', lw=1)

# Both axes are rates in [0, 1], not raw counts.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC Curve: Length of Stay - Boruta features')
plt.legend(loc='lower right')
plt.grid(alpha=0.3)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure.
plt.savefig('roc_boruta_perm.png', dpi=300)
plt.show()

### - Literature

In [None]:
# 30TEMPOPER outcome, "liter" (literature) feature subset: feature tables are
# stored as CSV and the matching label vectors as .npy files.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/tempo_perm/30_pickles/X_train_liter_30TEMPOPER.csv'),
    pd.read_csv('../PREPROCESSED_FILES/tempo_perm/30_pickles/X_test_liter_30TEMPOPER.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/tempo_perm/30_pickles/y_train_liter_30TEMPOPER.npy'),
    np.load('../PREPROCESSED_FILES/tempo_perm/30_pickles/y_test_liter_30TEMPOPER.npy'),
)

In [None]:
# Pickled classifiers for the PERM30 outcome ("literature" variant), keyed by
# the label that appears in the ROC legend.
model_paths = {
    'LinearDiscriminantAnalysis': '../PREDICTION/PERM30/PERM_PICKLES/LinearDiscriminantAnalysis_literature_30perm.pkl',
    'RidgeClassifierCV': '../PREDICTION/PERM30/PERM_PICKLES/RidgeClassifierCV_literature_30perm.pkl',
    'RidgeClassifier': '../PREDICTION/PERM30/PERM_PICKLES/RidgeClassifier_literature_30perm.pkl',
    'NuSVC': '../PREDICTION/PERM30/PERM_PICKLES/NuSVC_literature_30perm.pkl',
    'Perceptron': '../PREDICTION/PERM30/PERM_PICKLES/Perceptron_literature_30perm.pkl'
}

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this cell on model files from a trusted source.
models = {}

for name, path in model_paths.items():
    with open(path, 'rb') as f:
        models[name] = pickle.load(f)

plt.figure(figsize=(8, 6))

for name, model in models.items():
    # roc_curve only needs a ranking score, so either the probability of the
    # second class or the raw decision value is acceptable.
    # Assumes a binary target whose positive class is column 1 — TODO confirm.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        raise ValueError(f"Modelo {name} não possui método para calcular probabilidades.")

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# Diagonal reference line (random classifier)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', lw=1)

# Both axes are rates in [0, 1], not raw counts.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
# The title names the length-of-stay outcome, matching the PERM30 models
# plotted here and the output file name below.
plt.title('ROC Curve: Length of Stay - Literature features')
plt.legend(loc='lower right')
plt.grid(alpha=0.3)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure.
plt.savefig('roc_literature_perm.png', dpi=300)
plt.show()

In [None]:
# REINT30 outcome, full feature set: feature tables are stored as CSV and the
# matching label vectors as .npy files. Load order is train then test.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/reinternacao_pickles/X_train_REINT30.csv'),
    pd.read_csv('../PREPROCESSED_FILES/reinternacao_pickles/X_test_REINT30.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/reinternacao_pickles/y_train_REINT30.npy'),
    np.load('../PREPROCESSED_FILES/reinternacao_pickles/y_test_REINT30.npy'),
)

In [None]:
# Pickled classifiers for the REINT outcome ("allfeatures" variant), keyed by
# the label that appears in the ROC legend.
model_paths = {
    'GaussianNB': '../PREDICTION/REINT/REINT_PICKLES/GaussianNB_allfeatures_reint.pkl',
    'KNeighborsClassifier': '../PREDICTION/REINT/REINT_PICKLES/KNeighborsClassifier_allfeatures_reint.pkl',
    'LabelPropagation': '../PREDICTION/REINT/REINT_PICKLES/LabelPropagation_allfeatures_reint.pkl',
    'LabelSpreading': '../PREDICTION/REINT/REINT_PICKLES/LabelSpreading_allfeatures_reint.pkl',
    'XGBClassifier': '../PREDICTION/REINT/REINT_PICKLES/XGBClassifier_allfeatures_reint.pkl'
}

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this cell on model files from a trusted source.
models = {}

for name, path in model_paths.items():
    with open(path, 'rb') as f:
        models[name] = pickle.load(f)

plt.figure(figsize=(8, 6))

for name, model in models.items():
    # roc_curve only needs a ranking score, so either the probability of the
    # second class or the raw decision value is acceptable.
    # Assumes a binary target whose positive class is column 1 — TODO confirm.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        raise ValueError(f"Modelo {name} não possui método para calcular probabilidades.")

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# Diagonal reference line (random classifier)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', lw=1)

# Both axes are rates in [0, 1], not raw counts.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC Curve: Readmission - All features')
plt.legend(loc='lower right')
plt.grid(alpha=0.3)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure.
plt.savefig('roc_allfeatures_reint.png', dpi=300)
plt.show()

### - Boruta

In [None]:
# REINT30 outcome, Boruta feature subset: feature tables are stored as CSV and
# the matching label vectors as .npy files. Load order is train then test.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/reinternacao_pickles/X_train_Boruta_REINT30.csv'),
    pd.read_csv('../PREPROCESSED_FILES/reinternacao_pickles/X_test_Boruta_REINT30.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/reinternacao_pickles/y_train_Boruta_REINT30.npy'),
    np.load('../PREPROCESSED_FILES/reinternacao_pickles/y_test_Boruta_REINT30.npy'),
)

In [None]:
# Pickled classifiers for the REINT outcome ("boruta" variant), keyed by the
# label that appears in the ROC legend.
model_paths = {
    'BernoulliNB': '../PREDICTION/REINT/REINT_PICKLES/BernoulliNB_boruta_reint.pkl',
    'CatBoost': '../PREDICTION/REINT/REINT_PICKLES/CatBoost_boruta_reint.pkl',
    'GaussianNB': '../PREDICTION/REINT/REINT_PICKLES/GaussianNB_boruta_reint.pkl',
    'GradientBoosting': '../PREDICTION/REINT/REINT_PICKLES/GradientBoosting_boruta_reint.pkl',
    'LabelSpreading': '../PREDICTION/REINT/REINT_PICKLES/LabelSpreading_boruta_reint.pkl'
}

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this cell on model files from a trusted source.
models = {}

for name, path in model_paths.items():
    with open(path, 'rb') as f:
        models[name] = pickle.load(f)

plt.figure(figsize=(8, 6))

for name, model in models.items():
    # roc_curve only needs a ranking score, so either the probability of the
    # second class or the raw decision value is acceptable.
    # Assumes a binary target whose positive class is column 1 — TODO confirm.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        raise ValueError(f"Modelo {name} não possui método para calcular probabilidades.")

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# Diagonal reference line (random classifier)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', lw=1)

# Both axes are rates in [0, 1], not raw counts.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC Curve: Readmission - Boruta features')
plt.legend(loc='lower right')
plt.grid(alpha=0.3)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure.
plt.savefig('roc_boruta_reint.png', dpi=300)
plt.show()

### - Literature

In [None]:
# REINT30 outcome, "liter" (literature) feature subset: feature tables are
# stored as CSV and the matching label vectors as .npy files.
X_train, X_test = (
    pd.read_csv('../PREPROCESSED_FILES/reinternacao_pickles/X_train_liter_REINT30.csv'),
    pd.read_csv('../PREPROCESSED_FILES/reinternacao_pickles/X_test_liter_REINT30.csv'),
)
y_train, y_test = (
    np.load('../PREPROCESSED_FILES/reinternacao_pickles/y_train_liter_REINT30.npy'),
    np.load('../PREPROCESSED_FILES/reinternacao_pickles/y_test_liter_REINT30.npy'),
)

In [None]:
# Pickled classifiers for the REINT outcome ("literature" variant), keyed by
# the label that appears in the ROC legend.
# NOTE(review): NearestCentroid appears to expose decision_function /
# predict_proba only in recent scikit-learn releases (1.6+, to be confirmed);
# on older versions the scoring loop below raises ValueError for that model.
model_paths = {
    'BernoulliNB': '../PREDICTION/REINT/REINT_PICKLES/BernoulliNB_literature_reint.pkl',
    'KNeighborsClassifier': '../PREDICTION/REINT/REINT_PICKLES/KNeighborsClassifier_literature_reint.pkl',
    'LabelPropagation': '../PREDICTION/REINT/REINT_PICKLES/LabelPropagation_literature_reint.pkl',
    'LabelSpreading': '../PREDICTION/REINT/REINT_PICKLES/LabelSpreading_literature_reint.pkl',
    'NearestCentroid': '../PREDICTION/REINT/REINT_PICKLES/NearestCentroid_literature_reint.pkl'
}

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this cell on model files from a trusted source.
models = {}

for name, path in model_paths.items():
    with open(path, 'rb') as f:
        models[name] = pickle.load(f)

plt.figure(figsize=(8, 6))

for name, model in models.items():
    # roc_curve only needs a ranking score, so either the probability of the
    # second class or the raw decision value is acceptable.
    # Assumes a binary target whose positive class is column 1 — TODO confirm.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        raise ValueError(f"Modelo {name} não possui método para calcular probabilidades.")

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.3f})')

# Diagonal reference line (random classifier)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', lw=1)

# Both axes are rates in [0, 1], not raw counts.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC Curve: Readmission - Literature features')
plt.legend(loc='lower right')
plt.grid(alpha=0.3)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure.
plt.savefig('roc_literature_reint.png', dpi=300)
plt.show()