In [1]:
import os
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from PIL import Image

import numpy as np
from scipy.stats import mode
from sklearn.linear_model import LogisticRegression

from pneumonia.model.base import load_keras_model, load_keras_model_from_task_dir
from pneumonia.model.vgg19 import VGG19
from pneumonia.evaluation.keras import evaluate_keras_model, pred_probas
from pneumonia.evaluation.commons import calculate_scores
from pneumonia.plot import plot_confusion_matrix, plot_corrcoef_matrix

from keras import backend as K

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# First VGG19 checkpoint: load it, then collect its predicted probabilities on
# the validation split and on the test split (in that order).
vgg19_1 = load_keras_model_from_task_dir(VGG19, "trained_models/VGG19_0.9423")
pred_probas_val_vgg19_1, pred_probas_test_vgg19_1 = (
    pred_probas(vgg19_1.keras_model, build_generator())
    for build_generator in (vgg19_1.get_val_generator, vgg19_1.get_test_generator)
)
# Reset the Keras backend session only after both prediction arrays exist.
K.clear_session()

Found 1045 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [None]:
# Second VGG19 checkpoint: load it, then collect its predicted probabilities on
# the validation split and on the test split (in that order).
vgg19_2 = load_keras_model_from_task_dir(VGG19, "trained_models/VGG19_0.9485")
pred_probas_val_vgg19_2, pred_probas_test_vgg19_2 = (
    pred_probas(vgg19_2.keras_model, build_generator())
    for build_generator in (vgg19_2.get_val_generator, vgg19_2.get_test_generator)
)
# Reset the Keras backend session only after both prediction arrays exist.
K.clear_session()

In [None]:
# Third VGG19 checkpoint: load it, then collect its predicted probabilities on
# the validation split and on the test split (in that order).
vgg19_3 = load_keras_model_from_task_dir(VGG19, "trained_models/VGG19_0.9513")
pred_probas_val_vgg19_3, pred_probas_test_vgg19_3 = (
    pred_probas(vgg19_3.keras_model, build_generator())
    for build_generator in (vgg19_3.get_val_generator, vgg19_3.get_test_generator)
)
# Reset the Keras backend session only after both prediction arrays exist.
K.clear_session()

In [None]:
# Correlation between the three models' validation probabilities: highly
# correlated models leave little for an ensemble to gain.
val_corrcoef = np.corrcoef([
    pred_probas_val_vgg19_1,
    pred_probas_val_vgg19_2,
    pred_probas_val_vgg19_3,
])
val_corrcoef_plot = plot_corrcoef_matrix(
    val_corrcoef,
    model_names=["vgg19_1", "vgg19_2", "vgg19_3"],
)
val_corrcoef_plot.show()

In [None]:
# Same correlation view as for validation, computed on the test probabilities.
test_corrcoef = np.corrcoef([
    pred_probas_test_vgg19_1,
    pred_probas_test_vgg19_2,
    pred_probas_test_vgg19_3,
])
test_corrcoef_plot = plot_corrcoef_matrix(
    test_corrcoef,
    model_names=["vgg19_1", "vgg19_2", "vgg19_3"],
)
test_corrcoef_plot.show()

In [None]:
def print_scores(trues, preds):
    """Print the metrics returned by ``calculate_scores(trues, preds)``.

    Args:
        trues: Ground-truth labels; forwarded as the first argument of
            ``calculate_scores``.
        preds: Predicted labels; forwarded as the second argument of
            ``calculate_scores``.

    Returns:
        None. Five ``name: value`` lines are written to stdout, in the order
        accuracy, precision, recall, F1 score, confusion matrix.
    """
    # Unpack into exactly five values up front so a malformed result raises
    # before anything has been printed.
    accuracy, prec, rec, f1, conf_mat = calculate_scores(trues, preds)
    report = (
        ("acc", accuracy),
        ("precision", prec),
        ("recall", rec),
        ("f1 score", f1),
        ("confusion matrix", conf_mat),
    )
    for label, value in report:
        print(f"{label}: {value}")

In [None]:
# Ground-truth labels for both splits, read from the `classes` attribute of
# freshly built generators of the first model.
val_trues, test_trues = (
    vgg19_1.get_val_generator().classes,
    vgg19_1.get_test_generator().classes,
)

In [None]:
# Turn validation probabilities into hard 0/1 votes: a sample is labelled 1
# only when its probability is strictly above 0.99.
pred_val_vgg19_1, pred_val_vgg19_2, pred_val_vgg19_3 = (
    (probas > 0.99).astype(int)
    for probas in (
        pred_probas_val_vgg19_1,
        pred_probas_val_vgg19_2,
        pred_probas_val_vgg19_3,
    )
)

In [None]:
# Turn test probabilities into hard 0/1 votes using the same 0.99 cut-off that
# is applied to every model on the validation split, so that validation and
# test votes come from the same operating point.
# NOTE(review): vgg19_1 used 0.98 here before; if that value was a deliberate
# choice it should be selected on validation and applied to both splits.
pred_test_vgg19_1, pred_test_vgg19_2, pred_test_vgg19_3 = (
    (probas > 0.99).astype(int)
    for probas in (
        pred_probas_test_vgg19_1,
        pred_probas_test_vgg19_2,
        pred_probas_test_vgg19_3,
    )
)

In [None]:
# Test-set metrics for the first model. Keywords pin the argument order to the
# (trues, preds) signature of print_scores; passing them the other way round
# would swap precision with recall and transpose the confusion matrix.
print_scores(trues=test_trues, preds=pred_test_vgg19_1)

In [None]:
# Test-set metrics for the second model. Keywords pin the argument order to the
# (trues, preds) signature of print_scores; passing them the other way round
# would swap precision with recall and transpose the confusion matrix.
print_scores(trues=test_trues, preds=pred_test_vgg19_2)

In [None]:
# Test-set metrics for the third model. Keywords pin the argument order to the
# (trues, preds) signature of print_scores; passing them the other way round
# would swap precision with recall and transpose the confusion matrix.
print_scores(trues=test_trues, preds=pred_test_vgg19_3)

## Primeira tentativa de ensemble: votação

In [None]:
# Voting ensemble on the validation split: one column of hard votes per model.
ensemble_1_val = np.column_stack((pred_val_vgg19_1, pred_val_vgg19_2,
                                  pred_val_vgg19_3))
# The most frequent vote along each row is the ensemble's prediction.
ensemble_1_val_preds = mode(ensemble_1_val, axis=1).mode.flatten()
# Labels first, predictions second, matching print_scores(trues, preds).
print_scores(trues=val_trues, preds=ensemble_1_val_preds)

In [None]:
# Voting ensemble on the test split: one column of hard votes per model.
ensemble_1_test = np.column_stack((pred_test_vgg19_1, pred_test_vgg19_2,
                                   pred_test_vgg19_3))
# The most frequent vote along each row is the ensemble's prediction.
ensemble_1_test_preds = mode(ensemble_1_test, axis=1).mode.flatten()
# Labels first, predictions second, matching print_scores(trues, preds).
print_scores(trues=test_trues, preds=ensemble_1_test_preds)

## Segunda tentativa de ensemble: média de probabilidades

In [None]:
# Probability-averaging ensemble on the validation split: one column of
# predicted probabilities per model.
ensemble_2_val = np.column_stack((pred_probas_val_vgg19_1, pred_probas_val_vgg19_2,
                                  pred_probas_val_vgg19_3))
# Label 1 when the mean probability across the three models exceeds 0.74.
ensemble_2_val_preds = (np.average(ensemble_2_val, axis=1) > 0.74).astype(int)
# Labels first, predictions second, matching print_scores(trues, preds).
print_scores(trues=val_trues, preds=ensemble_2_val_preds)

In [None]:
# Probability-averaging ensemble on the test split. The three columns are the
# three *distinct* models (the third column must be vgg19_3, not a second copy
# of vgg19_2). Neither the stack nor the row means depend on the threshold, so
# they are computed once, outside the sweep.
ensemble_2_test = np.column_stack((pred_probas_test_vgg19_1, pred_probas_test_vgg19_2, pred_probas_test_vgg19_3))
ensemble_2_test_avg = np.average(ensemble_2_test, axis=1)

# NOTE(review): this sweeps the decision threshold on the test split; a
# threshold picked from these numbers is tuned on test data.
for val in np.arange(0.5, 0.99, 0.01):
    ensemble_2_test_preds = (ensemble_2_test_avg > val).astype(int)
    print(val)
    # Labels first, predictions second, matching print_scores(trues, preds).
    print_scores(trues=test_trues, preds=ensemble_2_test_preds)

## Terceira tentativa de ensemble: regressão logística

In [None]:
# Feature matrices for the stacking classifier: one column of predicted
# probabilities per model.
ensemble_3_val = np.column_stack((pred_probas_val_vgg19_1, pred_probas_val_vgg19_2,
                                  pred_probas_val_vgg19_3))
# The test matrix must be built from the *test* probabilities; building it from
# the validation arrays would score validation data against test labels.
ensemble_3_test = np.column_stack((pred_probas_test_vgg19_1, pred_probas_test_vgg19_2,
                                   pred_probas_test_vgg19_3))

In [None]:
# Stacking classifier: a logistic regression fitted on the three models'
# validation probabilities against the validation labels.
ensemble_3_clf = LogisticRegression(C=0.1, class_weight="balanced", random_state=42)
# Kept as the cell's last expression so the fitted estimator is still displayed.
ensemble_3_clf.fit(X=ensemble_3_val, y=val_trues)

In [None]:
# Display the intercept learned by the logistic regression fitted on the models' probabilities.
ensemble_3_clf.intercept_

In [None]:
# Display the learned coefficients: one weight per stacked model column (vgg19_1, vgg19_2, vgg19_3).
ensemble_3_clf.coef_

In [None]:
# Column 1 of predict_proba is the probability of the second entry of
# `classes_` (label 1, assuming the labels are 0/1).
ensemble_3_val_pred_probas = ensemble_3_clf.predict_proba(ensemble_3_val)[:,1]
ensemble_3_val_preds = (ensemble_3_val_pred_probas > 0.95).astype(int)
# Labels first, predictions second, matching print_scores(trues, preds).
print_scores(trues=val_trues, preds=ensemble_3_val_preds)

In [None]:
# The stacked probabilities do not depend on the threshold, so predict once
# instead of on every iteration of the sweep.
ensemble_3_test_pred_probas = ensemble_3_clf.predict_proba(ensemble_3_test)[:,1]

# NOTE(review): this sweeps the decision threshold on the test split; a
# threshold picked from these numbers is tuned on test data.
for val in np.arange(0.5, 0.95, 0.01):
    ensemble_3_test_preds = (ensemble_3_test_pred_probas > val).astype(int)
    print(val)
    # Labels first, predictions second, matching print_scores(trues, preds).
    print_scores(trues=test_trues, preds=ensemble_3_test_preds)

## Quarta tentativa de ensemble: regressão logística com votos

In [None]:
# Feature matrices for the vote-based stacking classifier: one column of hard
# 0/1 votes per model.
ensemble_4_val_pred = np.column_stack((pred_val_vgg19_1, pred_val_vgg19_2, pred_val_vgg19_3))
# The test matrix must be built from the *test* votes; building it from the
# validation votes would score validation data against test labels.
ensemble_4_test_pred = np.column_stack((pred_test_vgg19_1, pred_test_vgg19_2, pred_test_vgg19_3))

In [None]:
# Stacking classifier: a logistic regression fitted on the three models' hard
# validation votes against the validation labels.
ensemble_4_clf = LogisticRegression(C=0.1, class_weight="balanced", random_state=42)
# Kept as the cell's last expression so the fitted estimator is still displayed.
ensemble_4_clf.fit(X=ensemble_4_val_pred, y=val_trues)

In [None]:
# Display the intercept learned by the logistic regression fitted on the models' hard votes.
ensemble_4_clf.intercept_

In [None]:
# Display the learned coefficients: one weight per stacked vote column (vgg19_1, vgg19_2, vgg19_3).
ensemble_4_clf.coef_

In [None]:
# Column 1 of predict_proba is the probability of the second entry of
# `classes_` (label 1, assuming the labels are 0/1).
ensemble_4_val_pred_probas = ensemble_4_clf.predict_proba(ensemble_4_val_pred)[:,1]
# Threshold and report this ensemble's own probabilities (ensemble 4), not the
# ones left over from the probability-based ensemble 3.
ensemble_4_val_preds = (ensemble_4_val_pred_probas > 0.99).astype(int)
# Labels first, predictions second, matching print_scores(trues, preds).
print_scores(trues=val_trues, preds=ensemble_4_val_preds)

In [None]:
# The stacked probabilities do not depend on the threshold, so predict once
# instead of on every iteration of the sweep.
ensemble_4_test_pred_probas = ensemble_4_clf.predict_proba(ensemble_4_test_pred)[:,1]

# NOTE(review): this sweeps the decision threshold on the test split; a
# threshold picked from these numbers is tuned on test data.
for val in np.arange(0.5, 0.99, 0.01):
    ensemble_4_test_preds = (ensemble_4_test_pred_probas > val).astype(int)
    print(val)
    # Labels first, predictions second, matching print_scores(trues, preds).
    print_scores(trues=test_trues, preds=ensemble_4_test_preds)