In [None]:
# Mount Google Drive under /content/drive; later cells read the Kaggle token
# from it and write the model checkpoint to it.
from google.colab import drive

drive.mount(mountpoint="/content/drive")

# Descarga del dataset

In [None]:
# Instalar y tener acceso a kaggle mediante
!pip install kaggle
!mkdir ~/.kaggle
!cp /content/drive/MyDrive/chest-xray-classification/kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Descarga del dataset Chest X-Ray Images (Pneumonia)
# link: https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia
!kaggle datasets download paultimothymooney/chest-xray-pneumonia
!unzip -q chest-xray-pneumonia.zip

In [None]:
# Eliminado de ficheros y carpetas innecesarias
!rm chest-xray-pneumonia.zip
!rm -rf chest_xray/__MACOSX/
!rm -rf chest_xray/chest_xray/

# Importar las librerías

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint

# Construir el modelo CNN

In [None]:
# Spatial size of the network input; the data generators resize images to it.
target_size = (256, 256)

# Binary CNN: two Conv2D + MaxPooling2D stages followed by a dense head with
# dropout and a single sigmoid output unit.
classifier = Sequential([
    Conv2D(
        filters=32,                      # number of feature maps
        kernel_size=(3, 3),              # size of the feature detector
        activation="relu",
        input_shape=(*target_size, 3),
    ),
    MaxPooling2D(pool_size=(2, 2)),      # pooled feature map
    Conv2D(filters=64, kernel_size=(3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(units=128, activation="relu"),
    Dropout(rate=0.2),
    Dense(units=1, activation="sigmoid"),
])

In [None]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# so the training history exposes "accuracy" / "val_accuracy".
classifier.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Ajustar la CNN a las imágenes para entrenar

In [None]:
train_directory = "chest_xray/train"
batch_size = 32
validation_fraction = 0.2

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=validation_fraction
    )

# Validation images are only rescaled: val_loss drives checkpoint selection,
# so it should be measured on un-augmented data. The same validation_split is
# used so that the "training" and "validation" subsets stay complementary.
validation_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=validation_fraction
    )

training_dataset = train_datagen.flow_from_directory(
    directory=train_directory,
    shuffle=True,
    target_size=target_size,
    batch_size=batch_size,
    class_mode="binary",
    subset="training"
    )

validation_dataset = validation_datagen.flow_from_directory(
    directory=train_directory,
    shuffle=False,  # order is irrelevant for validation metrics
    target_size=target_size,
    batch_size=batch_size,
    class_mode="binary",
    subset="validation"
    )

In [None]:
# Pull one batch from the training generator for visual inspection.
sample_batch = next(training_dataset)
train_images, train_labels = sample_batch

# Display names keyed by the binary class index.
label_names = dict(enumerate(("Normal", "Pneumonia")))

In [None]:
# Draw a 6 x 5 grid (30 images) from the sampled batch, each titled with its label.
L = 6
W = 5

fig, axes = plt.subplots(nrows=L, ncols=W, figsize=(12, 12))
axes = axes.ravel()

for i, panel in enumerate(axes):
    panel.imshow(train_images[i])
    panel.set_title(label_names[train_labels[i]])
    panel.axis("off")

plt.subplots_adjust(wspace=0.5)

In [None]:
# Checkpoint file on Google Drive; it is overwritten only when the monitored
# validation loss reaches a new minimum.
modelpath = '/content/drive/MyDrive/chest-xray-classification/mymodel.hdf5'
checkpoint = ModelCheckpoint(
    filepath=modelpath,
    save_best_only=True,
    monitor="val_loss",
    mode="min",
    verbose=1,
)

In [None]:
# Train for 25 epochs. Step counts use floor division, so each epoch runs
# only full batches of the training and validation generators.
history = classifier.fit(
    x=training_dataset,
    epochs=25,
    steps_per_epoch=training_dataset.n // batch_size,
    validation_data=validation_dataset,
    validation_steps=validation_dataset.n // batch_size,
    callbacks=[checkpoint],
)

# Visualizar evolución del loss y accuracy a lo largo de las epochs

In [None]:
# Training vs. validation accuracy per epoch, read from the fit history.
fig, ax = plt.subplots()
for metric_key, legend_text in (("accuracy", "Training Accuracy"),
                                ("val_accuracy", "Validation Accuracy")):
    ax.plot(history.history[metric_key], label=legend_text)
ax.legend()
ax.set_title("Accuracy vs Training epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Accuracy")
plt.show()

In [None]:
# Training vs. validation loss per epoch, read from the fit history.
fig, ax = plt.subplots()
for metric_key, legend_text in (("loss", "Training Loss"),
                                ("val_loss", "Validation Loss")):
    ax.plot(history.history[metric_key], label=legend_text)
ax.legend()
ax.set_title("Cross Entropy loss vs Training epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Cross Entropy loss")
plt.show()

# Predicciones sobre el conjunto de test

In [None]:
# Reload the checkpoint written by ModelCheckpoint (lowest validation loss)
# rather than using the weights from the last training epoch.
model = load_model(modelpath)

In [None]:
test_directory = "chest_xray/test"

# Test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)
testing_dataset = test_datagen.flow_from_directory(
    directory=test_directory,
    shuffle=False,  # evaluation does not need a random order
    target_size=target_size,
    batch_size=batch_size,
    class_mode="binary"
    )

# No `steps` argument: the generator is consumed in full. Passing
# n // batch_size would silently skip the final partial batch, so the reported
# accuracy would not cover the whole test set.
evaluate = model.evaluate(testing_dataset, verbose=1)

# evaluate is [loss, accuracy], following the metrics given at compile time.
print("Precisión en la fase de test: {}".format(evaluate[1]))

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import os
import cv2

# Per-image results over the test directory, kept in matching order.
prediction = []  # predicted class index (0/1)
original = []    # true class index taken from the folder name
# NOTE: this list rebinds the name `image`, hiding the keras.preprocessing
# `image` module imported at the top; the name is kept because a later cell
# reads the stored pictures from it.
image = []
threshold = 0.5

for label, class_index in training_dataset.class_indices.items():
    folder_path = os.path.join(test_directory, label)
    # sorted() makes the processing (and later display) order reproducible.
    for item in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, item))
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them
            # instead of crashing in cv2.resize.
            continue
        # OpenCV loads BGR, while the Keras generators used for training feed
        # RGB; convert so inference (and imshow) see the same channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # cv2.resize expects (width, height); target_size is reversed to match.
        img = cv2.resize(img, target_size[::-1])
        image.append(img)
        # Same 1/255 rescaling as the generators, plus a leading batch axis.
        batch = (img / 255)[np.newaxis, ...]
        # The model returns a (1, 1) array; index the scalar explicitly rather
        # than relying on implicit array-to-int conversion.
        probability = float(model.predict(batch, verbose=0)[0, 0])
        prediction.append(int(probability > threshold))
        original.append(class_index)

In [None]:
# Overall accuracy of the thresholded predictions on the test images.
score = accuracy_score(original, prediction)
# Built-in round() works whether accuracy_score returns a NumPy scalar or a
# plain Python float (which has no .round() method).
print("Eficacia de la predicción: {}".format(round(score, 4)))

In [None]:
# Show the first 8 x 6 = 48 stored test images with predicted and true labels.
L, W = 8, 6

fig, axes = plt.subplots(nrows=L, ncols=W, figsize=(16, 12))
axes = axes.ravel()

for i, panel in enumerate(axes):
    panel.imshow(image[i])
    caption = "Pred={}\nTrue={}".format(
        label_names[prediction[i]],
        label_names[original[i]],
    )
    panel.set_title(caption)
    panel.axis('off')

plt.subplots_adjust(wspace=1.2, hspace=1.5)

# Matriz de Confusión

In [None]:
# Per-class precision, recall and F1 for the thresholded test predictions.
print(classification_report(y_true=original, y_pred=prediction))

In [None]:
# Confusion matrix heatmap: rows are the true classes, columns the predictions.
cm = confusion_matrix(original, prediction)
ax = plt.subplot()

sns.heatmap(cm, annot=True, ax=ax, fmt=".0f", cmap="coolwarm")

# Replace the numeric tick labels with the class display names.
class_names = list(label_names.values())
ax.set_xticklabels(class_names)
ax.set_yticklabels(class_names)

ax.set_xlabel("Predicciones")
ax.set_ylabel("Original")
ax.set_title("Matriz de Confusión")
plt.show()

# Curva ROC

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

# ROC analysis needs continuous scores: with thresholded 0/1 labels the curve
# collapses to a single operating point and the AUC is not a ranking measure.
# Recompute the sigmoid outputs for the stored test images in mini-batches
# (bounded memory), using the same 1/255 rescaling as the prediction loop.
probabilities = np.concatenate([
    model.predict(
        np.asarray(image[start:start + batch_size], dtype="float32") / 255,
        verbose=0,
    ).ravel()
    for start in range(0, len(image), batch_size)
])

fpr, tpr, _ = roc_curve(original, probabilities)
# Built-in round() works for both NumPy scalars and plain Python floats.
auc_score = round(roc_auc_score(original, probabilities), 4)

print("AUC: ", auc_score)

In [None]:
# Display the raw (fpr, tpr, thresholds) arrays for the thresholded predictions.
roc_curve(y_true=original, y_score=prediction)

In [None]:
# Plot the ROC curve from the previously computed fpr / tpr arrays.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, linestyle="-", color="blue", label="Modelo CNN")

ax.set_title("Curva Característica Operativa del Receptor (ROC)")
ax.set_xlabel("Tasa de falsos positivos")
ax.set_ylabel("Tasa de verdaderos positivos")
ax.legend(loc="best")
plt.show()