# Detección de neumonía usando CNN

En este *Notebook* se aborda la clasificación de la neumonía mediante CNN (red neuronal convolucional). Además, también se experimentará con valores de umbral.

La neumonía es una enfermedad inflamatoria de los pulmones que afecta principalmente a los pequeños sacos de aire, conocidos como alvéolos. Por lo general, los síntomas incluyen una combinación de tos productiva o seca, dolor en el pecho, fiebre y dificultad para respirar.

Obtenga más información en: <a href='https://www.who.int/news-room/fact-sheets/detail/pneumonia'>Organización Mundial de la Salud</a>

<center><img src='https://upload.wikimedia.org/wikipedia/commons/thumb/2/2a/Chest_X-ray_in_influenza_and_Haemophilus_influenzae_-_annotated.jpg/1200px-Chest_X-ray_in_influenza_and_Haemophilus_influenzae_-_annotated.jpg' alt='Pneumonia' height='800' width='300'> </center>
<p>Fuente: Wikipedia</p>

## Importación de módulo y librerías

In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import precision_recall_curve, roc_curve, accuracy_score, confusion_matrix, precision_score, recall_score
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt 
import seaborn as sns
plt.style.use('ggplot')
import pickle 
import os 
import numpy as np
import cv2 
%matplotlib inline

## Procesa las imágenes y redimensionalas al tamaño preferido

In [2]:
labels = ['PNEUMONIA', 'NORMAL']
img_size = 200
def get_training_data(data_dir):
    dataX = [] 
    dataY = []
    for label in labels: 
        path = os.path.join(data_dir, label)
        class_num = labels.index(label)
        for img in os.listdir(path):
            try:
                img_arr = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
                dataX.append(resized_arr)
                dataY.append(class_num)
            except Exception as e:
                print(e)
    return dataX, dataY

## Preparación de los datos de entrenamiento y prueba

In [3]:
trainX, trainY = get_training_data('chest_xray/train')
testX, testY = get_training_data('chest_xray/test')
valX, valY = get_training_data('chest_xray/val')

OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'

OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'



In [4]:
pnenumonia = 0 
normal = 0 

for j in trainY:
    if j == 0:
        pnenumonia+=1
    else:
        normal+=1
        
print('Pneumonia:', pnenumonia)
print('Normal:', normal)
print('Pneumonia - Normal:', pnenumonia-normal)

Pneumonia: 3875
Normal: 1341
Pneumonia - Normal: 2534


## Visualizar imágenes de entrenamiento

In [5]:
plt.imshow(trainX[4000], cmap='gray')
print(labels[trainY[4000]])

NORMAL


Estamos incorporando los datos de validación en los datos de entrenamiento porque no contienen suficientes ejemplos.

In [6]:
X = []
y = []

for feature in trainX:
    X.append(feature)
    
for label in trainY:
    y.append(label)
    
for feature in testX:
    X.append(feature)

for label in testY:
    y.append(label)
    
for feature in valX:
    X.append(feature)

for label in valY:
    y.append(label)

X = np.array(X).reshape(-1, img_size, img_size, 1)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

In [7]:
X_train = X_train / 255
X_test = X_test / 255

## CNN (Red neuronal convolucional)

<center><img src='cnn.PNG' alt='CNN'> </center>

In [8]:
model = Sequential()

model.add(Conv2D(256, (3, 3), input_shape=X_train.shape[1:], padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model.add(BatchNormalization(axis=1))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model.add(BatchNormalization(axis=1))

model.add(Conv2D(16, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model.add(BatchNormalization(axis=1))

model.add(Flatten())

model.add(Dropout(0.5))
model.add(Dense(64))
model.add(Activation('relu'))

model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid')) # cambiar a 'lineal' para regresión

early_stop = EarlyStopping(patience=3, monitor='val_loss')
adam = Adam(learning_rate=0.0001)
model.compile(loss='binary_crossentropy',optimizer=adam,metrics=['acc'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
model.summary()

In [None]:
history = model.fit(X_train, y_train, batch_size=15, epochs=5, validation_split=0.20, callbacks=[early_stop])

Epoch 1/5
[1m114/250[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m3:05[0m 1s/step - acc: 0.6908 - loss: 0.7801

In [None]:
model.evaluate(X_test, y_test)

## Visualizando nuestro progreso en el entrenamiento

In [None]:
plt.figure(figsize=(16, 9))
plt.plot(history.epoch, history.history['acc'])
plt.title('Model Accuracy')
plt.legend(['train'], loc='upper left')
plt.show()

plt.figure(figsize=(16, 9))
plt.plot(history.epoch, history.history['loss'])
plt.title('Model Loss')
plt.legend(['train'], loc='upper left')
plt.show()

plt.figure(figsize=(16, 9))
plt.plot(history.epoch, history.history['val_acc'])
plt.title('Model Validation Accuracy')
plt.legend(['train'], loc='upper left')
plt.show()

plt.figure(figsize=(16, 9))
plt.plot(history.epoch, history.history['val_loss'])
plt.title('Model Validation Loss')
plt.legend(['train'], loc='upper left')
plt.show()

## Preparar los datos para *precision* vs. *recall* y ROC

In [None]:
pred = model.predict(X_train)
precisions, recalls, thresholds = precision_recall_curve(y_train, pred)
fpr, tpr, thresholds2 = roc_curve(y_train, pred)

In [None]:
def plot_precision_recall(precisions, recalls, thresholds):
    plt.plot(thresholds, precisions[:-1], 'b--')
    plt.plot(thresholds, recalls[:-1], 'g-')
    plt.title('Precision vs. Recall')
    plt.xlabel('Thresholds')
    plt.legend(['Precision', 'Recall'], loc='best')
    plt.show()

def plot_roc(fpr, tpr):
    plt.plot(fpr, tpr)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.title('FPR (False Positive rate) vs TPR (True Positive Rate)')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate (Recall)')
    plt.show()
    
plot_precision_recall(precisions, recalls, thresholds)
plot_roc(fpr, tpr)

In [None]:
predictions = model.predict(X_test)

## Establecimiento de umbrales

Queremos que los resultados sean precisos sin sacrificar demasiado el *recall*.

In [None]:
binary_predictions = []
threshold = thresholds[np.argmax(precisions >= 0.95)]
for i in predictions:
    if i >= threshold:
        binary_predictions.append(1)
    else:
        binary_predictions.append(0) 

In [None]:
print('Accuracy on testing set:', accuracy_score(binary_predictions, y_test))
print('Precision on testing set:', precision_score(binary_predictions, y_test))
print('Recall on testing set:', recall_score(binary_predictions, y_test))

## Gráfico de la matriz de confusión

Aquí se explica cómo interpretarla.

<center><img src='conf_mat.PNG' alt='Matriz de confusión'> </center>

In [None]:
matrix = confusion_matrix(binary_predictions, y_test)
plt.figure(figsize=(16, 9))
ax= plt.subplot()
sns.heatmap(matrix, annot=True, ax = ax)

# etiquetas, título y marcas
ax.set_xlabel('Predicted Labels', size=20)
ax.set_ylabel('True Labels', size=20)
ax.set_title('Confusion Matrix', size=20) 
ax.xaxis.set_ticklabels(labels)
ax.yaxis.set_ticklabels(labels)

## Visualización de resultados

In [None]:
plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(X_train.reshape(-1, img_size, img_size)[i], cmap='gray')
    if(binary_predictions[i]==y_test[i]):
        plt.xlabel(labels[binary_predictions[i]], color='blue')
    else:
        plt.xlabel(labels[binary_predictions[i]], color='red')
plt.show()

## Descarga del modelo

In [None]:
 model.save('pneumonia_detection_ai_version_2.h5')