https://www.tensorflow.org/tutorials/images/classification

https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia

# Libraries

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from sklearn.metrics import plot_confusion_matrix
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
import seaborn as sns

# Read images

In [None]:
# Class sub-folder names. get_data uses a folder's position in this list as
# its integer label, so PNEUMONIA -> 0 and NORMAL -> 1.
folders = ['PNEUMONIA', 'NORMAL']
# Side length, in pixels, of the square every image is resized to.
img_size = 150

def get_data(data_dir):
    """Load the grayscale images found under each class folder of ``data_dir``.

    Every entry of the module-level ``folders`` list is treated as a
    sub-directory of ``data_dir``; the entry's index in that list becomes the
    integer label of the images inside it. Each image is read as grayscale
    and resized to ``img_size`` x ``img_size``.

    Files that cannot be decoded or resized are skipped, and a message naming
    the file is printed, so one stray file does not abort the whole load.

    Args:
        data_dir: Directory containing one sub-directory per class.

    Returns:
        A tuple ``(x, y)`` of parallel lists: the resized image arrays and
        their integer class labels.
    """
    x = []
    y = []
    for class_num, folder in enumerate(folders):
        path = os.path.join(data_dir, folder)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            # cv2.imread reports failure by returning None instead of raising,
            # so check explicitly rather than waiting for resize to blow up.
            img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img_arr is None:
                print(f"Skipping unreadable image: {img_path}")
                continue
            try:
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
            except cv2.error as e:
                print(f"Skipping {img_path}: {e}")
                continue
            x.append(resized_arr)
            y.append(class_num)
    return x, y

In [None]:
# Load the three dataset splits in the order train, test, val.
split_dirs = (
    'dataset/chest_xray/train',
    'dataset/chest_xray/test',
    'dataset/chest_xray/val',
)
(x_train, y_train), (x_test, y_test), (x_val, y_val) = map(get_data, split_dirs)

In [None]:
def dataset_count(y):
    """Print the number of pneumonia samples and plot the class distribution.

    Args:
        y: Iterable of integer labels; 0 is shown as "Pneumonia" and any
            other value as "Normal".
    """
    labels = ["Pneumonia" if label == 0 else "Normal" for label in y]
    print(labels.count('Pneumonia'))
    sns.countplot(x=labels)

# Class balance of the training split, then the sample count of each split.
dataset_count(y_train)
for split_labels in (y_train, y_test, y_val):
    print(len(split_labels))


In [None]:
def oversample(x, y):
    """Balance the classes by randomly duplicating minority-class images.

    The images are flattened to 1-D rows (the sampler works on 2-D feature
    matrices), resampled with ``RandomOverSampler``, and restored to the
    shape of the input images. The shape is taken from the data itself, so
    the function keeps working if the image size changes.

    Args:
        x: Non-empty sequence of equally shaped image arrays.
        y: Sequence of class labels, parallel to ``x``.

    Returns:
        A tuple ``(x_resampled, y_resampled)``: a list of image arrays with
        the same shape as the inputs, and the matching labels as returned by
        the sampler.

    Raises:
        ValueError: If ``x`` is empty.
    """
    if len(x) == 0:
        raise ValueError("oversample() needs at least one image")

    image_shape = np.shape(x[0])
    flat = [np.asarray(image).reshape(-1) for image in x]

    sampler = RandomOverSampler(sampling_strategy='minority')
    x_over, y_over = sampler.fit_resample(flat, y)

    # np.asarray covers both return types of the sampler (list rows or
    # ndarray rows) before restoring the original image shape.
    restored = [np.asarray(row).reshape(image_shape) for row in x_over]
    return restored, y_over

# Only the training split is rebalanced; the test and validation splits are
# not passed through oversample.
x_train, y_train = oversample(x_train,y_train)
# Re-plot the class distribution of the training labels after resampling.
dataset_count(y_train)

In [None]:
# Divide pixel values by 255 and add a trailing channel axis so each split
# has shape (samples, img_size, img_size, 1); labels become NumPy arrays.
_batch_shape = (-1, img_size, img_size, 1)

x_train = (np.array(x_train) / 255).reshape(_batch_shape)
x_val = (np.array(x_val) / 255).reshape(_batch_shape)
x_test = (np.array(x_test) / 255).reshape(_batch_shape)

y_train = np.array(y_train)
y_val = np.array(y_val)
y_test = np.array(y_test)

In [None]:
# Random augmentation settings: rotation, zoom, horizontal/vertical shifts
# and horizontal flips.
augmentation_options = dict(
    rotation_range=30,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)
# NOTE(review): Keras documents fit() as only required for featurewise
# centering/normalization or ZCA whitening, none of which are enabled here;
# confirm before removing this call.
image_generator.fit(x_train)




In [None]:
def _build_model():
    """Assemble the CNN: six Conv2D/BatchNorm/MaxPool stages and a dense head.

    Returns:
        An uncompiled ``tf.keras`` Sequential model that takes
        ``(img_size, img_size, 1)`` inputs and ends in a single sigmoid unit.
    """
    conv_filters = (32, 64, 64, 64, 128, 256)
    stack = []
    for stage, filters in enumerate(conv_filters):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = (
            {'input_shape': (img_size, img_size, 1)} if stage == 0 else {}
        )
        stack.append(tf.keras.layers.Conv2D(
            filters, 3, padding='same', activation='relu', **first_layer_kwargs))
        stack.append(tf.keras.layers.BatchNormalization())
        stack.append(tf.keras.layers.MaxPooling2D())

    stack.extend([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    return tf.keras.models.Sequential(stack)


model = _build_model()


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss (the model
# ends in a single sigmoid unit), and accuracy as the reported metric.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
epochs = 7
# Random augmentation is applied to the training batches only. Validation
# uses the untouched arrays so val_loss / val_accuracy are measured on the
# real images and stay comparable from one epoch (and one run) to the next.
history = model.fit(
    image_generator.flow(x_train, y_train, batch_size=32),
    validation_data=(x_val, y_val),
    epochs=epochs
)


In [None]:
# Pull the per-epoch curves recorded by model.fit.
logged = history.history
acc, val_acc = logged['accuracy'], logged['val_accuracy']
loss, val_loss = logged['loss'], logged['val_loss']

epochs_range = range(epochs)

# One panel per metric: (train curve, train label, validation curve,
# validation label, legend position, title).
panels = (
    (acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(8, 8))
for position, panel in enumerate(panels, start=1):
    train_curve, train_label, val_curve, val_label, legend_loc, title = panel
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()

In [None]:
# Evaluate on the held-out test split; the result holds the loss followed by
# the accuracy metric configured in compile().
evaluate = model.evaluate(np.array(x_test), np.array(y_test))
test_loss = evaluate[0]
test_accuracy = evaluate[1]
print("Loss of the model is - ", test_loss)
print("Accuracy of the model is - ", test_accuracy*100, "%")

In [None]:
# Set the font scale BEFORE drawing; seaborn settings only affect plots
# created after the call.
sns.set(font_scale=2)

num_classes = 2
# Threshold the sigmoid output at 0.5 to get hard 0/1 predictions.
predictions = (model.predict(np.array(x_test)) > 0.5).astype("int32")
# Rows are the real labels, columns are the predicted labels.
confusion = tf.math.confusion_matrix(labels=y_test, predictions=predictions, num_classes=num_classes)
counts = confusion.numpy()

group_counts = counts.flatten().tolist()
# Class 0 is PNEUMONIA (the positive class, tick label 'Sim'). In row-major
# order the cells are therefore:
#   [0, 0] real Sim, detected Sim -> true positive
#   [0, 1] real Sim, detected Não -> false negative
#   [1, 0] real Não, detected Sim -> false positive
#   [1, 1] real Não, detected Não -> true negative
group_names = ['Verdadeiro Positivo', 'Falso Negativo', 'Falso Positivo', 'Verdadeiro Negativo']

# Share of each cell within its row (i.e. within its real class). A class
# with no samples yields 0 instead of a division by zero.
row_totals = counts.sum(axis=1, keepdims=True)
group_percentages = np.divide(
    counts, row_totals,
    out=np.zeros(counts.shape, dtype=float),
    where=row_totals != 0,
).flatten().tolist()

annotations = [f'{name}\n{round(percentage*100, 2)}%\nQuantidade: {count}' for name, count,
               percentage in zip(group_names, group_counts, group_percentages)]

annotations = np.asarray(annotations).reshape(2, 2)
plt.subplots(figsize=(15, 10))
tick_labels = ['Sim', 'Não']
sns.heatmap(counts, annot=annotations, fmt='', xticklabels=tick_labels, yticklabels=tick_labels)

plt.xlabel("Detectado")
plt.ylabel("Real")

plt.show()


In [None]:
# Render a diagram of the model architecture, including layer shapes.
tf.keras.utils.plot_model(model, show_shapes=True)
