In [8]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# from keras import layers
# from keras.layers import BatchNormalization
import numpy as np
import matplotlib.pyplot as plt
import os
tf.__version__
tf.random.set_seed(123)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [9]:
# Adding data paths
parent_dir = r'/mnt/E/datasets/pneumonia_kaggle'
train_dir = os.path.join(parent_dir,'train')
test_dir = os.path.join(parent_dir,'test')
val_dir = os.path.join(parent_dir,'val')
save_dir = r'/mnt/D/Projects/Pneumonia Kaggle'

In [10]:
# Confirm that the data is balanced
norm_train = os.listdir(os.path.join(train_dir, 'NORMAL'))
pneu_train = os.listdir(os.path.join(train_dir, 'PNEUMONIA'))
print(f"Normal: {len(norm_train)}")
print(f"Pneumonia: {len(pneu_train)}")

# The data is not balanced
diff = len(pneu_train) - len(norm_train)
print(f"Difference: {diff}")

Normal: 3495
Pneumonia: 3495
Difference: 0


In [11]:
# Do augmentation and write data to disk
# It is only used if difference between classes > 1 image
# This is to avoid repetition while enhancing the model
from scipy import ndimage
import cv2

# Use counter to stop when the difference between classes is zero
if diff > 1:
    count = diff

    norm_path = os.path.join(train_dir, 'NORMAL')

    for im in norm_train:

        if count > 0:
            img = cv2.imread(os.path.join(norm_path, im))

            rotated_1 = ndimage.rotate(img, 10)
            cv2.imwrite(f"{norm_path}/augmented_1_{im}", rotated_1)

            rotated_2 = ndimage.rotate(img, -10)
            cv2.imwrite(f"{norm_path}/augmented_2_{im}", rotated_2)
            count -= 2
        
        else:
            break


In [12]:
# Setting parameters
batch_size = 32
img_height = 450
img_width = 450
epochs = 20

In [13]:
# Converting data directory to a training set
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    labels='inferred',
    color_mode='grayscale',
    seed=123,
    label_mode='binary',
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 6990 files belonging to 2 classes.


In [14]:
# Converting data directory to a validation set
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    val_dir,
    shuffle=True,
    labels='inferred',
    color_mode='grayscale',
    seed=123,
    label_mode='binary',
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 776 files belonging to 2 classes.


In [15]:
# Converting data directory to a test set
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    test_dir,
    shuffle=True,
    labels='inferred',
    color_mode='grayscale',
    seed=123,
    label_mode='binary',
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 624 files belonging to 2 classes.


In [16]:
# Getting class names
class_names = train_ds.class_names
print(class_names)

['NORMAL', 'PNEUMONIA']


In [17]:
rescale = tf.keras.layers.experimental.preprocessing.Rescaling(scale=1.0/255)
train_ds = train_ds.map(lambda image,label:(rescale(image),label))
val_ds = val_ds.map(lambda image,label:(rescale(image),label))
test_ds  = test_ds.map(lambda image,label:(rescale(image),label))

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [19]:

# Structuring the model
num_classes = len(class_names)

callback = [tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.001, patience=1, verbose=1, restore_best_weights=True),
            tf.keras.callbacks.ModelCheckpoint(f"{save_dir}/pneumonia_{epochs}e_binary_gray.h5")]
model = keras.models.Sequential()
# model.add(layers.experimental.preprocessing.Rescaling(1./255))
model.add(layers.Conv2D(32,3, activation='relu', input_shape=(img_width,img_height,1)))
model.add(layers.MaxPool2D())
# model.add(layers.Dropout(0.5))
# model.add(layers.BatchNormalization())
model.add(layers.Conv2D(32,3,activation='relu'))
model.add(layers.MaxPool2D())
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# model.add(layers.Dropout(0.5))
# model.add(layers.BatchNormalization())
model.add(layers.Dense(1, activation='sigmoid'))
# model.add(layers.Dense(num_classes))

In [20]:
model.compile(
              optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

In [21]:
# Fitting the model
history = model.fit(
                    train_ds,
                    validation_data=val_ds,
                    epochs=epochs,
                    callbacks=[callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 9: early stopping


In [22]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 448, 448, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 224, 224, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 222, 222, 32)      9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 111, 111, 32)     0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 394272)            0         
                                                                 
 dense (Dense)               (None, 64)                2

In [24]:
# Save model
# model.save(f"{save_dir}/pneumonia_{epochs}e_binary_gray.h5")

In [25]:
# model = keras.models.load_model(f"{save_dir}/pneumonia_{epochs}e_binary_gray.h5")

In [27]:
# Getting x and y from the test set
test_preds = []
y_test = []
for x,y in test_ds:
    test_pred = model.predict(x)
    # x_test.append(x)
    # y_test.append(y)
    test_preds.extend(test_pred)
    y_test.extend(y)

preds = np.array(test_preds)
y_pred = np.where(preds > 0.5, 1, 0)
y_test = np.array(y_test)



In [28]:
# Assessing model performance using confusion matrix
from sklearn.metrics import confusion_matrix

cm  = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel()

precision = tp/(tp+fp)
recall = tp/(tp+fn)
f1 = 2*((precision * recall)/(precision + recall))

print("Recall of the model is {:.2f}".format(recall))
print("Precision of the model is {:.2f}".format(precision))
print("F1-score of the model is {:.2f}".format(f1))

Recall of the model is 1.00
Precision of the model is 0.71
F1-score of the model is 0.83


In [31]:
# Predict
# E:\Downloads\chest pneumonia.jpg
img = keras.preprocessing.image.load_img(r"/mnt/E/datasets/pneumonia_kaggle/test/PNEUMONIA/person1_virus_6.jpeg",
                                         target_size=(img_width, img_height),
                                         color_mode='grayscale')
img_array = keras.preprocessing.image.img_to_array(img)
print(img_array.shape)
img_array = tf.expand_dims(img_array, 0)  # Create batch axis
print(img_array.shape)
predictions = model.predict(img_array)
score = predictions[0]
print(score)
tmp_pred = np.where(score > 0.5, 1, 0)
print(tmp_pred)
print("This image is %.2f percent normal and %.2f percent pneumonia." % (100 * (1 - score), 100 * score))

(450, 450, 1)
(1, 450, 450, 1)
[1.]
[1]
This image is 0.00 percent normal and 100.00 percent pneumonia.
