In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# from keras import layers
# from keras.layers import BatchNormalization
import numpy as np
import matplotlib.pyplot as plt
import os
tf.__version__
tf.random.set_seed(123)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [2]:
# Adding data paths
parent_dir = r'/mnt/E/datasets/pneumonia_kaggle'
train_dir = os.path.join(parent_dir,'train')
test_dir = os.path.join(parent_dir,'test')
val_dir = os.path.join(parent_dir,'val')
save_dir = r'/mnt/D/Projects/Pneumonia Kaggle'

In [3]:
# Setting parameters
batch_size = 32
img_height = 450
img_width = 450
epochs = 12

In [4]:
# Converting data directory to a training set
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    labels='inferred',
    color_mode='grayscale',
    seed=123,
    label_mode='binary',
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 5216 files belonging to 2 classes.


In [5]:
# Converting data directory to a validation set
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    val_dir,
    shuffle=True,
    labels='inferred',
    color_mode='grayscale',
    seed=123,
    label_mode='binary',
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 16 files belonging to 2 classes.


In [6]:
# Converting data directory to a test set
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    test_dir,
    shuffle=True,
    labels='inferred',
    color_mode='grayscale',
    seed=123,
    label_mode='binary',
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 624 files belonging to 2 classes.


In [7]:
# Getting class names
class_names = train_ds.class_names
print(class_names)

['NORMAL', 'PNEUMONIA']


In [8]:
rescale = tf.keras.layers.experimental.preprocessing.Rescaling(scale=1.0/255)
train_ds = train_ds.map(lambda image,label:(rescale(image),label))
val_ds = val_ds.map(lambda image,label:(rescale(image),label))
test_ds  = test_ds.map(lambda image,label:(rescale(image),label))

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [9]:
# # Normalizing colors
# normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)
# normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
# image_batch, labels_batch = next(iter(normalized_ds))
# first_image = image_batch[0]
# print(np.min(first_image), np.max(first_image))

In [10]:

# Structuring the model
num_classes = len(class_names)

callback = [tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.001, patience=1, verbose=1, restore_best_weights=True),
            tf.keras.callbacks.ModelCheckpoint(f"{save_dir}/pneumonia_{epochs}e_binary_gray.h5")]
model = keras.models.Sequential()
# model.add(layers.experimental.preprocessing.Rescaling(1./255))
model.add(layers.Conv2D(32,3, activation='relu', input_shape=(img_width,img_height,1)))
model.add(layers.MaxPool2D())
# model.add(layers.Dropout(0.5))
# model.add(layers.BatchNormalization())
model.add(layers.Conv2D(32,3,activation='relu'))
model.add(layers.MaxPool2D())
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# model.add(layers.Dropout(0.5))
# model.add(layers.BatchNormalization())
model.add(layers.Dense(1, activation='sigmoid'))
# model.add(layers.Dense(num_classes))

In [11]:
model.compile(
              optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

In [43]:
# Fitting the model
history = model.fit(
                    train_ds,
                    validation_data=val_ds,
                    epochs=epochs,
                    callbacks=[callback])

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 11: early stopping


In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 448, 448, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 224, 224, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 222, 222, 32)      9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 111, 111, 32)     0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 394272)            0         
                                                                 
 dense (Dense)               (None, 64)                2

In [14]:
# Plotting model performance
# acc = history.history['accuracy']
# val_acc = history.history['val_accuracy']

# loss = history.history['loss']
# val_loss = history.history['val_loss']

# epochs_range = range(epochs)

# plt.figure(figsize=(8, 8))
# plt.subplot(1, 2, 1)
# plt.plot(epochs_range, acc, label='Training Accuracy')
# plt.plot(epochs_range, val_acc, label='Validation Accuracy')
# plt.legend(loc='lower right')
# plt.title('Training and Validation Accuracy')

# plt.subplot(1, 2, 2)
# plt.plot(epochs_range, loss, label='Training Loss')
# plt.plot(epochs_range, val_loss, label='Validation Loss')
# plt.legend(loc='upper right')
# plt.title('Training and Validation Loss')
# plt.show()

In [None]:
# Save model
# model.save(f"{save_dir}/pneumonia_{epochs}e_binary_gray.h5")

In [15]:
model = keras.models.load_model(f"{save_dir}/pneumonia_{epochs}e_binary_gray.h5")

In [16]:
# Normalizing test colors
# normalization_test_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)
# normalized_test_ds = test_ds.map(lambda x, y: (normalization_test_layer(x), y))
# image_test_batch, labels_test_batch = next(iter(normalized_test_ds))
# first_test_image = image_test_batch[0]
# print(np.min(first_test_image), np.max(first_test_image))

In [17]:
# Getting x and y from the test set
test_preds = []
y_test = []
for x,y in test_ds:
    test_pred = model.predict(x)
    # x_test.append(x)
    # y_test.append(y)
    test_preds.extend(test_pred)
    y_test.extend(y)

preds = np.array(test_preds)
y_pred = np.where(preds > 0.5, 1, 0)
y_test = np.array(y_test)



In [18]:
# Assessing model performance using confusion matrix
from sklearn.metrics import confusion_matrix

cm  = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel()

precision = tp/(tp+fp)
recall = tp/(tp+fn)
f1 = 2*((precision * recall)/(precision + recall))

print("Recall of the model is {:.2f}".format(recall))
print("Precision of the model is {:.2f}".format(precision))
print("F1-score of the model is {:.2f}".format(f1))

Recall of the model is 1.00
Precision of the model is 0.70
F1-score of the model is 0.82


In [None]:
# Predict
# E:\Downloads\chest pneumonia.jpg
img = keras.preprocessing.image.load_img(r"/mnt/E/datasets/pneumonia_kaggle/train/PNEUMONIA/person3_bacteria_10.jpeg",
                                         target_size=(img_width, img_height),
                                         color_mode='grayscale')
img_array = keras.preprocessing.image.img_to_array(img)
print(img_array.shape)
img_array = tf.expand_dims(img_array, 0)  # Create batch axis
print(img_array.shape)
predictions = model.predict(img_array)
score = predictions[0]
print(score)
tmp_pred = np.where(score > 0.5, 1, 0)
print(tmp_pred)
print("This image is %.2f percent normal and %.2f percent pneumonia." % (100 * (1 - score), 100 * score))

(450, 450, 1)
(1, 450, 450, 1)
[1.]
[1]
This image is 0.00 percent normal and 100.00 percent pneumonia.


In [None]:
# import pandas as pd
# df = pd.DataFrame(columns=['File', 'Category', 'Prediction','Class'])
# class_names = os.listdir(test_dir)
# count = 0
# # Access test files 
# for c in class_names:
#     path = os.path.join(test_dir,c)
#     print(path)
#     files = os.listdir(path)
#     for file in files:
#         # Load and preprocess image
#         img_path = os.path.join(path,file)
#         img = keras.preprocessing.image.load_img(img_path,
#                                          target_size=(img_width, img_height),
#                                          color_mode='grayscale')
#         img_array = keras.preprocessing.image.img_to_array(img)
#         # print(img_array.shape)
#         img_array = tf.expand_dims(img_array, 0)  # Create batch axis
#         # print(img_array.shape)
#         predictions = model.predict(img_array)
#         score = predictions[0]
#         # Set threshold for classes
#         img_pred = np.where(score > 0.5, 1, 0)

#         # Write rows
#         df.at[count, 'File'] = file
#         df.at[count, 'Category'] = c
#         df.at[count, 'Prediction'] = score
#         df.at[count, 'Class'] = img_pred
#         count += 1


In [None]:
# df.to_csv(f"{parent_dir}/test_preds8e.csv")

This means that the model overfits the data.