# Pneumonia Detection with a Convolutional Neural Network: A Classification Problem

### Importing the libraries

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
from sklearn.metrics import classification_report
import numpy

In [2]:
# Show which TensorFlow release this notebook is running against.
tf.version.VERSION

'2.2.1'

## Part 1 - Data Preprocessing

### Preprocessing the Training set 

In [3]:
# Training-time generator: pixel values are rescaled by 1/255 (normalization), and
# random shear, random zoom and horizontal flips produce varied copies of the
# images (augmentation) to help prevent overfitting.
# Per the original author's note, Keras loads images in RGB mode by default, so
# images stored in different modes need no manual conversion.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Images are shrunk to a small fixed size for faster (but less accurate) learning.
# NOTE(review): Keras documents target_size as (height, width); the order used
# here is harmless only while both values are equal.
img_width = img_height = 64
training_set = train_datagen.flow_from_directory(
    'dataset/train',
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='binary',
)

Found 5216 images belonging to 2 classes.


### Examine the class label imbalance

In [4]:
# Tally how many training images fall under each class index.
from collections import Counter
cases_count = Counter()
cases_count.update(training_set.classes)
print(cases_count.items())

dict_items([(0, 1341), (1, 3875)])


### Weighting classes to balance the data

In [5]:
# Inverse-frequency class weights: each class gets (1 / its count) * (total / 2),
# so the class with fewer images receives the larger weight.
count_0, count_1 = cases_count[0], cases_count[1]
total = count_0 + count_1
half_total = total / 2.0
weight_for_0 = (1 / count_0) * half_total
weight_for_1 = (1 / count_1) * half_total
class_weight = dict([(0, weight_for_0), (1, weight_for_1)])
print(class_weight)

{0: 1.9448173005219984, 1: 0.6730322580645162}


### Preprocessing the Validation set

In [6]:
# Validation images are only rescaled by 1/255; no augmentation is configured.
val_datagen = ImageDataGenerator(rescale=1./255)
val_set = val_datagen.flow_from_directory(
    directory='dataset/val',
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='binary',
)

Found 16 images belonging to 2 classes.


### Preprocessing the Test set

In [7]:
# Test images are only rescaled by 1/255; no augmentation is configured.
# Shuffling is turned off so the batch order stays fixed, which lets predictions
# be compared against test_set.classes later in the notebook.
test_datagen = ImageDataGenerator(rescale=1./255)
test_set = test_datagen.flow_from_directory(
    directory='dataset/test',
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='binary',
    shuffle=False,
)

Found 624 images belonging to 2 classes.


## Part 2 - Building the CNN

### Initialising the CNN

In [8]:
# Start from an empty sequential model; layers are appended in the cells below.
cnn = keras.Sequential()

### Step 1 - Convolution

In [9]:
# First convolution: 32 filters, kernel size 3, ReLU activation. As the first
# layer it also declares the input shape (resized image dimensions, 3 channels).
first_conv = tf.keras.layers.Conv2D(
    filters=32,
    kernel_size=3,
    activation='relu',
    input_shape=[img_width, img_height, 3],
)
cnn.add(first_conv)

### Step 2 - Pooling

In [10]:
# Max pooling with pool size 2 and stride 2 after the first convolution.
first_pool = tf.keras.layers.MaxPool2D(pool_size=2, strides=2)
cnn.add(first_pool)

### Adding a second convolutional layer

In [11]:
# Second convolution + pooling stage, with the same hyperparameters as the first
# (the input shape is not repeated here).
second_stage = (
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
)
for stage_layer in second_stage:
    cnn.add(stage_layer)

### Step 3 - Flattening

In [12]:
# Flatten the pooled feature maps into a vector for the dense layers.
flatten_layer = tf.keras.layers.Flatten()
cnn.add(flatten_layer)

### Step 4 - Full Connection

In [13]:
# Fully connected hidden layer: 128 units with ReLU activation.
hidden_layer = tf.keras.layers.Dense(units=128, activation='relu')
cnn.add(hidden_layer)

### Step 5 - Output Layer

In [14]:
# Output layer: a single sigmoid unit, matching the binary class_mode of the generators.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
cnn.add(output_layer)

## Part 3 - Training the CNN

### Compiling the CNN

In [15]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the tracked metric.
cnn.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### Training the CNN on the Training set and evaluating it on the Validation set

In [16]:
#model_fit = cnn.fit(x = training_set, validation_data = val_set, epochs = 1, class_weight=class_weight)

### Save model weights

In [17]:
# save weights
#cnn.save_weights('saved_weights/cnn')
#cnn.save('model.h5')

In [18]:
# Load weights
model_fit = cnn.load_weights('saved_weights/cnn')
#model_fit = keras.models.load_model('model.h5')

### Model evaluation

In [19]:
# Score the model on the test generator; element 1 of the result is reported
# as the accuracy in the following cell.
model_evaluation = cnn.evaluate(x=test_set)



In [20]:
# Report the test accuracy as a percentage with two decimals.
# The format spec is ".2f" with no leading space: a space there acts as a sign
# flag and pads positive numbers with an extra blank, which would produce a
# double space after "Model Accuracy:".
test_accuracy_pct = model_evaluation[1] * 100
print(f"Model Accuracy: {test_accuracy_pct:.2f}%")

Model Accuracy:  91.19%


### Generating Classification Report and Confusion Matrix

In [21]:
# Predict classes
preds = np.round(cnn.predict(test_set),0)

In [22]:
# Ground truth classes and labels
gt_class = test_set.classes
gt_labels = list(test_set.class_indices.keys())

In [23]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score

# Confusion matrix of ground-truth classes against predicted classes.
cm = confusion_matrix(gt_class, preds)
print(cm)

# Accuracy and recall as percentages. recall_score is called with its defaults,
# and the result is labelled with the class name at index 1.
accuracy_pct = accuracy_score(gt_class, preds) * 100
recall_pct = recall_score(gt_class, preds) * 100
summary_template = 'The cnn model has an accuracy score of {:.2f} and recall score of {:.2f} for {:s}'
print(summary_template.format(accuracy_pct, recall_pct, gt_labels[1]))

[[195  39]
 [ 16 374]]
The cnn model has an accuracy score of 91.19 and recall score of 95.90 for PNEUMONIA


In [24]:
# Detailed report
report = classification_report(gt_class, preds, target_names=gt_labels)
print(report)

              precision    recall  f1-score   support

      NORMAL       0.92      0.83      0.88       234
   PNEUMONIA       0.91      0.96      0.93       390

    accuracy                           0.91       624
   macro avg       0.91      0.90      0.90       624
weighted avg       0.91      0.91      0.91       624

