# Pneumonia detection VGG16

Kaggle dataset:
https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia

Pre-trained model:
https://www.kaggle.com/code/sayooj98/pneumonia-detection-vgg16

# Connect to Kaggle

In [1]:
# Connect to Kaggle
from google.colab import files

# Remove any old Kaggle.json file
!rm /content/kaggle.json -f
files.upload() #upload kaggle.json

# Connect to Kaggle account
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json
kaggle.json


## Download Kaggle public dataset

In [2]:
# Download kaggle dataset for Pneumonia classification
# Link: https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia

!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
!unzip -q '/content/chest-xray-pneumonia.zip' -d .

Downloading chest-xray-pneumonia.zip to /content
100% 2.28G/2.29G [00:26<00:00, 156MB/s]
100% 2.29G/2.29G [00:26<00:00, 91.4MB/s]


# Preparation

## Import libraries

In [3]:
# import the libraries as shown below
from keras.layers import Input, Lambda, Dense, Flatten
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
import tensorflow as tf
from keras.models import load_model
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
from keras.utils import load_img, img_to_array

from prettytable import PrettyTable

## Define variables

In [4]:
# define variables
image_size =[224,224]
train_path = '../content/chest_xray/train'
test_path = '../content/chest_xray/test'

## Import VGG16 model

In [5]:
vgg = VGG16(input_shape=image_size + [3], weights='imagenet', include_top=False)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [6]:
# Exlude the existing weights from the training
for layer in vgg.layers:
    layer.trainable = False

In [7]:
# Get number of classes from the folders
folders = glob('../content/chest_xray/train/*')

In [8]:
x = Flatten()(vgg.output)

# Load the model

In [9]:
prediction = Dense(len(folders), activation='softmax')(x)

# create a model object
model = Model(inputs=vgg.input, outputs=prediction)

## Model summary

In [10]:
# for viewing the structure of the model
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

## Compile the model

In [11]:
# tell the model what cost and optimization method to use
model.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

## Data preprocessing

In [12]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)

test_datagen = ImageDataGenerator(rescale = 1./255)

In [13]:
# Make sure you provide the same target size as initialied for the image size
training_set = train_datagen.flow_from_directory('/content/chest_xray/train',
                                                 target_size = (224, 224),
                                                 batch_size = 32,
                                                 class_mode = 'categorical')

Found 5216 images belonging to 2 classes.


In [14]:
test_set = test_datagen.flow_from_directory('../content/chest_xray/test',
                                            target_size = (224, 224),
                                            batch_size = 32,
                                            class_mode = 'categorical')

Found 624 images belonging to 2 classes.


In [15]:
len(training_set)

163

In [16]:
len(test_set)

20

## Fit the model

In [17]:
#fit the model

r = model.fit_generator(
    training_set,
    validation_data=test_set,
    epochs=5,
    steps_per_epoch= len(training_set),
    validation_steps= len(test_set)
)

  r = model.fit_generator(


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Save the model

In [18]:
model.save('Model_on_Public_Dataset.h5')

### Save the model to Google Drive

In [19]:
# Copy the model to google drive.
!cp '/content/Model_on_Public_Dataset.h5' '/content/drive/MyDrive/Colab Notebooks/Model_on_Public_Dataset.h5'

# Load and Predict

## Load the saved model

In [20]:
model = load_model('/content/drive/MyDrive/Colab Notebooks/Model_on_Public_Dataset.h5')

## Use the model on public valiadtion dataset

In [21]:
# Validate the model on the validation dataset
from os import listdir
normal_path = '/content/chest_xray/val/NORMAL/'
abnormal_path = '/content/chest_xray/val/PNEUMONIA/'

results = []

# validate the normal images
images = listdir(normal_path)
for i in images:
  if(i.split('.')[1] == 'jpeg'):
    test_image = load_img(normal_path + i, target_size=[224,224])
    test_image = img_to_array(test_image)
    test_image = test_image/255
    test_image = np.expand_dims(test_image,axis=0)
    results.append([model.predict(test_image), 'Normal'])

# validate the pneumonia images
images = listdir(abnormal_path)
for i in images:
  if(i.split('.')[1] == 'jpeg'):
    test_image = load_img(abnormal_path + i, target_size=[224,224])
    test_image = img_to_array(test_image)
    test_image = test_image/255
    test_image = np.expand_dims(test_image,axis=0)
    results.append([model.predict(test_image), 'Pneumonia'])

results



[[array([[0.83907855, 0.16092142]], dtype=float32), 'Normal'],
 [array([[9.998078e-01, 1.922398e-04]], dtype=float32), 'Normal'],
 [array([[0.59529966, 0.40470034]], dtype=float32), 'Normal'],
 [array([[0.9452433 , 0.05475667]], dtype=float32), 'Normal'],
 [array([[0.412803, 0.587197]], dtype=float32), 'Normal'],
 [array([[0.99826103, 0.00173897]], dtype=float32), 'Normal'],
 [array([[0.80700034, 0.19299968]], dtype=float32), 'Normal'],
 [array([[0.9961754, 0.0038246]], dtype=float32), 'Normal'],
 [array([[5.2009500e-04, 9.9947995e-01]], dtype=float32), 'Pneumonia'],
 [array([[0.00628525, 0.99371475]], dtype=float32), 'Pneumonia'],
 [array([[6.8920688e-04, 9.9931085e-01]], dtype=float32), 'Pneumonia'],
 [array([[0.07897868, 0.92102134]], dtype=float32), 'Pneumonia'],
 [array([[0.00374677, 0.99625325]], dtype=float32), 'Pneumonia'],
 [array([[0.00303574, 0.9969643 ]], dtype=float32), 'Pneumonia'],
 [array([[0.47570974, 0.5242903 ]], dtype=float32), 'Pneumonia'],
 [array([[0.00144865, 0.

In [22]:
# Calculate the accuracy on the validation data
true_positive = 0
true_negative = 0
false_positive = 0
false_negative = 0

for r in range(len(results)):
  if(results[r][1] == 'Normal'):
    if(results[r][0][0][0] > results[r][0][0][1]):
      true_negative = true_negative + 1
    else:
      false_negative = false_negative + 1
  else:
    if(results[r][0][0][0] < results[r][0][0][1]):
      true_positive = true_positive + 1
    else:
      false_positive = false_positive + 1

all = true_positive + true_negative + false_positive + false_negative

In [23]:
accuracy_table = PrettyTable(["Total: " + str(len(results)), "Actual positive", "Actual negative"])

print('Predicting on public dataset:')
accuracy_table.add_row(['Predicted positive', 'True positive\n' + str(true_positive), 'False positive\n' + str(false_positive)])
accuracy_table.add_row(['-------------------', '-------------------', '-------------------'])
accuracy_table.add_row(['Predicted negative', 'False negative\n' + str(false_negative), 'True negative\n' + str(true_negative)])

print(accuracy_table)

print('\nAccuracy: ' + str(100 * (true_positive + true_negative)/all) + ' %')

Predicting on public dataset:
+---------------------+---------------------+---------------------+
|      Total: 16      |   Actual positive   |   Actual negative   |
+---------------------+---------------------+---------------------+
|  Predicted positive |    True positive    |    False positive   |
|                     |          8          |          0          |
| ------------------- | ------------------- | ------------------- |
|  Predicted negative |    False negative   |    True negative    |
|                     |          1          |          7          |
+---------------------+---------------------+---------------------+

Accuracy: 93.75 %


## Use the model on Hakeem dataset

In [24]:
from os import listdir
normal_path = '/content/drive/MyDrive/AI4D Course/Project/Data/Hakeem Chest Images/normal/'
abnormal_path = '/content/drive/MyDrive/AI4D Course/Project/Data/Hakeem Chest Images/pneumonia/'

results = []

files = listdir(normal_path)
for f in files:
  if(f.split('.')[1] == 'jpg'):
    test_image = load_img(normal_path + f, target_size=[224,224])
    test_image = img_to_array(test_image)
    test_image = test_image/255
    test_image = np.expand_dims(test_image,axis=0)
    results.append([model.predict(test_image), 'Normal'])

files = listdir(abnormal_path)
for f in files:
  if(f.split('.')[1] == 'jpg'):
    test_image = load_img(abnormal_path + f, target_size=[224,224])
    test_image = img_to_array(test_image)
    test_image = test_image/255
    test_image = np.expand_dims(test_image,axis=0)
    results.append([model.predict(test_image), 'Pneumonia'])

results



[[array([[0.990936  , 0.00906401]], dtype=float32), 'Normal'],
 [array([[0.98806155, 0.01193848]], dtype=float32), 'Normal'],
 [array([[0.37968424, 0.62031573]], dtype=float32), 'Normal'],
 [array([[9.9923432e-01, 7.6567085e-04]], dtype=float32), 'Normal'],
 [array([[0.75095224, 0.24904776]], dtype=float32), 'Normal'],
 [array([[0.60733676, 0.3926633 ]], dtype=float32), 'Normal'],
 [array([[0.9532538 , 0.04674617]], dtype=float32), 'Pneumonia'],
 [array([[0.82524234, 0.17475773]], dtype=float32), 'Pneumonia'],
 [array([[0.0580475, 0.9419525]], dtype=float32), 'Pneumonia'],
 [array([[0.9342177 , 0.06578226]], dtype=float32), 'Pneumonia'],
 [array([[0.03436057, 0.9656394 ]], dtype=float32), 'Pneumonia']]

In [25]:
# Calculate the accuracy on the Hakeem dataset
true_positive = 0
true_negative = 0
false_positive = 0
false_negative = 0

for r in range(len(results)):
  if(results[r][1] == 'Normal'):
    if(results[r][0][0][0] > results[r][0][0][1]):
      true_negative = true_negative + 1
    else:
      false_negative = false_negative + 1
  else:
    if(results[r][0][0][0] < results[r][0][0][1]):
      true_positive = true_positive + 1
    else:
      false_positive = false_positive + 1

all = true_positive + true_negative + false_positive + false_negative

In [26]:
accuracy_table = PrettyTable(["Total: " + str(len(results)), "Actual positive", "Actual negative"])

print('Predicting on public dataset:')
accuracy_table.add_row(['Predicted positive', 'True positive\n' + str(true_positive), 'False positive\n' + str(false_positive)])
accuracy_table.add_row(['-------------------', '-------------------', '-------------------'])
accuracy_table.add_row(['Predicted negative', 'False negative\n' + str(false_negative), 'True negative\n' + str(true_negative)])

print(accuracy_table)

print('\nAccuracy: ' + str(100 * (true_positive + true_negative)/all) + ' %')

Predicting on public dataset:
+---------------------+---------------------+---------------------+
|      Total: 11      |   Actual positive   |   Actual negative   |
+---------------------+---------------------+---------------------+
|  Predicted positive |    True positive    |    False positive   |
|                     |          2          |          3          |
| ------------------- | ------------------- | ------------------- |
|  Predicted negative |    False negative   |    True negative    |
|                     |          1          |          5          |
+---------------------+---------------------+---------------------+

Accuracy: 63.63636363636363 %


# Change the model parameters

In [27]:
model.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [None]:
r = model.fit_generator(
    training_set,
    validation_data=test_set,
    epochs=6,
    steps_per_epoch= len(training_set),
    validation_steps= len(test_set)
)

  r = model.fit_generator(


Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6

In [None]:
# Validate the model on the validation dataset
from os import listdir
normal_path = '/content/chest_xray/val/NORMAL/'
abnormal_path = '/content/chest_xray/val/PNEUMONIA/'

results = []

# validate the normal images
images = listdir(normal_path)
for i in images:
  if(i.split('.')[1] == 'jpeg'):
    test_image = load_img(normal_path + i, target_size=[224,224])
    test_image = img_to_array(test_image)
    test_image = test_image/255
    test_image = np.expand_dims(test_image,axis=0)
    results.append([model.predict(test_image), 'Normal'])

# validate the pneumonia images
images = listdir(abnormal_path)
for i in images:
  if(i.split('.')[1] == 'jpeg'):
    test_image = load_img(abnormal_path + i, target_size=[224,224])
    test_image = img_to_array(test_image)
    test_image = test_image/255
    test_image = np.expand_dims(test_image,axis=0)
    results.append([model.predict(test_image), 'Pneumonia'])

results

In [None]:
# Calculate the accuracy on the validation data
true_positive = 0
true_negative = 0
false_positive = 0
false_negative = 0

for r in range(len(results)):
  if(results[r][1] == 'Normal'):
    if(results[r][0][0][0] > results[r][0][0][1]):
      true_negative = true_negative + 1
    else:
      false_negative = false_negative + 1
  else:
    if(results[r][0][0][0] < results[r][0][0][1]):
      true_positive = true_positive + 1
    else:
      false_positive = false_positive + 1

all = true_positive + true_negative + false_positive + false_negative

In [None]:
accuracy_table = PrettyTable(["Total: " + str(len(results)), "Actual positive", "Actual negative"])

print('Predicting on public dataset:')
accuracy_table.add_row(['Predicted positive', 'True positive\n' + str(true_positive), 'False positive\n' + str(false_positive)])
accuracy_table.add_row(['-------------------', '-------------------', '-------------------'])
accuracy_table.add_row(['Predicted negative', 'False negative\n' + str(false_negative), 'True negative\n' + str(true_negative)])

print(accuracy_table)

print('\nAccuracy: ' + str(100 * (true_positive + true_negative)/all) + ' %')

In [None]:
from os import listdir
normal_path = '/content/drive/MyDrive/AI4D Course/Project/Data/Hakeem Chest Images/normal/'
abnormal_path = '/content/drive/MyDrive/AI4D Course/Project/Data/Hakeem Chest Images/pneumonia/'

results = []

files = listdir(normal_path)
for f in files:
  if(f.split('.')[1] == 'jpg'):
    test_image = load_img(normal_path + f, target_size=[224,224])
    test_image = img_to_array(test_image)
    test_image = test_image/255
    test_image = np.expand_dims(test_image,axis=0)
    results.append([model.predict(test_image), 'Normal'])

files = listdir(abnormal_path)
for f in files:
  if(f.split('.')[1] == 'jpg'):
    test_image = load_img(abnormal_path + f, target_size=[224,224])
    test_image = img_to_array(test_image)
    test_image = test_image/255
    test_image = np.expand_dims(test_image,axis=0)
    results.append([model.predict(test_image), 'Pneumonia'])

results

In [None]:
# Calculate the accuracy on the Hakeem dataset
true_positive = 0
true_negative = 0
false_positive = 0
false_negative = 0

for r in range(len(results)):
  if(results[r][1] == 'Normal'):
    if(results[r][0][0][0] > results[r][0][0][1]):
      true_negative = true_negative + 1
    else:
      false_negative = false_negative + 1
  else:
    if(results[r][0][0][0] < results[r][0][0][1]):
      true_positive = true_positive + 1
    else:
      false_positive = false_positive + 1

all = true_positive + true_negative + false_positive + false_negative

In [None]:
accuracy_table = PrettyTable(["Total: " + str(len(results)), "Actual positive", "Actual negative"])

print('Predicting on public dataset:')
accuracy_table.add_row(['Predicted positive', 'True positive\n' + str(true_positive), 'False positive\n' + str(false_positive)])
accuracy_table.add_row(['-------------------', '-------------------', '-------------------'])
accuracy_table.add_row(['Predicted negative', 'False negative\n' + str(false_negative), 'True negative\n' + str(true_negative)])

print(accuracy_table)

print('\nAccuracy: ' + str(100 * (true_positive + true_negative)/all) + ' %')