<a href="https://colab.research.google.com/github/ashwinkrishna948/pneumonia-xray-cnn-classifier/blob/main/Pneumonia_X_Ray_Image_Classifier_using_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.densenet import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras import backend as K
from keras.models import load_model

import tensorflow as tf

import os

In [None]:
# Root folder of the chest X-ray dataset. Set the CHEST_XRAY_DIR environment
# variable to point at your own copy; when it is unset, the original local
# path is used. Trailing slashes are stripped so the joins below stay clean.
DATA_DIR = os.environ.get(
    "CHEST_XRAY_DIR", "C:/Users/ashwi/OneDrive/Desktop/chest_xray"
).rstrip("/\\")

# One sub-folder per data split
train_folder = DATA_DIR + "/train"
test_folder = DATA_DIR + "/test"
val_folder = DATA_DIR + "/val"

In [None]:
# Class sub-folders of every split. Each path keeps its trailing slash so a
# file name can be appended to it directly.

# Train split: normal and pneumonia directories
train_n, train_p = (f"{train_folder}/{label}/" for label in ("NORMAL", "PNEUMONIA"))

# Test split: normal and pneumonia directories
test_n, test_p = (f"{test_folder}/{label}/" for label in ("NORMAL", "PNEUMONIA"))

# Validation split: normal and pneumonia directories
val_n, val_p = (f"{val_folder}/{label}/" for label in ("NORMAL", "PNEUMONIA"))

In [None]:
# Count the entries of each class directory here so that the cell also runs on
# a fresh kernel. Note that os.listdir counts every directory entry, not only
# image files.
count_train_n = len(os.listdir(train_n))
count_train_p = len(os.listdir(train_p))
count_test_n = len(os.listdir(test_n))
count_test_p = len(os.listdir(test_p))
count_val_n = len(os.listdir(val_n))
count_val_p = len(os.listdir(val_p))

count_total = {'Normal Train':count_train_n, 'Pneumonia Train':count_train_p,
               'Normal Test':count_test_n, 'Pneumonia Test':count_test_p,
               'Normal Validation':count_val_n, 'Pneumonia Validation':count_val_p}

# Visualize the number of x-ray images for each data category using seaborn library
sns.barplot(x=list(count_total.keys()), y=list(count_total.values()))
plt.xlabel('Data Categories')
plt.ylabel('Number of X-Ray Images')
plt.title('Number of X-Ray Images by Each Data Category')
plt.xticks(rotation=45)

# Add values on the plot (bar i is centred at x position i)
for i, v in enumerate(count_total.values()):
    plt.text(i, v, str(v), ha='center', va='bottom')

plt.show()

In [None]:
# List each training class directory once. Sorting makes an index refer to the
# same file on every run, because os.listdir returns entries in arbitrary order.
norm_files = sorted(os.listdir(train_n))
pneu_files = sorted(os.listdir(train_p))

# Select normal train pic (index drawn from the actual listing length)
rand_norm = np.random.randint(0, len(norm_files))
norm_pic = norm_files[rand_norm]
norm_pic_dir = train_n + norm_pic
print(f"Train normal x-ray image title: {norm_pic}")


# Select pneumonia train pic
rand_pneu = np.random.randint(0, len(pneu_files))
pneu_pic = pneu_files[rand_pneu]
pneu_pic_dir = train_p + pneu_pic
print(f"Train pneumonia x-ray image title: {pneu_pic}")

# Load images
norm_load = Image.open(norm_pic_dir)
pneu_load = Image.open(pneu_pic_dir)

# Plot images side by side. cmap='gray' only affects single-channel images;
# without it matplotlib renders them with its default colormap.
plt.figure(figsize=(8,5))
plt1 = plt.subplot(1,2,1)
plt1.imshow(norm_load, cmap='gray')
plt1.set_title('Normal')

plt2 = plt.subplot(1,2,2)
plt2.imshow(pneu_load, cmap='gray')
plt2.set_title('Pneumonia')

plt.show()

In [None]:
# Augmentation settings, collected in one place before building the generator
augmentation_options = dict(
    rescale=1./255,              # Multiply every pixel value by 1/255
    rotation_range=10,           # Random rotation between -10 and +10 degrees
    width_shift_range=0.1,       # Random horizontal shift of up to 10% of the width
    height_shift_range=0.1,      # Random vertical shift of up to 10% of the height
    shear_range=0.2,             # Random shearing
    zoom_range=0.2,              # Random zooming
    horizontal_flip=True,        # Random left-right flips
    vertical_flip=False,         # No upside-down flips (specific to X-ray images)
)

# Create ImageDataGenerator
image_generator = ImageDataGenerator(**augmentation_options)

In [None]:
# Stream augmented training batches from train_folder
train_generator = image_generator.flow_from_directory(
    train_folder,
    classes=['NORMAL', 'PNEUMONIA'],  # Class labels, taken from the sub-folder names
    class_mode='binary',              # Two classes, so a single binary label per image
    target_size=(224, 224),
    batch_size=32,
)

In [None]:
# Take the first batch (images, labels) and preview its first image
x, y = train_generator[0]
plt.imshow(x[0]);

In [None]:
# Validation data: rescale only. Random augmentation is a training-time
# technique; applying it here would make the validation loss vary between
# runs for reasons unrelated to the model.
eval_image_generator = ImageDataGenerator(rescale=1./255)

valid_generator = eval_image_generator.flow_from_directory(
    directory=val_folder,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',  # Specify class_mode as 'binary' for two classes
    classes=['NORMAL', 'PNEUMONIA'],  # Specify the class labels based on the folder names
    shuffle=False  # Fixed order keeps validation batches identical across epochs
)

In [None]:
# Test data: rescale only, no random augmentation, so evaluation is
# deterministic and scores the images as they are on disk.
eval_image_generator = ImageDataGenerator(rescale=1./255)

test_generator = eval_image_generator.flow_from_directory(
    directory=test_folder,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',  # Specify class_mode as 'binary' for two classes
    classes=['NORMAL', 'PNEUMONIA'],  # Specify the class labels based on the folder names
    # shuffle defaults to True; it must be off here because the evaluation
    # cells compare model.predict(test_generator) element-by-element with
    # test_generator.classes, which is stored in file order.
    shuffle=False
)

In [None]:
# Plot frequency of normal and pneumonia
# train_generator.labels is expected to hold one integer class index per image
# (binary class_mode), so a plain mean would give a single number rather than
# one value per class. Count the images per index and divide by the total.
class_names = sorted(train_generator.class_indices, key=train_generator.class_indices.get)
class_counts = np.bincount(train_generator.labels, minlength=len(class_names))
class_frequencies = class_counts / class_counts.sum()

plt.bar(x=class_names, height=class_frequencies)
plt.xticks(rotation=90)
plt.title("Frequency of Each Class")
plt.show()

In [None]:
# create the base pre-trained model
# The weights are read from a local file; include_top=False drops DenseNet's own
# classification head so a task-specific one can be attached below.
base_model = DenseNet121(weights='models/nih/densenet.hdf5', include_top=False)

x = base_model.output

# add a global spatial average pooling layer
x = GlobalAveragePooling2D()(x)

# and a logistic layer: one sigmoid unit, giving a single score per image
predictions = Dense(units=1, activation="sigmoid")(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Track accuracy alongside the loss so training and validation progress is
# reported in interpretable terms during fit().
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model, evaluating on the validation generator after every epoch
history = model.fit(train_generator,
                    validation_data=valid_generator,
                    steps_per_epoch=len(train_generator),
                    validation_steps=len(valid_generator),
                    epochs=3)

# Plot training and validation loss together; the validation loss is already
# computed by fit() and shows whether the model is over-fitting.
plt.plot(history.history['loss'], label='training')
plt.plot(history.history['val_loss'], label='validation')
plt.ylabel("loss")
plt.xlabel("epoch")
plt.title("Training and Validation Loss Curves")
plt.legend()
plt.show()

In [None]:
# Model parameters
model_parameters = model.get_weights()

# The weight arrays have different shapes, so they cannot be stacked into one
# regular ndarray (recent NumPy versions raise instead of guessing). Store them
# as the elements of a 1-D object array, filled one by one so NumPy never tries
# to broadcast them against each other.
weights_to_save = np.empty(len(model_parameters), dtype=object)
for weight_index, weight_array in enumerate(model_parameters):
    weights_to_save[weight_index] = weight_array

# Object arrays are pickled on save; reload with np.load(..., allow_pickle=True).
np.save('model_parameters.npy', weights_to_save, allow_pickle=True)

In [None]:
# Run the model over every batch of the test generator
predicted_vals = model.predict(x=test_generator, steps=len(test_generator))

In [None]:
# Convert the predicted probabilities to binary values (0 or 1).
# The predictions are flattened to 1-D first: comparing a column-shaped
# (n, 1) array with the 1-D label array would broadcast to an (n, n) matrix
# and give a meaningless accuracy.
predicted_classes = (np.ravel(predicted_vals) > 0.5).astype(int)

# Convert the ground truth labels to binary values (0 or 1)
# NOTE(review): the comparison below is only meaningful if test_generator
# yields images in the same order as test_generator.classes (shuffle=False) - confirm.
true_classes = np.asarray(test_generator.classes).astype(int)

# Fail loudly instead of silently broadcasting if the lengths disagree
if predicted_classes.shape != true_classes.shape:
    raise ValueError(
        f"Got {predicted_classes.shape[0]} predictions for {true_classes.shape[0]} labels"
    )

# Calculate accuracy
accuracy = (predicted_classes == true_classes).mean()
print(f"Accuracy of the model is: {accuracy * 100} %" )

In [None]:
# Compare flattened 1-D views so the element-wise tests pair each prediction
# with its own label; a column-shaped (n, 1) prediction array combined with
# 1-D labels would broadcast to (n, n) and inflate every count.
pred_flat = np.ravel(predicted_classes)
true_flat = np.ravel(true_classes)

# Calculate True Positives (TP), True Negatives (TN), False Positives (FP), and False Negatives (FN)
TP = ((pred_flat == 1) & (true_flat == 1)).sum()
TN = ((pred_flat == 0) & (true_flat == 0)).sum()
FP = ((pred_flat == 1) & (true_flat == 0)).sum()
FN = ((pred_flat == 0) & (true_flat == 1)).sum()

# Calculate specificity (true negative rate) and sensitivity (true positive rate)
specificity = TN / (TN + FP)
sensitivity = TP / (TP + FN)

In [None]:
# Report both rates, one per line
print(
    f"Specificity of the model is: {specificity}",
    f"sensitivity of the model is: {sensitivity}",
    sep="\n",
)

In [None]:
# Positive Predictive Value (PPV): share of positive predictions that are correct
PPV = TP / (FP + TP)
print("PPV of the model is: {}".format(PPV))

# Negative Predictive Value (NPV): share of negative predictions that are correct
NPV = TN / (FN + TN)
print("NPV of the model is: {}".format(NPV))

In [None]:
from sklearn.metrics import auc, roc_curve

# ROC points: false positive rate and true positive rate at each score threshold
fpr, tpr, thresholds = roc_curve(y_true=true_classes, y_score=predicted_vals)

# Area under the ROC curve, integrated over the (fpr, tpr) points
auc_score = auc(x=fpr, y=tpr)

In [None]:
# Draw the ROC curve on an explicit figure/axes pair
roc_fig, roc_ax = plt.subplots()
roc_ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc_score:.2f})')
roc_ax.plot([0, 1], [0, 1], 'k--')  # Diagonal reference: a random classifier
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve')
roc_ax.legend(loc='lower right')
plt.show()

In [None]:
from sklearn.metrics import f1_score

# F1 score of the thresholded predictions against the true labels
f1 = f1_score(y_true=true_classes, y_pred=predicted_classes)
print("F1 score of the model is: {}".format(f1))