**0. Project Information**

In [None]:
# Owner      : Jacky Setiawan (2602190444) and Ichsan Ilyasa (2602191245)
# Date       : Monday, May 6th 2024
# Description: This is a machine learning project for a research methodology course
# Project    : Deep Learning Modelling for Pneumonia Detection using Chest Radiographs

**1. Dataset Information**

In [None]:
# Dataset Source     : https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia
# Dataset Description: This dataset was collected at Guangzhou Women and Children’s Medical Center and is described in the paper "Detecting Pneumonia in Chest X-Rays with a CNN"
# Dataset Content    : The dataset is organized into 3 folders (train, test, val) and contains subfolders for each image category (Pneumonia/Normal)
# Dataset Features   : All chest radiographs were initially screened for quality control by removing all low quality or unreadable scans
# Dataset Overview   : 5863 instances, 2 categories (Pneumonia/Normal)

# Add Ons            : https://data.mendeley.com/datasets/rscbjbr9sj/2

# **2. Dataset Collection**

In [None]:
import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import seaborn as sns
import tensorflow as tf
import torch
import torch.nn as nn
import torch.optim as optim
from keras.applications.vgg19 import VGG19
from keras.callbacks import ReduceLROnPlateau
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout , BatchNormalization, Input, Concatenate, LSTM
from keras.models import Sequential, Model
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import RMSprop
from torch.utils.data import DataLoader, TensorDataset
from ultralytics import YOLO
from xgboost import XGBClassifier as xgb, DMatrix

In [None]:
labels = ['NORMAL', 'PNEUMONIA']

def get_training_data(data_dir, img_size=150):
    """Load every image below ``data_dir/<label>`` as a resized grayscale array.

    Parameters
    ----------
    data_dir : str
        Directory that contains one sub-folder per entry of ``labels``.
    img_size : int, optional
        Side length in pixels of the square images that are returned.
        Defaults to 150, the size the rest of the notebook reshapes to.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The stacked images and their class ids (positions in ``labels``).
        Files that cannot be decoded are reported and skipped.
    """
    X, y = [], []
    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)
        # os.listdir returns entries in arbitrary order; sort so the sample
        # order (and therefore every downstream result) is reproducible.
        for img in sorted(os.listdir(path)):
            img_path = os.path.join(path, img)
            try:
                img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                # cv2.imread signals failure by returning None, not by raising,
                # so check explicitly rather than letting resize blow up.
                if img_arr is None:
                    print(f'Skipping unreadable file: {img_path}')
                    continue
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
            except Exception as e:
                # Best effort: one bad file must not abort the whole load.
                print(f'Skipping {img_path}: {e}')
                continue
            X.append(resized_arr)
            y.append(class_num)
    return np.array(X), np.array(y)

# Load the three dataset splits from the 'pneumonia' folder (paths are relative
# to the notebook's working directory). Each call returns (images, class ids).
X_train, y_train = get_training_data('pneumonia/train')
X_test, y_test = get_training_data('pneumonia/test')
X_val, y_val = get_training_data('pneumonia/val')

In [None]:
# Number of training samples per class id.
# value_counts() orders its result by frequency, not by class id, so re-index
# by class id; otherwise the positional tick labels below would be attached to
# the wrong bars whenever class 1 is more frequent than class 0.
class_counts = (
    pd.Series(y_train)
    .value_counts()
    .reindex(range(len(labels)), fill_value=0)
)

fig, ax = plt.subplots(figsize=(10, 6))
class_counts.plot(kind='bar', color=['gray', 'gray'], title='Pneumonia vs Normal', ax=ax)
ax.set_xticks(range(len(labels)))
ax.set_xticklabels(labels, rotation=0)
ax.set_xlabel('')
plt.show()

In [None]:
# Show the first and the last training image side by side, each titled with
# its class name.
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
for panel, sample_idx in zip(axes, (0, -1)):
    panel.imshow(X_train[sample_idx], cmap='gray')
    panel.set_title(labels[y_train[sample_idx]])

plt.show()

In [None]:
def _scale_and_add_channel(images):
    """Scale raw pixel values to [0, 1] and reshape to (-1, 150, 150, 1).

    Only integer-typed arrays are divided by 255. An array that is already
    floating point is taken to be scaled, so re-running this cell does not
    divide the data a second time.
    """
    images = np.asarray(images)
    if np.issubdtype(images.dtype, np.integer):
        images = images / 255
    return images.reshape(-1, 150, 150, 1)


X_train = _scale_and_add_channel(X_train)
X_test = _scale_and_add_channel(X_test)
X_val = _scale_and_add_channel(X_val)

y_train = np.array(y_train)
y_test = np.array(y_test)
y_val = np.array(y_val)

In [None]:
# Random augmentation applied to batches drawn from this generator:
# rotation, zoom, small shifts and horizontal flips. All normalisation /
# whitening options are explicitly switched off.
datagen = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=30,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
)
# NOTE(review): fit() presumably has nothing to compute while every
# featurewise/ZCA option is disabled - confirm before removing.
datagen.fit(X_train)

# **3. Convolutional Neural Network**

In [None]:
# Five convolution stages (32 -> 64 -> 64 -> 128 -> 256 filters), each followed
# by batch normalisation and 2x2 max pooling, then a small dense head with a
# single sigmoid output for the binary NORMAL / PNEUMONIA decision.
model = Sequential([
    Input(shape=(150, 150, 1)),

    Conv2D(32, (3, 3), strides=1, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.1),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    Conv2D(128, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    Conv2D(256, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.2),
    Dense(units=1, activation='sigmoid'),
])
model.compile(optimizer="rmsprop", loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Multiply the learning rate by 0.3 (down to 1e-5) when validation accuracy
# has not improved for 2 epochs.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience=2, verbose=1, factor=0.3, min_lr=0.00001)

# Only the training batches are augmented. The validation split is passed
# as-is so val_accuracy (which drives the LR schedule) is measured on the same
# kind of un-augmented images as the test evaluation and is not randomised
# from epoch to epoch.
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=16),
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[learning_rate_reduction],
)

In [None]:
# evaluate() returns [loss, accuracy] for the metrics the model was compiled with.
evaluation = model.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation[0], evaluation[1]
print("Loss of the model is - " , test_loss)
print("Accuracy of the model is - " , test_accuracy*100 , "%")

In [None]:
# Per-epoch curves recorded by model.fit().
train_acc = history.history['accuracy']
train_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']

# Derive the x axis from the recorded history instead of hard-coding 100, so
# the plot still works if the number of epochs changes or training stops early.
epochs = list(range(len(train_acc)))

fig , ax = plt.subplots(1,2)
fig.set_size_inches(10,5)

ax[0].plot(epochs , train_acc , 'go-' , label = 'Training Accuracy')
ax[0].plot(epochs , val_acc , 'ro-' , label = 'Validation Accuracy')
ax[0].set_title('Training & Validation Accuracy')
ax[0].legend()
ax[0].set_xlabel("Epochs")
ax[0].set_ylabel("Accuracy")

# This panel shows training/validation loss (no test data, no accuracy), so
# title and axis label say exactly that.
ax[1].plot(epochs , train_loss , 'g-o' , label = 'Training Loss')
ax[1].plot(epochs , val_loss , 'r-o' , label = 'Validation Loss')
ax[1].set_title('Training & Validation Loss')
ax[1].legend()
ax[1].set_xlabel("Epochs")
ax[1].set_ylabel("Loss")
plt.show()

In [None]:
# Sigmoid outputs -> hard 0/1 predictions.
y_pred = model.predict(X_test)
y_pred_classes = np.round(y_pred)

# Store the text in `report`. Assigning it to `classification_report` would
# replace the imported sklearn function with a string and make every later
# classification_report(...) call in the notebook fail.
report = classification_report(y_test, y_pred_classes)
print(report)

In [None]:
# model.save('pneumonia_models/model-cnn.h5')

# **4. Visual Geometry Group 19**

In [None]:
# VGG19 backbone with ImageNet weights and no classification head. It is built
# for 3-channel input, so the single grayscale channel is repeated three times.
input_shape = (150, 150, 1)
base_model = VGG19(include_top=False, weights='imagenet', input_shape=(150, 150, 3))

input_layer = Input(shape=input_shape)
three_channel = Concatenate()([input_layer] * 3)

# NOTE(review): the backbone is called with training=False but
# base_model.trainable is never set to False, so its weights presumably still
# get updated during fit - confirm whether freezing was intended.
features = Flatten()(base_model(three_channel, training=False))
hidden = Dense(128, activation='relu')(features)
x = Dense(2, activation='softmax')(hidden)

model = Model(inputs=input_layer, outputs=x)

# Two softmax outputs with integer class ids -> sparse categorical cross-entropy.
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Multiply the learning rate by 0.3 (down to 1e-5) when validation accuracy
# has not improved for 2 epochs.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience=2, verbose=1, factor=0.3, min_lr=0.00001)

# Only the training batches are augmented. The validation split is passed
# as-is so val_accuracy (which drives the LR schedule) is measured on the same
# kind of un-augmented images as the test evaluation and is not randomised
# from epoch to epoch.
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=16),
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[learning_rate_reduction],
)

In [None]:
# evaluate() returns [loss, accuracy] for the metrics the model was compiled with.
evaluation = model.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation[0], evaluation[1]
print("Loss of the model is - " , test_loss)
print("Accuracy of the model is - " , test_accuracy*100 , "%")

In [None]:
# Per-epoch curves recorded by model.fit().
train_acc = history.history['accuracy']
train_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']

# Derive the x axis from the recorded history instead of hard-coding 100, so
# the plot still works if the number of epochs changes or training stops early.
epochs = list(range(len(train_acc)))

fig , ax = plt.subplots(1,2)
fig.set_size_inches(10,5)

ax[0].plot(epochs , train_acc , 'go-' , label = 'Training Accuracy')
ax[0].plot(epochs , val_acc , 'ro-' , label = 'Validation Accuracy')
ax[0].set_title('Training & Validation Accuracy')
ax[0].legend()
ax[0].set_xlabel("Epochs")
ax[0].set_ylabel("Accuracy")

# This panel shows training/validation loss (no test data, no accuracy), so
# title and axis label say exactly that.
ax[1].plot(epochs , train_loss , 'g-o' , label = 'Training Loss')
ax[1].plot(epochs , val_loss , 'r-o' , label = 'Validation Loss')
ax[1].set_title('Training & Validation Loss')
ax[1].legend()
ax[1].set_xlabel("Epochs")
ax[1].set_ylabel("Loss")
plt.show()

In [None]:
# The model emits one softmax score per class; the predicted class is the
# index of the larger score.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# If the targets are one-hot encoded (more than one dimension), collapse them
# to class ids as well.
if y_test.ndim > 1:
    y_test = y_test.argmax(axis=1)

report = classification_report(y_test, y_pred_classes)
print(report)

In [None]:
# model.save('pneumonia_models/model-vgg.h5')

# **5. eXtreme Gradient Boosting**

In [None]:
# Gradient boosting works on tabular input: flatten every image to one row.
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)
X_val_flat = X_val.reshape(len(X_val), -1)

# Train on the training and validation splits together.
X_train_combined = np.concatenate((X_train_flat, X_val_flat), axis=0)
y_train_combined = np.concatenate((y_train, y_val))

# `xgb` is the notebook's import alias for XGBClassifier.
model = xgb(
    tree_method='hist',
    n_jobs=-1,
    eval_metric='logloss',
    n_estimators=100,
    learning_rate=0.0001,
    max_depth=6,
)

# NOTE(review): the test split is used as the monitoring set here - confirm
# this is intended, since the same split is used for the final report.
model.fit(
    X_train_combined,
    y_train_combined,
    eval_set=[(X_test_flat, y_test)],
    verbose=True,
)

In [None]:
# Hard class predictions on the flattened test images.
y_pred = model.predict(X_test_flat)

accuracy = accuracy_score(y_test, y_pred)
xgb_report = classification_report(y_test, y_pred, target_names=labels)

print(f"Accuracy: {accuracy}")
print("\nClassification Report:")
print(xgb_report)

# **6. Long Short Term Memory**

In [None]:
# Two stacked LSTM layers followed by a small dense head with a single sigmoid
# output. The first LSTM returns the full sequence so the second can consume it.
# NOTE(review): the input is declared as (150, 150) while the notebook's image
# arrays are reshaped to (150, 150, 1) - presumably Keras drops the trailing
# axis when the model is called; confirm.
model = Sequential()
model.add(LSTM(64, input_shape=(150, 150), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer=RMSprop(learning_rate=0.001),
    metrics=['accuracy'],
)

In [None]:
# Multiply the learning rate by 0.3 (down to 1e-5) when validation accuracy
# has not improved for 2 epochs.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience=2, verbose=1, factor=0.3, min_lr=0.00001)

# Only the training batches are augmented. The validation split is passed
# as-is so val_accuracy (which drives the LR schedule) is measured on the same
# kind of un-augmented images as the test evaluation and is not randomised
# from epoch to epoch.
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=16),
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=[learning_rate_reduction],
)

In [None]:
# Sigmoid outputs -> hard 0/1 predictions at a 0.5 threshold.
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)

# Store the text in `report`. Assigning it to `classification_report` would
# replace the imported sklearn function with a string and make every later
# classification_report(...) call in the notebook fail.
report = classification_report(y_test, y_pred_classes)
print(report)

# **7. Multi Layer Perceptron**

In [None]:
# Build flattened float32 features, int64 targets, a TensorDataset and a
# DataLoader for each split. Only the training loader shuffles.

# --- training split ---
X_train_flat = X_train.reshape(len(X_train), -1)
X_train_tensor = torch.tensor(X_train_flat, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)

# --- validation split ---
X_val_flat = X_val.reshape(len(X_val), -1)
X_val_tensor = torch.tensor(X_val_flat, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=32)

# --- test split ---
X_test_flat = X_test.reshape(len(X_test), -1)
X_test_tensor = torch.tensor(X_test_flat, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

In [None]:
# Fully connected classifier on the flattened 150x150 image:
# 22500 -> 512 -> 256 -> 2 logits, with ReLU and dropout between layers.
model = nn.Sequential(
    nn.Linear(150 * 150, 512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 2)
)

optimizer = optim.RMSprop(model.parameters(), lr=0.001, alpha=0.9)
criterion = nn.CrossEntropyLoss()

num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0

    # The batch targets are named `targets`, not `labels`: at notebook scope a
    # loop variable called `labels` would overwrite the global class-name list
    # with a tensor and break any cell that is (re-)run afterwards.
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
    train_accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%')

In [None]:
# Switch off dropout and collect predictions for the whole test set.
model.eval()
y_pred, y_true = [], []

with torch.no_grad():
    # The batch targets are named `targets`, not `labels`: at notebook scope a
    # loop variable called `labels` would overwrite the global class-name list
    # with a tensor and break any cell that is (re-)run afterwards.
    for inputs, targets in test_loader:
        outputs = model(inputs)
        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(targets.cpu().numpy())

report = classification_report(y_true, y_pred, target_names=['NORMAL', 'PNEUMONIA'])
print(report)

# **8. You Only Look Once**

In [None]:
# Start from the 'yolov8n-cls.pt' checkpoint and train it on the 'pneumonia'
# dataset folder.
model = YOLO('yolov8n-cls.pt')

train_settings = dict(
    data='pneumonia',
    epochs=100,
    imgsz=150,
    batch=16,
    lr0=0.0001,
    pretrained=True,
)
results = model.train(**train_settings)

# Run the library's validation pass with its default settings.
results_val = model.val()

In [None]:
# Predict on every image under 'pneumonia_yolo/test' without saving
# annotated outputs.
results = model.predict(
    source='pneumonia_yolo/test',
    save=False,
)

In [None]:
# Index of the highest-scoring class for every predicted image.
# NOTE(review): assumes the model's class index 1 corresponds to PNEUMONIA - confirm.
pred_labels = [result.probs.top1 for result in results]

# The ground truth is derived from the file name: names starting with 'person'
# are counted as class 1, everything else as class 0.
true_labels = []
for result in results:
    # Normalise Windows separators, then take the last path component.
    # Splitting on '\\' alone leaves the whole path intact on Linux/macOS, so
    # the prefix check would never match and every sample would get label 0.
    filename = os.path.basename(result.path.replace('\\', '/'))
    true_labels.append(1 if filename.startswith('person') else 0)

report = classification_report(true_labels, pred_labels)
print(report)