<a href="https://colab.research.google.com/github/chisomrutherford/malaria-diagnosis-with-cnn/blob/main/Malaria_Diagnosis_Model_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Diagnosis of Malaria by Analyzing Blood Smears Captured by a Microscope Using Convolutional Neural Network

# Exploratory Data Analysis
This section locates the dataset on disk and displays a few randomly chosen example images from each class.

In [None]:
#Import dependencies
import os
import cv2
import random
import matplotlib.pyplot as plt

# Dataset configuration: each entry of `categories` is a class sub-folder
# located directly under `data_dir`.
categories = [
    'Parasitized',
    'Uninfected',
]
data_dir = 'malaria_dataset/cell_images/cell_images'

#This function displays random sample images from each category of the dataset
def show_random_images (data_dir, categories, n= 3):
  """Plot a grid of randomly chosen images, one row per category.

  Args:
    data_dir: Directory that contains one sub-folder per category.
    categories: Sequence of sub-folder names to sample from.
    n: Number of random images drawn (with replacement) per category.

  Raises:
    FileNotFoundError: If a category folder does not contain any image
      file that OpenCV can decode.
  """
  # Only files with an image extension are candidates, so stray non-image
  # files sitting in a class folder are never picked.
  image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')

  # Load everything first so a missing/unreadable folder fails before an
  # empty figure has been created.
  rows = []
  for cat in categories:
    path = os.path.join(data_dir, cat)
    candidates = [f for f in os.listdir(path) if f.lower().endswith(image_exts)]

    picked = []
    while len(picked) < n:
      if not candidates:
        raise FileNotFoundError(f"No readable image files found in '{path}'")
      name = random.choice(candidates)
      img = cv2.imread(os.path.join(path, name))
      if img is None:
        # cv2.imread signals a decode failure by returning None rather than
        # raising; drop the file and draw again.
        candidates.remove(name)
        continue
      # OpenCV loads BGR; matplotlib expects RGB.
      picked.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    rows.append((cat, picked))

  plt.figure (figsize = (12, 6))
  for i, (cat, picked) in enumerate (rows):
    for j, img in enumerate (picked):
      plt.subplot(len(rows), n, i*n+j+1)
      plt.imshow(img)
      plt.title(cat)
      plt.axis('off')
  plt.tight_layout()
  plt.show()

# Preview random samples from every class folder (n is left at its default).
show_random_images(data_dir, categories)


# Data Preprocessing

In [None]:
# Import the necessary dependencies and configure the data-augmentation pipeline.

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Side length, in pixels, of the square images used throughout the notebook.
img_size = 128

# Generator configuration. The random transforms listed here are applied by
# Keras when batches are drawn, not at construction time.
train_datagen = ImageDataGenerator(
    # Scale pixel values from [0, 255] to [0, 1].
    rescale=1.0 / 255,
    # Hold back 20% of the files for the 'validation' subset.
    validation_split=0.2,
    # Random geometric perturbations.
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.15,
    horizontal_flip=True,
)
print('Data augmentation complete. 20% of the dataset for validation.')

In [None]:
#Create the data generators for the training and validation sets

# Training batches come from the augmenting generator and are reshuffled
# every epoch.
train_generator= train_datagen.flow_from_directory (
    data_dir,
    target_size = (img_size, img_size),
    batch_size =32,
    class_mode = 'binary',
    subset = 'training',
    shuffle = True
)

# Validation images must not be randomly augmented: otherwise every pass over
# the validation set scores different pixels, so the reported metrics are
# noisy and not reproducible. Use a generator that only rescales.
# Its validation_split has to equal the one given to train_datagen (0.2) so
# that the 'training' and 'validation' subsets remain complementary.
val_datagen = ImageDataGenerator(rescale = 1./255, validation_split = 0.2)

# shuffle=False keeps batch order aligned with validation_generator.classes,
# which is needed when predictions are compared against the true labels.
validation_generator = val_datagen.flow_from_directory(
    data_dir,
    target_size=(img_size, img_size),
    batch_size=32,
    class_mode='binary',
    subset='validation',
    shuffle=False
)

# Model Development

In [None]:
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

def _conv_block(filters, weight_decay=None, **conv_kwargs):
    """Return the layers of one stage: 3x3 ReLU Conv2D, BatchNormalization, 2x2 max-pool.

    Args:
        filters: Number of convolution filters.
        weight_decay: L2 factor for the convolution kernel, or None for no
            regularizer.
        **conv_kwargs: Extra keyword arguments forwarded to Conv2D.
    """
    regularizer = None if weight_decay is None else l2(weight_decay)
    return [
        Conv2D(filters, (3, 3), activation='relu',
               kernel_regularizer=regularizer, **conv_kwargs),
        BatchNormalization(),
        MaxPooling2D(2, 2),
    ]


# Four convolutional stages (32 -> 64 -> 128 -> 256 filters) followed by a
# dense classifier head with a single sigmoid output for the binary label.
model = Sequential([
    *_conv_block(32, input_shape=(img_size, img_size, 3)),
    *_conv_block(64, weight_decay=0.01),
    *_conv_block(128, weight_decay=0.01),
    *_conv_block(256),

    Flatten(),
    Dense(512, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# The metric order (accuracy, precision, recall) determines the order of the
# values returned by model.evaluate after the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
)

In [None]:
# Training callbacks; both use the Keras default monitored quantity.
callbacks = []
# Stop after 5 epochs without improvement and roll back to the best weights.
callbacks.append(
    tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
)
# Multiply the learning rate by 0.1 after 3 epochs without improvement.
callbacks.append(
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=3)
)

# Train for at most 30 epochs, validating on the held-out subset each epoch.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=30,
    callbacks=callbacks,
)

print('Congratulation! Training is complete')

# Model Evaluation

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# model.evaluate returns the loss followed by the metrics in the order they
# were passed to model.compile: accuracy, precision, recall.
val_loss, val_acc, val_precision, val_recall = model.evaluate(validation_generator)
print(f"\nTrue Validation Accuracy: {val_acc:.2%}")
print(f"Precision: {val_precision:.2%}")
print(f"Recall: {val_recall:.2%}")

# Confusion matrix generation.
# Predictions are compared positionally against validation_generator.classes,
# which is only valid because the generator was created with shuffle=False.
y_pred = model.predict(validation_generator)
# The sigmoid head yields an (N, 1) array; threshold at 0.5 and flatten to
# the 1-D label vector that scikit-learn expects.
y_pred_binary = (y_pred > 0.5).astype(int).ravel()

# Take the class names from the generator itself, ordered by their integer
# index, so the tick labels always agree with the labels in the matrix.
# NOTE(review): index 1 is the positive class for Precision/Recall; with
# alphanumeric folder ordering that is presumably 'Uninfected' - confirm this
# is the intended positive class.
class_indices = validation_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)

# Passing labels explicitly keeps the matrix square even if one class never
# occurs among the predictions.
cm = confusion_matrix(validation_generator.classes, y_pred_binary,
                      labels=list(range(len(class_names))))
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()