In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
import seaborn as sn
import keras
import opendatasets as od

from tensorflow import keras
from tensorflow.keras.utils import img_to_array, load_img

from imutils import paths

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Conv2D,MaxPool2D,Flatten,Dense,Dropout,Input,AveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import ResNet50V2
from keras.models import Model
from keras.callbacks import EarlyStopping

%matplotlib inline
from matplotlib import pyplot as plt 
from matplotlib import image as mpimg

In [None]:
#uncomment this cell to download the dataset
#od.download('https://www.kaggle.com/datasets/iarunava/cell-images-for-detecting-malaria')

In [None]:
class SimpleDatasetLoader:
    """Load images from disk, run them through preprocessors and collect labels.

    The label of an image is the name of the directory that directly contains
    it, i.e. paths are expected to look like ``.../<label>/<file>``.
    """

    # Method: Constructor
    def __init__(self, preprocessors=None):
        """
        :param preprocessors: Optional list of objects exposing ``preprocess(image)``;
            they are applied to every image in list order
        """
        # Normalise ``None`` to an empty list so ``load`` can iterate unconditionally.
        self.preprocessors = [] if preprocessors is None else preprocessors

    # Method: Used to load a list of images for pre-processing
    def load(self, image_paths, verbose=-1):
        """
        :param image_paths: List of image paths
        :param verbose: Print a progress line every ``verbose`` paths;
            values <= 0 disable progress output
        :return: Tuple ``(data, labels)`` of NumPy arrays
        """
        data, labels = [], []
        total = len(image_paths)

        for count, image_path in enumerate(image_paths, start=1):
            image = cv2.imread(image_path)

            # cv2.imread returns None when the file cannot be read; such
            # files are skipped but still count towards the progress output.
            if image is not None:
                for p in self.preprocessors:
                    image = p.preprocess(image)

                data.append(image)
                # Parent directory name is the class label. dirname/basename
                # copes with every separator the platform accepts, unlike
                # splitting on os.path.sep only.
                labels.append(os.path.basename(os.path.dirname(image_path)))

            # Report every `verbose`-th path, including the very first one
            # when verbose == 1.
            if verbose > 0 and count % verbose == 0:
                print('[INFO]: Processed {}/{}'.format(count, total))

        return (np.array(data), np.array(labels))

In [None]:
# Class: SimplePreprocessor
class SimplePreprocessor:
    """Image preprocessor that resizes every image to one fixed size."""

    # Method: Constructor
    def __init__(self, width, height, interpolation=cv2.INTER_AREA):
        """
        :param width: Target width handed to cv2.resize
        :param height: Target height handed to cv2.resize
        :param interpolation: Interpolation flag forwarded to cv2.resize
        """
        self.width, self.height = width, height
        self.interpolation = interpolation

    # Method: Resize the image to (width, height); the aspect ratio is not preserved
    def preprocess(self, image):
        """
        :param image: Image to resize
        :return: Image resized to the configured width and height
        """
        target_size = (self.width, self.height)
        resized = cv2.resize(image, target_size, interpolation=self.interpolation)
        return resized

In [None]:
from __main__ import SimplePreprocessor
from __main__ import SimpleDatasetLoader

In [None]:
# Get list of image paths.
# The directory is assembled with os.path.join so the notebook is not tied to
# Windows path separators; on Windows this yields the same relative path.
dataset_dir = os.path.join(".", "cell-images-for-detecting-malaria", "cell_images", "cell_images")
image_paths = list(paths.list_images(dataset_dir))

# Fail early with a clear message instead of training on an empty dataset.
if not image_paths:
    raise FileNotFoundError('No images found under {}'.format(dataset_dir))

# Initialize SimplePreprocessor and SimpleDatasetLoader and load data and labels
print('[INFO]: Images loading....')
sp = SimplePreprocessor(100, 100)  # resize every image to 100x100
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(image_paths, verbose=500)  # progress line every 500 paths

In [None]:
# Sanity check: shape of the loaded image array.
# Presumably (num_images, 100, 100, 3) given SimplePreprocessor(100, 100) — confirm.
data.shape

In [None]:
# Sanity check: the loader appends one label per loaded image, so the length
# here should equal the first dimension of data.shape.
labels.shape

In [None]:
# Creating the RESNET model
# Backbone: ResNet50V2 without its classification top, fed 100x100 3-channel
# images (the size produced by SimplePreprocessor(100, 100)).
baseModel = ResNet50V2(include_top=False,
                       input_tensor=Input(shape=(100, 100, 3)))

# Classification head stacked on the backbone's output feature map.
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(4, 4))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(128, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
# Single sigmoid unit: one probability per image for the binary task.
headModel = Dense(1, activation="sigmoid")(headModel)

# Prints the backbone only; the head is attached when the final Model is built.
baseModel.summary()

In [None]:
# Assemble the end-to-end network: backbone input -> classification head output
model = Model(inputs=baseModel.input, outputs=headModel)

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Augmentation pipeline used to generate batches
datagen = ImageDataGenerator(
    # Pixel scaling
    rescale=1./255,
    # Random geometric transforms
    rotation_range=2, shear_range=0.1, zoom_range=0.5,
    width_shift_range=0.1, height_shift_range=0.1,
    # Random mirroring along both axes
    horizontal_flip=True, vertical_flip=True,
    # Fill strategy passed through to the generator
    fill_mode='nearest')

In [None]:
# Hold out 30% of the samples as the test set; the fixed random_state keeps
# the split the same from run to run
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.3,
    random_state=42,
)


In [None]:
# Learn the label -> integer mapping from the training labels, then replace
# the training labels with their integer codes
le = LabelEncoder()
le.fit(y_train)
y_train = le.transform(y_train)

In [None]:
# Encode the test labels with the encoder already fitted on the training
# labels. Fitting a second encoder on the test labels could assign different
# integer codes if the test split did not contain exactly the same classes;
# transform() instead raises on a label the training split never had.
y_test = le.transform(y_test)

In [None]:
# Perform cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
mcc_scores = []

# Validation batches are only rescaled, never augmented, so the validation
# loss is measured on the real images with the same pixel scaling as training.
val_datagen = ImageDataGenerator(rescale=1./255)

# Snapshot of the starting weights so that every fold trains from the same
# point. Without the reset the model would carry over weights fitted on images
# that are part of the current fold's validation split. Run this cell on a
# freshly built model; after the loop `model` holds the last fold's weights.
initial_weights = model.get_weights()

for train_index, val_index in kfold.split(x_train, y_train):
    train_data, val_data = x_train[train_index], x_train[val_index]
    train_labels, val_labels = y_train[train_index], y_train[val_index]

    # Reset the network weights and (by re-compiling with the same settings
    # as the compile cell) the optimizer state for this fold
    model.set_weights(initial_weights)
    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])

    # Generate augmented training data
    train_generator = datagen.flow(train_data, train_labels, batch_size=32)

    # Generate rescaled, non-augmented validation data in a fixed order
    val_generator = val_datagen.flow(val_data, val_labels, batch_size=32, shuffle=False)

    # Define early stopping callback
    early_stopping = EarlyStopping(monitor='val_loss', patience=3)

    # Fit the model
    history = model.fit(
        train_generator,
        steps_per_epoch=len(train_data) // 32,
        epochs=20,
        validation_data=val_generator,
        validation_steps=len(val_data) // 32,
        callbacks=[early_stopping]
    )

    # Predict on validation data, rescaled exactly like the training batches,
    # and turn the sigmoid outputs into a flat vector of 0/1 class labels
    val_prob = model.predict(val_data.astype("float32") / 255.0)
    val_pred = (val_prob > 0.5).astype(int).ravel()

    # Calculate metrics. Macro averaging matches the test-set evaluation;
    # micro-averaged precision/recall/F1 would all just repeat the accuracy
    # for a single-label task.
    accuracy = accuracy_score(val_labels, val_pred)
    precision = precision_score(val_labels, val_pred, average='macro')
    recall = recall_score(val_labels, val_pred, average='macro')
    f1 = f1_score(val_labels, val_pred, average='macro')
    mcc = matthews_corrcoef(val_labels, val_pred)

    # Append metrics to lists (accuracy is stored as a percentage)
    accuracy_scores.append(accuracy * 100)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    mcc_scores.append(mcc)

In [None]:
# Print the average metrics across all folds
print("Average Cross-Validation Accuracy: %.2f%%" % np.mean(accuracy_scores))
print("Average Cross-Validation Precision: %.2f" % np.mean(precision_scores))
print("Average Cross-Validation Recall: %.2f" % np.mean(recall_scores))
print("Average Cross-Validation F1-Score: %.2f" % np.mean(f1_scores))
print("Average Cross-Validation MCC-Score: %.2f" % np.mean(mcc_scores))

# Evaluate the model on the test data.
# The inputs are rescaled by 1/255 like the batches the model was trained on
# (datagen uses rescale=1./255), and the sigmoid outputs are thresholded at
# 0.5 because the sklearn classification metrics need class labels, not
# probabilities.
test_prob = model.predict(x_test.astype("float32") / 255.0)
test_pred = (test_prob > 0.5).astype(int).ravel()
test_accuracy = accuracy_score(y_test, test_pred)
test_precision = precision_score(y_test, test_pred, average='macro')
test_recall = recall_score(y_test, test_pred, average='macro')
test_f1 = f1_score(y_test, test_pred, average='macro')
test_mcc = matthews_corrcoef(y_test, test_pred)

print("Test Accuracy: %.2f%%" % (test_accuracy * 100))
print("Test Precision: %.2f" % test_precision)
print("Test Recall: %.2f" % test_recall)
print("Test F1-Score: %.2f" % test_f1)
print("Test MCC-Score: %.2f" % test_mcc)

In [None]:
# Print the average accuracy across all folds
# (accuracy_scores already holds percentages)
print("Average Cross-Validation Accuracy: %.2f%%" % (np.mean(accuracy_scores)))

# Evaluate the model on the test data. The pixels are rescaled by 1/255 so the
# inputs match the batches the model was trained on (datagen uses rescale=1./255).
# test_scores is [loss, accuracy], following the metrics passed to compile().
test_scores = model.evaluate(x_test.astype("float32") / 255.0, y_test, verbose=1)
print("Test Accuracy: %.2f%%" % (test_scores[1] * 100))


In [None]:
# Persist the trained model. The relative file name means it is written to the
# current working directory; the ".h5" suffix presumably selects Keras' HDF5
# format — confirm for the installed Keras version.
model.save("RESNET50_infection status_updated.h5")

In [None]:

# Accuracy curves. `history` is reassigned on every cross-validation fold, so
# this plot shows the last fold that was trained.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'])
ax.plot(history.history['val_accuracy'])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='upper left')
# Save before showing: calling plt.savefig after plt.show() writes an empty
# image because the shown figure is no longer the current one.
fig.savefig('RESNET50_Infection status.png', dpi=300, transparent=False)
plt.show()

In [None]:
# plotting loss
# `history` is reassigned on every cross-validation fold, so this plot shows
# the last fold that was trained.
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='upper left')
# Save before showing: calling plt.savefig after plt.show() writes an empty
# image. A dedicated file name keeps this plot from overwriting the accuracy
# figure saved as 'RESNET50_Infection status.png'.
fig.savefig('RESNET50_Infection status_loss.png', dpi=300, transparent=False)
plt.show()