## Reading the Dataset

In [1]:
# Importing dependencies
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Root folder of the dataset, plus one sub-folder per class.
# Replace the placeholder below with the real location of the data.
dataset_dir = 'path_to_your_dataset_folder'
parasitized_dir, uninfected_dir = (
    os.path.join(dataset_dir, class_name)
    for class_name in ('parasitized', 'uninfected')
)

In [None]:
# Create & Display images
def display_images(folder, max_images=5):
    """Show the first few readable images of ``folder`` side by side.

    Files are visited in sorted filename order so the preview is the same on
    every run (``os.listdir`` gives no ordering guarantee). Entries that
    OpenCV cannot decode (``cv2.imread`` returns ``None`` for them, e.g.
    thumbnail caches or other non-image files) are skipped instead of
    crashing the colour conversion.

    Args:
        folder: Path of the directory containing the images.
        max_images: Maximum number of images to display (default 5).
    """
    previews = []
    for filename in sorted(os.listdir(folder)):
        if len(previews) >= max_images:
            break
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            # Not a decodable image; ignore it and try the next file.
            continue
        previews.append((filename, img))

    plt.figure(figsize=(12, 12))
    for i, (filename, img) in enumerate(previews):
        plt.subplot(1, max_images, i + 1)
        # OpenCV loads images as BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.title(filename)
    plt.show()

In [None]:
# Preview a handful of samples from each class folder ("parasitized" first, then "uninfected")
for class_dir in (parasitized_dir, uninfected_dir):
    display_images(class_dir)

## Image Pre-Processing

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Function to preprocess and load images
def load_and_preprocess_images(folder, target_size=(224, 224)):
    """Load every readable image in ``folder`` and prepare it for the model.

    Each image is read with OpenCV, converted from BGR to RGB, resized to
    ``target_size`` and scaled to the range [0, 1]. The class label of every
    image is the name of ``folder`` itself.

    Args:
        folder: Directory holding the images of a single class.
        target_size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a float32 array of
        shape ``(n, height, width, 3)`` and ``labels`` is an array of ``n``
        copies of the folder name. Both are empty (``n == 0``) when the
        folder contains no readable image.
    """
    # normpath drops any trailing separator, so 'data/parasitized/' and
    # 'data/parasitized' give the same label on every platform.
    label = os.path.basename(os.path.normpath(folder))

    images = []
    # Sorted order keeps the dataset (and any seeded split of it) reproducible.
    for filename in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode
            # (e.g. thumbnail caches); skip them instead of crashing.
            continue

        # Convert BGR to RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Resize to a common size
        img = cv2.resize(img, target_size)

        # Normalize pixel values to [0, 1]; float32 halves the memory
        # footprint compared with the float64 that `img / 255.0` produces.
        images.append(img.astype(np.float32) / 255.0)

    if not images:
        # Keep the 4-D layout so the result can still be concatenated
        # with the arrays of other classes.
        width, height = target_size
        return np.empty((0, height, width, 3), dtype=np.float32), np.array([], dtype=str)

    return np.array(images, dtype=np.float32), np.array([label] * len(images))

In [None]:
# Read, resize and normalize every image of each class (one call per class folder)
parasitized_images, parasitized_labels = load_and_preprocess_images(folder=parasitized_dir)
uninfected_images, uninfected_labels = load_and_preprocess_images(folder=uninfected_dir)

## Splitting the data into training, validation and test sets

In [None]:
# Turn the string class labels into integers.
# The labels are stacked parasitized-first, then uninfected.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(
    np.concatenate([parasitized_labels, uninfected_labels])
)


In [None]:
# Split data into training (70%), validation (15%), and test (15%) sets.
# X is stacked parasitized-first, then uninfected: the same order used to build y.
X = np.concatenate((parasitized_images, uninfected_images), axis=0)

# Stratify both splits so every subset keeps the class proportions of the
# full dataset; a plain random split can leave the subsets with skewed ratios.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

## Using the pre-trained VGG-16 model

In [None]:
# Load the VGG-16 convolutional base pre-trained on ImageNet
# (include_top=False drops the original 1000-class classifier).
base_model = keras.applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3)
)

# Freeze the pre-trained weights: otherwise the large gradients coming from the
# randomly initialised classification head would overwrite the ImageNet features
# during training instead of reusing them.
base_model.trainable = False

# NOTE(review): the images are fed as RGB scaled to [0, 1], while the ImageNet
# weights were trained on inputs prepared by keras.applications.vgg16.preprocess_input.
# Consider switching the preprocessing to match; confirm against results.

In [None]:
# Stack a small classification head on top of the VGG-16 base
model = keras.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())       # collapse each feature map to one value
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))  # single probability -> binary classification

In [None]:
# Configure optimizer, loss and metric for binary classification
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split while monitoring the validation split each epoch
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

In [None]:
# Measure loss and accuracy on the held-out test split
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
# Plot the training curves: loss on the left panel, accuracy on the right.
# Each entry: (training key, validation key, training label, validation label, y-axis label)
panels = [
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss'),
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Accuracy'),
]

fig, axes = plt.subplots(1, 2, figsize=(10, 4))
for ax, (train_key, val_key, train_label, val_label, y_label) in zip(axes, panels):
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
plt.show()