In [None]:
import os
import pandas as pd
import numpy as np
import keras
import tensorflow as tf
from keras.src.legacy.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.applications import EfficientNetB0
from keras.applications import ResNet50
from keras.optimizers import Adam


In [12]:
# Step 1: Point at the dataset directory.
# Nothing is extracted in this cell: it only records the folder that later
# cells join file and sub-directory names onto.
# NOTE(review): presumably the archive was unpacked here beforehand -- confirm.
extracted_path = 'dataset/'

In [13]:
# Step 2: Load the training labels and index the test images
image_size = (128, 128)   # target size handed to every image generator
batch_size = 32

train_csv_path = os.path.join(extracted_path, 'train_data.csv')
train_image_dir = os.path.join(extracted_path, 'train_images')
test_image_dir = os.path.join(extracted_path, 'test_images')

# os.listdir() returns entries in arbitrary order and may include non-image
# files (e.g. OS metadata).  Sort for a reproducible row order and keep only
# image files so this table lines up one-to-one with what the test generator
# will actually load.
# NOTE(review): the extension list is meant to mirror the formats accepted by
# Keras' flow_from_dataframe filename validation -- confirm for the installed version.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')
test_filenames = sorted(
    name for name in os.listdir(test_image_dir)
    if name.lower().endswith(image_extensions)
)
test_df = pd.DataFrame({'img_name': test_filenames})

train_df = pd.read_csv(train_csv_path)
# Normalise header spelling so the column lookups below are whitespace/case-insensitive.
train_df.columns = train_df.columns.str.strip().str.lower()

# Fail early, with a readable message, if the CSV does not have the expected schema.
missing_columns = {'img_name', 'label'} - set(train_df.columns)
if missing_columns:
    raise KeyError(
        f"{train_csv_path} is missing required column(s): {sorted(missing_columns)}"
    )

# class_mode='binary' in flow_from_dataframe requires string labels.
train_df['label'] = train_df['label'].astype(str)

In [14]:
# Split into training and validation sets.
# The split is written to new names so `train_df` keeps holding the full
# labelled table; re-running this cell therefore reproduces the same split
# instead of splitting an already-split frame a second time.
train_split_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

# Use ImageDataGenerator for data augmentation and normalization.
# Only the training stream is augmented; validation and test images are
# just rescaled to [0, 1].
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by all three generators.
flow_options = dict(
    x_col='img_name',
    target_size=image_size,
    batch_size=batch_size
)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_split_df,
    directory=train_image_dir,
    y_col='label',
    class_mode='binary',
    seed=42,  # reproducible shuffling / augmentation draws
    **flow_options
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=train_image_dir,
    y_col='label',
    class_mode='binary',
    shuffle=False,  # evaluation does not need shuffling; keeps batch order stable
    **flow_options
)

# No labels for the test set; shuffle=False keeps predictions in file order.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=test_image_dir,
    class_mode=None,
    shuffle=False,
    **flow_options
)

Found 17636 validated image filenames belonging to 2 classes.
Found 4410 validated image filenames belonging to 2 classes.
Found 5512 validated image filenames.


In [15]:
# Step 3: Build a CNN model with EfficientNetB0
input_shape = (128, 128, 3)

base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)
base_model.trainable = False  # Freeze the base model layers for transfer learning

model = Sequential([
    keras.Input(shape=input_shape),
    # The image generators in this notebook rescale pixels to [0, 1], while
    # the Keras EfficientNet models carry their own input preprocessing and
    # expect raw [0, 255] pixel values.  Undo the generator scaling here so
    # the frozen ImageNet backbone sees the range it was trained on.
    keras.layers.Rescaling(255.0),
    base_model,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dropout(0.5),               # regularise the small classification head
    Dense(1, activation='sigmoid')  # single probability for the binary task
])

model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])


In [21]:
# Step 3: Build a CNN model with ResNet50
# Frozen ImageNet backbone followed by a small binary classification head.
# NOTE(review): the generators in this notebook rescale pixels to [0, 1];
# Keras ships a dedicated `preprocess_input` for ResNet50's ImageNet weights,
# so this input range may not be what the backbone expects -- confirm.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(128, 128, 3),
)
base_model.trainable = False  # backbone weights stay fixed during training

model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1us/step


In [16]:
# Step 3: Build the CNN model
# Small from-scratch baseline: two conv/pool stages, then a dense head.
# The input shape is declared with an explicit Input object as the first
# entry of the Sequential model instead of an `input_shape=` argument on
# the first layer.
# NOTE(review): the `input_shape=` form appears to be what triggered the
# warning captured under this cell -- confirm it disappears after re-running.
model = Sequential([
    keras.Input(shape=(128, 128, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # single probability for the binary task
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:
# Step 3: Build a model using a pre-trained network
# VGG16 convolutional base with ImageNet weights, topped by a dense binary head.
# NOTE(review): the generators in this notebook rescale pixels to [0, 1];
# Keras ships a dedicated `preprocess_input` for VGG16's ImageNet weights,
# so this input range may not be what the backbone expects -- confirm.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(128, 128, 3),
)

# Freeze every layer of the convolutional base.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [23]:
# Step 4: Train the model
# Fits whatever `model` currently refers to.
# NOTE(review): several cells above each rebind `model`, so the network that
# gets trained is the one from whichever of those cells ran last -- confirm
# this is the intended one.
num_epochs = 20
history = model.fit(train_generator, validation_data=val_generator, epochs=num_epochs)

print("Model training complete!")

Epoch 1/20
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 314ms/step - accuracy: 0.5221 - loss: 0.6923 - val_accuracy: 0.5100 - val_loss: 0.6830
Epoch 2/20
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 306ms/step - accuracy: 0.5455 - loss: 0.6863 - val_accuracy: 0.6297 - val_loss: 0.6713
Epoch 3/20
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 303ms/step - accuracy: 0.5519 - loss: 0.6842 - val_accuracy: 0.6429 - val_loss: 0.6669
Epoch 4/20
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 299ms/step - accuracy: 0.5693 - loss: 0.6814 - val_accuracy: 0.5612 - val_loss: 0.6684
Epoch 5/20
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 297ms/step - accuracy: 0.5705 - loss: 0.6795 - val_accuracy: 0.5431 - val_loss: 0.6704
Epoch 6/20
[1m552/552[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 301ms/step - accuracy: 0.5847 - loss: 0.6782 - val_accuracy: 0.5549 - val_loss: 0.6645
Epoc

In [24]:
# Persist the trained model inside the dataset directory (HDF5 file).
model_file_name = 'malaria_detection_model.h5'
model_save_path = os.path.join(extracted_path, model_file_name)
model.save(filepath=model_save_path)
print(f"Model saved to {model_save_path}")



Model saved to dataset/malaria_detection_model.h5


In [None]:
from keras.models import Sequential, load_model

# Reload the saved network from disk so prediction uses the persisted weights.
saved_model_file = "dataset/malaria_detection_model.h5"
model = load_model(saved_model_file)

In [None]:
# Step 5: Make predictions on the test set
# Runs the model over every batch produced by the test generator; the
# sigmoid head yields one probability per image.
predictions = model.predict(x=test_generator)

In [None]:
# Step 6: Create a submission CSV
# The sigmoid output is the probability of the class that the training
# generator encoded as index 1; threshold it at 0.5 to get a class index.
positive_probability = np.asarray(predictions).ravel()
predicted_index = (positive_probability > 0.5).astype(int)

# Take the file names from the generator itself: it is the authoritative
# record of which images were actually scored, and in which order.
submission_filenames = list(test_generator.filenames)
if len(submission_filenames) != len(predicted_index):
    raise ValueError(
        f"Got {len(predicted_index)} predictions for "
        f"{len(submission_filenames)} test images; re-run the prediction cell."
    )

# Translate class indices back to the label values used in the training CSV,
# so the submission uses the dataset's own labels rather than generator-internal indices.
index_to_label = {index: label for label, index in train_generator.class_indices.items()}

submission_df = pd.DataFrame({
    'img_name': submission_filenames,
    'label': [index_to_label[int(i)] for i in predicted_index],
})
# Keep the notebook-level name pointing at the labelled table (built fresh,
# not mutated in place, so this cell can be re-run safely).
test_df = submission_df

submission_path = os.path.join(extracted_path, 'test_data.csv')
submission_df.to_csv(submission_path, index=False)

print(f"Test predictions saved to {submission_path}!")