# Convolutional Neural Network for Malaria Cell Images 

In [1]:
# import dependencies
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import seaborn
from keras.models import Sequential
from keras.layers import Convolution2D,MaxPooling2D,Dropout,Dense,Flatten,BatchNormalization,Conv2D
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.callbacks import ModelCheckpoint
%matplotlib inline
import glob
import cv2

Using TensorFlow backend.


In [2]:
# Collect the paths of the malaria cell images, one list per class.
# glob.glob returns its matches in arbitrary (filesystem-dependent) order, so the lists are sorted
# to make index-based lookups such as infected[0] reproducible across machines and runs.
infected = sorted(glob.glob('../Datasets/cell_images/Parasitized/*.png'))
uninfected = sorted(glob.glob('../Datasets/cell_images/Uninfected/*.png'))

In [None]:
# Report how many image paths were collected per class and the array shape of the first image of each class.
# NOTE(review): this comment used to claim 5,000 images per class; that conflicts with the 27,558 images
# in total that flow_from_directory reports further down -- confirm the real per-class counts.
print(f"{len(infected)} infected cell images")
print(f"{len(uninfected)} uninfected cell images")
print(f"Shape of first infected cell image: {cv2.imread(infected[0]).shape}")
print(f"Shape of first uninfected cell image: {cv2.imread(uninfected[0]).shape}")

In [None]:
# Exhibit 4 sample images of infected cells, each drawn at random (with replacement)
# from the first 100 infected paths, in a single row of subplots.
plt.figure(figsize=(12,5))
for i in range(1,5):
    plt.subplot(1,4,i)
    value = np.random.randint(100)
    # cv2.imread decodes colour images in BGR channel order, whereas plt.imshow interprets
    # a 3-channel array as RGB; convert first, otherwise red and blue are swapped on screen
    image = cv2.cvtColor(cv2.imread(infected[value]), cv2.COLOR_BGR2RGB)
    plt.imshow(image)
    plt.title('Infected Image')
    # hide the pixel-coordinate ticks; they carry no information for a thumbnail
    plt.xticks([])
    plt.yticks([])

In [None]:
# Exhibit 4 sample images of uninfected cells, each drawn at random (with replacement)
# from the first 100 uninfected paths, in a single row of subplots.
plt.figure(figsize=(12,5))
for i in range(1,5):
    plt.subplot(1,4,i)
    value = np.random.randint(100)
    # cv2.imread decodes colour images in BGR channel order, whereas plt.imshow interprets
    # a 3-channel array as RGB; convert first, otherwise red and blue are swapped on screen
    image = cv2.cvtColor(cv2.imread(uninfected[value]), cv2.COLOR_BGR2RGB)
    plt.imshow(image)
    plt.title('Uninfected Image')
    # hide the pixel-coordinate ticks; they carry no information for a thumbnail
    plt.xticks([])
    plt.yticks([])

In [12]:
# Pixel values come as RGB coefficients in the 0-255 range; multiplying by 1/255 maps them into [0, 1],
# a range better suited to training with a typical learning rate.
# The same generator also reserves 30% of the images as the validation subset.
augmentor = ImageDataGenerator(
    validation_split=0.3,
    rescale=1./255,
)

In [13]:
# Build the two directory iterators over the same image folder: the 'training' subset and the
# 'validation' subset. Both resize images to 96x96, use binary labels and yield batches of 32.
# `test_generator` is the validation subset that the models below are validated against.
train_generator = augmentor.flow_from_directory(
    '../Datasets/cell_images/',
    target_size=(96,96),
    batch_size=32,
    class_mode='binary',
    subset='training',
)
test_generator = augmentor.flow_from_directory(
    '../Datasets/cell_images/',
    target_size=(96,96),
    batch_size=32,
    class_mode='binary',
    subset='validation',
)

Found 19292 images belonging to 2 classes.
Found 8266 images belonging to 2 classes.


In [None]:
# Model 1 (baseline): three conv blocks with 32, 32 and 64 filters, then a 64-unit dense layer
# and a single sigmoid output for the binary infected/uninfected decision.
model1 = Sequential()

for block_idx, n_filters in enumerate((32, 32, 64)):
    # only the first layer of the stack is told the input shape (96x96 pixels, 3 channels)
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}

    # 3x3 convolution with ReLU activation
    model1.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))

    # batch normalization re-normalizes the activations, letting each layer learn
    # a little more independently of the layers before it
    model1.add(BatchNormalization())

    # max pooling shrinks the feature maps (pool size 2, stride 2), keeping the strongest
    # response of every binned sub-region
    model1.add(MaxPooling2D(2,2))

    # dropout disables a proportion (here 40%) of the units on each training pass
    # to help prevent overfitting
    model1.add(Dropout(0.4))

# flatten turns the feature maps into a single vector that the dense layers can consume
model1.add(Flatten())

# fully connected hidden layer: 64 units with ReLU activation, again followed by dropout
model1.add(Dense(64, activation='relu'))
model1.add(Dropout(0.4))

# one sigmoid unit squashes any real number into the range 0 to 1
model1.add(Dense(1, activation='sigmoid'))

# compile the model: Adam optimizer, binary cross-entropy loss, accuracy reported as metric
model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train model1 for 5 epochs of 100 batches each; after every epoch it is validated on
# 10 batches drawn from the validation generator. The return value is kept in history_custom.
history_custom = model1.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=5,
    validation_data=test_generator,
    validation_steps=10,
)

In [None]:
# write the trained model1 to an HDF5 file (path relative to the working directory)
model1.save(filepath="Malaria_CNN_Trained1.h5")

In [5]:
# Model 2: model 1 with one of its 32-filter conv blocks removed, to speed up training
# (conv filters 32, 64; 64 dense units).
# Author's observation: training and validation accuracies appear to be unstable.
# NOTE(review): the author reports an 80/20 training/validation split for this run, yet the
# `augmentor` cell above is saved with validation_split=0.3 -- confirm which split applies on a re-run.
model2 = Sequential()

for block_idx, n_filters in enumerate((32, 64)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model2.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model2.add(BatchNormalization())
    model2.add(MaxPooling2D(2,2))
    model2.add(Dropout(0.4))

# classifier head: flatten, 64-unit hidden layer with dropout, single sigmoid output
model2.add(Flatten())
model2.add(Dense(64, activation='relu'))
model2.add(Dropout(0.4))
model2.add(Dense(1, activation='sigmoid'))
model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 200 training batches, validated on 20 batches after each epoch
history_custom2 = model2.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=10,
    validation_data=test_generator,
    validation_steps=20,
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
# write the trained model2 to an HDF5 file (path relative to the working directory)
model2.save(filepath="Malaria_CNN_Trained2.h5")

In [7]:
# Model 3: same conv stack as model 2 (filters 32, 64), but the last hidden dense layer
# has 32 units instead of 64.
# Author's observation: training and validation accuracies still seem unstable.
# NOTE(review): the author reports an 80/20 training/validation split for this run, yet the
# `augmentor` cell above is saved with validation_split=0.3 -- confirm which split applies on a re-run.
model3 = Sequential()

for block_idx, n_filters in enumerate((32, 64)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model3.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model3.add(BatchNormalization())
    model3.add(MaxPooling2D(2,2))
    model3.add(Dropout(0.4))

# classifier head: flatten, 32-unit hidden layer with dropout, single sigmoid output
model3.add(Flatten())
model3.add(Dense(32, activation='relu'))
model3.add(Dropout(0.4))
model3.add(Dense(1, activation='sigmoid'))
model3.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 200 training batches, validated on 20 batches after each epoch
history_custom3 = model3.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=10,
    validation_data=test_generator,
    validation_steps=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# write the trained model3 to an HDF5 file (path relative to the working directory)
model3.save(filepath="Malaria_CNN_Trained3.h5")

In [9]:
# Model 4: adds another hidden conv block with 64 filters (conv filters 32, 64, 64; 64 dense units).
# Author's observation: training and validation accuracies appear more stable now.
# NOTE(review): the author reports an 80/20 training/validation split for this run, yet the
# `augmentor` cell above is saved with validation_split=0.3 -- confirm which split applies on a re-run.
model4 = Sequential()

for block_idx, n_filters in enumerate((32, 64, 64)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model4.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model4.add(BatchNormalization())
    model4.add(MaxPooling2D(2,2))
    model4.add(Dropout(0.4))

# classifier head: flatten, 64-unit hidden layer with dropout, single sigmoid output
model4.add(Flatten())
model4.add(Dense(64, activation='relu'))
model4.add(Dropout(0.4))
model4.add(Dense(1, activation='sigmoid'))
model4.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 200 training batches, validated on 20 batches after each epoch
history_custom4 = model4.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=10,
    validation_data=test_generator,
    validation_steps=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# write the trained model4 to an HDF5 file (path relative to the working directory)
model4.save(filepath="Malaria_CNN_Trained4.h5")

In [14]:
# Model 5: same layers as model 4 (conv filters 32, 64, 64; 64 dense units), run with the
# 70/30 training/validation split.
# Author's observation: training and validation accuracies appear stabilized now.
model5 = Sequential()

for block_idx, n_filters in enumerate((32, 64, 64)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model5.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model5.add(BatchNormalization())
    model5.add(MaxPooling2D(2,2))
    model5.add(Dropout(0.4))

# classifier head: flatten, 64-unit hidden layer with dropout, single sigmoid output
model5.add(Flatten())
model5.add(Dense(64, activation='relu'))
model5.add(Dropout(0.4))
model5.add(Dense(1, activation='sigmoid'))
model5.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 200 training batches, validated on 20 batches after each epoch
history_custom5 = model5.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=10,
    validation_data=test_generator,
    validation_steps=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
# write the trained model5 to an HDF5 file (path relative to the working directory)
model5.save(filepath="Malaria_CNN_Trained5.h5")

In [16]:
# Model 6: model 5 with the last hidden dense layer reduced from 64 to 32 units
# (conv filters 32, 64, 64), run with the 70/30 training/validation split.
# Author's observation: training and validation accuracies appear stabilized now.
model6 = Sequential()

for block_idx, n_filters in enumerate((32, 64, 64)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model6.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model6.add(BatchNormalization())
    model6.add(MaxPooling2D(2,2))
    model6.add(Dropout(0.4))

# classifier head: flatten, 32-unit hidden layer with dropout, single sigmoid output
model6.add(Flatten())
model6.add(Dense(32, activation='relu'))
model6.add(Dropout(0.4))
model6.add(Dense(1, activation='sigmoid'))
model6.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 200 training batches, validated on 20 batches after each epoch
history_custom6 = model6.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=10,
    validation_data=test_generator,
    validation_steps=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
# write the trained model6 to an HDF5 file (path relative to the working directory)
model6.save(filepath="Malaria_CNN_Trained6.h5")

In [18]:
# Model 7: layer sizes changed to 32, 64, 32, 64 -- i.e. conv filters 32, 64, 32 followed by
# a 64-unit dense layer -- run with the 70/30 training/validation split.
# Author's observation: training and validation accuracies appear stabilized now.
model7 = Sequential()

for block_idx, n_filters in enumerate((32, 64, 32)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model7.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model7.add(BatchNormalization())
    model7.add(MaxPooling2D(2,2))
    model7.add(Dropout(0.4))

# classifier head: flatten, 64-unit hidden layer with dropout, single sigmoid output
model7.add(Flatten())
model7.add(Dense(64, activation='relu'))
model7.add(Dropout(0.4))
model7.add(Dense(1, activation='sigmoid'))
model7.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 200 training batches, validated on 20 batches after each epoch
history_custom7 = model7.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=10,
    validation_data=test_generator,
    validation_steps=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# write the trained model7 to an HDF5 file (path relative to the working directory)
model7.save(filepath="Malaria_CNN_Trained7.h5")

In [20]:
# Model 8: model 7 plus another hidden conv block with 32 filters
# (conv filters 32, 64, 32, 32; 64 dense units), 70/30 training/validation split.
# Author's observation: accuracies did not increase by a significant amount,
# so model 7 remains the preferred choice.
model8 = Sequential()

for block_idx, n_filters in enumerate((32, 64, 32, 32)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model8.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model8.add(BatchNormalization())
    model8.add(MaxPooling2D(2,2))
    model8.add(Dropout(0.4))

# classifier head: flatten, 64-unit hidden layer with dropout, single sigmoid output
model8.add(Flatten())
model8.add(Dense(64, activation='relu'))
model8.add(Dropout(0.4))
model8.add(Dense(1, activation='sigmoid'))
model8.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 200 training batches, validated on 20 batches after each epoch
history_custom8 = model8.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=10,
    validation_data=test_generator,
    validation_steps=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [21]:
# write the trained model8 to an HDF5 file (path relative to the working directory)
model8.save(filepath="Malaria_CNN_Trained8.h5")

In [22]:
# Model 9: layer sizes changed to 32, 64, 32, 64, 32 -- i.e. conv filters 32, 64, 32, 64
# followed by a 32-unit dense layer -- run with the 70/30 training/validation split.
# Author's observation: accuracies did not increase by a significant amount,
# so model 7 remains the preferred choice.
model9 = Sequential()

for block_idx, n_filters in enumerate((32, 64, 32, 64)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model9.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model9.add(BatchNormalization())
    model9.add(MaxPooling2D(2,2))
    model9.add(Dropout(0.4))

# classifier head: flatten, 32-unit hidden layer with dropout, single sigmoid output
model9.add(Flatten())
model9.add(Dense(32, activation='relu'))
model9.add(Dropout(0.4))
model9.add(Dense(1, activation='sigmoid'))
model9.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 200 training batches, validated on 20 batches after each epoch
history_custom9 = model9.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=10,
    validation_data=test_generator,
    validation_steps=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [23]:
# write the trained model9 to an HDF5 file (path relative to the working directory)
model9.save(filepath="Malaria_CNN_Trained9.h5")

In [24]:
# Model 10: adds another hidden conv block with 32 filters
# (conv filters 32, 64, 32, 32, 64; 32 dense units), 70/30 training/validation split.
# Author's observation: accuracies did not increase by a significant amount,
# so model 7 remains the preferred choice.
model10 = Sequential()

for block_idx, n_filters in enumerate((32, 64, 32, 32, 64)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model10.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model10.add(BatchNormalization())
    model10.add(MaxPooling2D(2,2))
    model10.add(Dropout(0.4))

# classifier head: flatten, 32-unit hidden layer with dropout, single sigmoid output
model10.add(Flatten())
model10.add(Dense(32, activation='relu'))
model10.add(Dropout(0.4))
model10.add(Dense(1, activation='sigmoid'))
model10.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 200 training batches, validated on 20 batches after each epoch
history_custom10 = model10.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=10,
    validation_data=test_generator,
    validation_steps=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [54]:
# write the trained model10 to an HDF5 file (path relative to the working directory)
model10.save(filepath="Malaria_CNN_Trained10.h5")

In [50]:
# Model 11: one hidden conv block fewer than model 10, with layer sizes 64, 32, 64, 32, 64 --
# i.e. conv filters 64, 32, 64, 32 followed by a 64-unit dense layer (70/30 training/validation split).
# Author's observation: accuracies did not increase by a significant amount,
# so model 7 remains the preferred choice.
model11 = Sequential()

for block_idx, n_filters in enumerate((64, 32, 64, 32)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model11.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model11.add(BatchNormalization())
    model11.add(MaxPooling2D(2,2))
    model11.add(Dropout(0.4))

# classifier head: flatten, 64-unit hidden layer with dropout, single sigmoid output
model11.add(Flatten())
model11.add(Dense(64, activation='relu'))
model11.add(Dropout(0.4))
model11.add(Dense(1, activation='sigmoid'))
model11.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 200 training batches, validated on 20 batches after each epoch
history_custom11 = model11.fit_generator(
    train_generator,
    steps_per_epoch=200,
    epochs=10,
    validation_data=test_generator,
    validation_steps=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [55]:
# write the trained model11 to an HDF5 file (path relative to the working directory)
model11.save(filepath="Malaria_CNN_Trained11.h5")

In [51]:
# Model 12: same layers as model 11 (conv filters 64, 32, 64, 32; 64 dense units), but trained
# with 500 steps per epoch instead of 200 and validated on 50 steps instead of 20
# (70/30 training/validation split).
# Author's observation: accuracies did not increase by a significant amount,
# so model 7 remains the preferred choice.
model12 = Sequential()

for block_idx, n_filters in enumerate((64, 32, 64, 32)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model12.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model12.add(BatchNormalization())
    model12.add(MaxPooling2D(2,2))
    model12.add(Dropout(0.4))

# classifier head: flatten, 64-unit hidden layer with dropout, single sigmoid output
model12.add(Flatten())
model12.add(Dense(64, activation='relu'))
model12.add(Dropout(0.4))
model12.add(Dense(1, activation='sigmoid'))
model12.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 500 training batches, validated on 50 batches after each epoch
history_custom12 = model12.fit_generator(
    train_generator,
    steps_per_epoch=500,
    epochs=10,
    validation_data=test_generator,
    validation_steps=50,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [56]:
# write the trained model12 to an HDF5 file (path relative to the working directory)
model12.save(filepath="Malaria_CNN_Trained12.h5")

In [52]:
# Model 13: same layers as model 7 (conv filters 32, 64, 32; 64 dense units), but trained
# with 500 steps per epoch instead of 200 and validated on 50 steps instead of 20.
# Author's observation: model accuracies became slightly unstable,
# so model 7 remains the preferred choice.
model13 = Sequential()

for block_idx, n_filters in enumerate((32, 64, 32)):
    # only the first layer of the stack is told the input shape
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_idx == 0 else {}
    model13.add(Convolution2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
    model13.add(BatchNormalization())
    model13.add(MaxPooling2D(2,2))
    model13.add(Dropout(0.4))

# classifier head: flatten, 64-unit hidden layer with dropout, single sigmoid output
model13.add(Flatten())
model13.add(Dense(64, activation='relu'))
model13.add(Dropout(0.4))
model13.add(Dense(1, activation='sigmoid'))
model13.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs of 500 training batches, validated on 50 batches after each epoch
history_custom13 = model13.fit_generator(
    train_generator,
    steps_per_epoch=500,
    epochs=10,
    validation_data=test_generator,
    validation_steps=50,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [57]:
# write the trained model13 to an HDF5 file (path relative to the working directory)
model13.save(filepath="Malaria_CNN_Trained13.h5")

#### Model 7 is the best of the models trained. Some other models reached slightly higher accuracies, but only by adding an extra hidden layer. That small gain in accuracy was not large enough to justify the additional layer, so in this case it is better to use the slightly simpler model.