# Malaria Classifier with CNN

## Imports

In [None]:
# Import basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Import modules for data processing 
import os
import cv2
from glob import glob
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

# Import modules to build neural network
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout,Dense
from keras.layers.normalization import BatchNormalization

# Import module to train neural network
from keras.callbacks import ModelCheckpoint

# Import modules for evaluation metrics 
from sklearn.metrics import classification_report
from sklearn.metrics import fbeta_score

# Import module to save and load trained model
from keras.models import load_model

## Process Data

### Visualize Data

In [None]:
# List the entries of each class folder for visualization.
# NOTE: os.listdir returns entry names (not full paths) and does not filter by file type,
# so the counts below include every entry found in the folder.
parasitized_dir = os.listdir('Data/Parasitized/')
n_parasitized = len(parasitized_dir)
parasitized_examples = parasitized_dir[:3]
print('Number of images in Parasitized folder:', n_parasitized)
print('Examples of the path loaded:\n', parasitized_examples)

uninfected_dir = os.listdir('Data/Uninfected/')
n_uninfected = len(uninfected_dir)
uninfected_examples = uninfected_dir[:3]
print('\nNumber of images in Uninfected folder:', n_uninfected)
print('Examples of the path loaded:\n', uninfected_examples)

In [None]:
# Uninfected Data Visualization
# Show up to four sample cells, resized to the 224 x 224 input size used for training.

# os.listdir does not filter by file type, so keep only PNG entries: cv2.imread returns
# None for anything it cannot decode, which would crash on `img.shape`.
uninfected_samples = [name for name in uninfected_dir if name.lower().endswith('.png')][:4]

plt.figure(figsize = (15, 15))
for i, name in enumerate(uninfected_samples):
    img = cv2.imread(os.path.join('Data/Uninfected', name))
    if img is None:
        # Unreadable or corrupt file: report it and leave its slot empty
        print('Could not read image:', name)
        continue
    print('Original shape:', img.shape)
    img_resize = cv2.resize(img, (224, 224))
    plt.subplot(1, 4, i + 1)
    # OpenCV decodes images in BGR channel order while matplotlib expects RGB;
    # convert so the cells are displayed with their true colors.
    plt.imshow(cv2.cvtColor(img_resize, cv2.COLOR_BGR2RGB))
    plt.title('UNINFECTED')
# Adjust the layout once, after all subplots exist
plt.tight_layout()
plt.show()
print('Images has been reshape to 224 x 224')

In [None]:
# Parasitized Data Visualization
# Show up to four sample cells, resized to the 224 x 224 input size used for training.

# os.listdir does not filter by file type, so keep only PNG entries: cv2.imread returns
# None for anything it cannot decode, which would crash on `img.shape`.
parasitized_samples = [name for name in parasitized_dir if name.lower().endswith('.png')][:4]

plt.figure(figsize = (15, 15))
for i, name in enumerate(parasitized_samples):
    img = cv2.imread(os.path.join('Data/Parasitized', name))
    if img is None:
        # Unreadable or corrupt file: report it and leave its slot empty
        print('Could not read image:', name)
        continue
    print('Original shape:', img.shape)
    img_resize = cv2.resize(img, (224, 224))
    plt.subplot(1, 4, i + 1)
    # OpenCV decodes images in BGR channel order while matplotlib expects RGB;
    # convert so the cells are displayed with their true colors.
    plt.imshow(cv2.cvtColor(img_resize, cv2.COLOR_BGR2RGB))
    plt.title('PARASITIZED')
# Adjust the layout once, after all subplots exist
plt.tight_layout()
plt.show()
print('Images has been reshape to 224 x 224')

### Load data to process

In [None]:
# Collect the PNG file paths of each class with the glob module.
# glob returns matches in arbitrary (filesystem-dependent) order; sorting makes the
# sample order deterministic so that the fixed random_state used for the
# train/test split yields the same split on every machine and run.
uninfected_data = sorted(glob('Data/Uninfected/*.png', recursive=True))
parasitized_data = sorted(glob('Data/Parasitized/*.png', recursive=True))

# Report how many files were found per class and the container type
print(len(uninfected_data))
print(len(parasitized_data))
print(type(uninfected_data))

In [None]:
# Create empty lists to hold the processed images and their class labels
features = []
labels = []

# Read every image, convert it to RGB, resize it to 224 x 224 and record its label
# (0 = uninfected, 1 = parasitized).
for class_label, image_paths in ((0, uninfected_data), (1, parasitized_data)):
    for img_path in image_paths:
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None instead of raising for unreadable files;
            # skip them so cv2.resize does not crash and features/labels stay aligned.
            print('Skipping unreadable image:', img_path)
            continue
        # OpenCV decodes images as BGR. Convert to RGB so the training data uses the
        # same channel order as images loaded with Keras' image.load_img at prediction
        # time (load_img yields RGB by default).
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_resized = cv2.resize(image, (224, 224), interpolation=cv2.INTER_CUBIC)
        features.append(image_resized)
        labels.append(class_label)

# Stack the per-image matrices and the labels into NumPy arrays
features = np.array(features)
labels = np.array(labels)

In [None]:
# Hold out 25% of the samples for testing (fixed seed for a repeatable shuffle)
split = train_test_split(features, labels, test_size = 0.25, random_state = 42)
X_train, X_test, y_train, y_test = split

# One-hot encode BOTH the training and the testing labels over the two classes
y_train, y_test = [to_categorical(y, num_classes = 2) for y in (y_train, y_test)]

## Build Model

In [None]:
# Define model's architecture:
# four Conv -> MaxPool -> BatchNorm -> Dropout stages with a doubling number of
# filters, followed by global average pooling and a small dense classifier head.
model = Sequential()

for stage, n_filters in enumerate((16, 32, 64, 128)):
    # Only the first layer of the Sequential model declares the input shape
    first_layer_kwargs = {'input_shape': (224, 224, 3)} if stage == 0 else {}
    model.add(Conv2D(filters=n_filters, kernel_size=2, padding='same', activation='relu',
                     **first_layer_kwargs))
    model.add(MaxPooling2D(pool_size=2))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

# Collapse each feature map to a single value before the dense layers
model.add(GlobalAveragePooling2D())

model.add(Dense(500, activation='relu'))

# Two-way softmax output: one probability per class
model.add(Dense(2,activation='softmax'))

# Compile optimizer, loss and metrics
model.compile(optimizer='adam', loss = 'categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Display model's architecture (layer list, output shapes and parameter counts)
model.summary()

## Train Model

In [None]:
# Specify the number of epochs that you would like to use to train the model.
epochs = 30

# Make sure the checkpoint directory exists; otherwise writing the first checkpoint fails
checkpoint_path = 'Saved_models/weights.best.from_scratch.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Save a checkpoint only when the monitored validation metric improves
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)

# Train neural network, holding out 20% of the training data for validation
model.fit(X_train, y_train, validation_split=0.2, epochs=epochs, batch_size=32,
          callbacks=[checkpointer], verbose=1)

# After fit() the model holds the weights of the LAST epoch, which are not necessarily
# the best ones. Restore the best checkpoint so that the evaluation and the saved model
# use the weights that performed best on the validation split.
if os.path.exists(checkpoint_path):
    model.load_weights(checkpoint_path)

## Test Model

In [None]:
# Evaluate on the held-out test set; the result holds the loss followed by the
# compiled metrics (here: accuracy)
accuracy = model.evaluate(X_test, y_test, verbose=1)
_, test_accuracy = accuracy
print('\nTest Accuracy:', test_accuracy)

### Metric Evaluation

In [None]:
# Calculate the class-probability predictions on the test dataset
y_pred = model.predict(X_test)

In [None]:
# Collapse the one-hot targets and the predicted probabilities to class indices,
# then print the per-class report with Scikit-learn
true_classes = np.argmax(y_test, axis = 1)
predicted_classes = np.argmax(y_pred, axis = 1)
print(classification_report(true_classes, predicted_classes))

### Precision metric evaluation with F-beta Score

In [None]:
# Calculate F-beta score with Scikit-learn library.
# A beta below 1 weights precision more heavily than recall.
true_classes = y_test.argmax(axis=1)
predicted_classes = y_pred.argmax(axis=1)
fbeta_score(true_classes, predicted_classes, beta=0.92)

## Save Model

In [None]:
# Save the neural network model (architecture, weights and optimizer state) to disk
model.save('Saved_models/malaria_cnn.h5')

## Sanity Check

In [1]:
# Imports
import numpy as np
import cv2
from keras.models import load_model
from keras.preprocessing import image    

Using TensorFlow backend.


In [2]:
# Load the previously saved neural network model from disk
model = load_model('Saved_models/malaria_cnn.h5')

In [3]:
# Input tests 1 and 2 - images of infected (parasitized) cells
input_image1 = 'Data/Input-Data/Infected/infected2.png'
input_image2 = 'Data/Input-Data/Infected/infected4.png'

# Input tests 3 and 4 - images of healthy (uninfected) cells
input_image3 = 'Data/Input-Data/Healthy/healthy3.png'
input_image4 = 'Data/Input-Data/Healthy/healthy5.png'

In [4]:
def path_to_tensor(img_path):
    """Load the image stored at ``img_path`` as a batch of one for ``model.predict``.

    The image is resized to 224 x 224 while loading, converted to a 3D array
    (224, 224, 3) and given a leading batch axis, so the result has shape
    (1, 224, 224, 3).
    """
    resized_img = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(resized_img)

    # Prepend the batch dimension expected by the network
    return pixels[np.newaxis, ...]

In [5]:
# Run the model on input test 1 (an infected sample)
test1 = path_to_tensor(input_image1)
y_pred1 = model.predict(test1)

# Show the raw probabilities of both classes (column 0: Uninfected, column 1: Parasitized)
print('    Uninfected  Parasitized\n', y_pred1)

# Report the Parasitized probability as a percentage rounded to two decimals
parasitized_prob = y_pred1.tolist()[0][1]
infected_perc = round(parasitized_prob * 100, 2)
print('\nThe image is {}% predicted to be infected'.format(infected_perc))

    Uninfected  Parasitized
 [[  3.43399151e-04   9.99656558e-01]]

The image is 99.97% predicted to be infected


In [6]:
# Run the model on input test 2 (an infected sample)
test2 = path_to_tensor(input_image2)
y_pred2 = model.predict(test2)

# Show the raw probabilities of both classes (column 0: Uninfected, column 1: Parasitized)
print('    Uninfected  Parasitized\n', y_pred2)

# Report the Parasitized probability as a percentage rounded to two decimals
parasitized_prob = y_pred2.tolist()[0][1]
infected_perc = round(parasitized_prob * 100, 2)
print('\nThe image is {}% predicted to be infected'.format(infected_perc))

    Uninfected  Parasitized
 [[ 0.10112928  0.89887077]]

The image is 89.89% predicted to be infected


In [7]:
# Run the model on input test 3 (a healthy sample)
test3 = path_to_tensor(input_image3)
y_pred3 = model.predict(test3)

# Show the raw probabilities of both classes (column 0: Uninfected, column 1: Parasitized)
print('    Uninfected  Parasitized\n', y_pred3)

# Report the Parasitized probability as a percentage rounded to two decimals
parasitized_prob = y_pred3.tolist()[0][1]
infected_perc = round(parasitized_prob * 100, 2)
print('\nThe image is {}% predicted to be infected'.format(infected_perc))

    Uninfected  Parasitized
 [[ 0.99869907  0.00130086]]

The image is 0.13% predicted to be infected


In [8]:
# Run the model on input test 4 (a healthy sample)
test4 = path_to_tensor(input_image4)
y_pred4 = model.predict(test4)

# Show the raw probabilities of both classes (column 0: Uninfected, column 1: Parasitized)
print('    Uninfected  Parasitized\n', y_pred4)

# Report the Parasitized probability as a percentage rounded to two decimals
parasitized_prob = y_pred4.tolist()[0][1]
infected_perc = round(parasitized_prob * 100, 2)
print('\nThe image is {}% predicted to be infected'.format(infected_perc))

    Uninfected  Parasitized
 [[ 0.97022086  0.02977921]]

The image is 2.98% predicted to be infected
