# Malaria Detection: Train

In [1]:
# load and scale images using keras
# scale pixel values to a range of 0 - 1
# get image processors for training, testing and evaluation
# classes will be extracted automatically from subdirectories

import keras
from keras.preprocessing.image import ImageDataGenerator

# image folders; classes are taken from the subdirectories of each folder
trainDir = './trainData'
testDir  = './testData'

# arguments common to both directory readers:
# images are resized to 64x64, served in batches of 32, with binary labels
_flow_options = dict(target_size=(64, 64), batch_size=32, class_mode='binary')

# training images: rescale pixel values to 0 - 1 and apply random
# shear / zoom / horizontal flip augmentation
_augmentation = dict(shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
train_datagen = ImageDataGenerator(rescale=1./255, **_augmentation)
train_generator = train_datagen.flow_from_directory(trainDir, **_flow_options)

# test images: only rescale pixel values to 0 - 1, no augmentation
test_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = test_datagen.flow_from_directory(testDir, **_flow_options)


Using TensorFlow backend.


Found 19290 images belonging to 2 classes.
Found 5512 images belonging to 2 classes.


In [2]:
# define network architecture
# we will use an architecture similar to the VGG net

import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD

import tensorflow as tf

# The network is two conv-conv-pool-dropout stages followed by a dense head.
# input: 64x64 images with 3 channels -> (64, 64, 3) tensors.
model = Sequential([
    # stage 1: two layers of 32 convolution filters, 3x3 each
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # stage 2: same layout with 64 filters per convolution
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # classifier head: one sigmoid unit (instead of a 2-way softmax),
    # matching the binary cross-entropy loss used below
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# SGD with Nesterov momentum; accuracy is reported as an additional metric
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(
    optimizer=sgd,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [3]:
# best run over night -> takes some time!
#
# during development, it turned out that
# the loss converges to a minimum after 25 epochs
import math

epochs = 25

# Step counts have to be whole numbers. The sample counts are not multiples
# of the batch size, so plain division yields a float; round up so that
# every sample is covered at least once per epoch.
train_steps = math.ceil(train_generator.samples / train_generator.batch_size)
validation_steps = math.ceil(validation_generator.samples / validation_generator.batch_size)

# train the net; the returned History object is kept for later inspection
history = model.fit_generator(
            train_generator,
            steps_per_epoch = train_steps,
            epochs = epochs,
            validation_data = validation_generator,
            validation_steps = validation_steps
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [4]:
import pickle

# output files for the trained network and its training history
model_path = 'malaria_model.h5'
history_path = 'malaria_history.pickle'

# save model to disk (creates a HDF5 file)
model.save(model_path)

# save training history; history.history is wrapped in a one-element list
with open(history_path, 'wb') as history_file:
    pickle.dump([history.history], history_file)