In [1]:
# data source: https://ceb.nlm.nih.gov/repositories/malaria-datasets/
# The dataset contains a total of 27,558 cell images with equal instances of parasitized and uninfected cells.
%reset -f

from keras.models import Sequential # to initialize CNN as a sequence of layers
from keras.layers import Convolution2D # for convolutional operations
from keras.layers import MaxPooling2D # for pooling operations
from keras.layers import Flatten # to flatten stacked feature maps into input layer
from keras.layers import Dense # to build fully-connected layers in a traditional neural network
from keras.layers import Dropout # to build a dropout layer(s); helps prevent overfitting
from keras.layers import BatchNormalization # to avoid internal covariate shift and speed up training

import numpy as np
np.random.seed(seed=123) # fix NumPy's global random state so NumPy-driven randomness repeats from run to run

In [2]:
##### hyperparameters used by the data generators and the training call
rescaled_size = 64 # side length, in pixels, that every image is resized to (images become rescaled_size x rescaled_size)
batchsize = 128 # number of images per batch produced by the generators
num_epochs = 3 # number of passes over the training set

In [3]:
##### part 1: building the CNN

In [4]:
### initialize the CNN
classifier = Sequential()

In [5]:
### step 1: add a convolution layer to the CNN
# The input shape is built from rescaled_size (set in the hyperparameter cell) rather than a
# literal 64, so the network always expects the same image size that the generators'
# target_size=(rescaled_size,rescaled_size) produces.
classifier.add(Convolution2D(filters=32, # 32 filters/kernels
                             kernel_size=(3,3), # each kernel has a 3x3 receptive field
                             padding='same', # spatial dimensions of input image and feature map are the same
                             input_shape=(rescaled_size,rescaled_size,3), # each input image has width=height=rescaled_size, and 3 color channels
                             strides=(1,1), # stride 1 pixel at a time, along the width and height
                             activation='relu')) # apply ReLU activation function to the conv layer element-wise

In [6]:
### step 2: downsample the feature maps with a max-pooling layer
classifier.add(
    MaxPooling2D((2, 2), # pool_size: keep the maximum of each 2x2 window
                 strides=(2, 2)) # move the window 2 pixels at a time along width and height
)

In [7]:
### step 3: add a batch normalization layer to the CNN
# placed after the pooling layer, so it normalizes the pooled feature maps before the next conv layer
classifier.add(BatchNormalization())

In [8]:
### step 4: stack a second conv -> pool -> batch-norm stage (same settings as steps 1 to 3)
# no input_shape is passed here; it was only given for the first layer of the model
classifier.add(
    Convolution2D(32, (3, 3), # 32 kernels, each with a 3x3 receptive field
                  strides=(1, 1), # move 1 pixel at a time along width and height
                  padding='same', # feature map keeps the spatial size of its input
                  activation='relu') # element-wise ReLU on the conv output
)
classifier.add(
    MaxPooling2D((2, 2), # keep the maximum of each 2x2 window
                 strides=(2, 2)) # move the window 2 pixels at a time
)
classifier.add(BatchNormalization())

In [9]:
### step 5: flatten the pooled feature maps into a vector of neurons (to be used as an input layer in a classic NN)
# turns each sample's stack of 2-D feature maps into one 1-D vector for the Dense layers that follow
classifier.add(Flatten())

In [10]:
### step 6: fully-connected head: dropout -> hidden layer -> dropout -> output layer
classifier.add(Dropout(rate=0.5)) # regularization: drop 50% of the flattened inputs to curb overfitting
classifier.add(Dense(128, activation='relu')) # hidden layer with 128 neurons
classifier.add(Dropout(rate=0.5)) # same 50% dropout on the hidden layer's outputs
classifier.add(Dense(1, activation='sigmoid')) # one sigmoid unit for the binary problem (a multiclass problem would use softmax)

In [11]:
### compile the CNN
classifier.compile(
    loss='binary_crossentropy', # loss to minimize; pairs with the single sigmoid output unit
    optimizer='adam', # Adam, a stochastic gradient-based optimizer, updates the weights
    metrics=['accuracy'], # accuracy is reported alongside the loss
)

In [12]:
##### part 2: fitting the CNN to the training set

In [13]:
### prevent overfitting by augmenting the number of images with transformed (e.g. zoomed, sheared) versions of those images
from keras.preprocessing.image import ImageDataGenerator

import os
# Raw string: in a normal string literal '\c' is an invalid escape sequence, which Python
# reports as a warning (and may reject in future versions). The resulting path is unchanged.
os.chdir(r'D:\cell_images')

# training images: rescale pixel values by 1/255 and apply random shear, zoom and horizontal flips
train_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

# evaluation images: only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale=1./255)

# both generators read from sub-folders of the working directory set by os.chdir above
training_set = train_datagen.flow_from_directory('training_set',
                                                 target_size=(rescaled_size,rescaled_size), # all images will be resized to rescaled_size x rescaled_size pixels
                                                 batch_size=batchsize, # weights are updated every time batchsize images have been fed into the CNN
                                                 class_mode='binary') # 'categorical' if multiclass classification

test_set = test_datagen.flow_from_directory('test_set',
                                            target_size=(rescaled_size,rescaled_size),
                                            batch_size=batchsize,
                                            class_mode='binary')

# NOTE(review): fit_generator is deprecated in newer Keras/TensorFlow releases in favour of fit();
# it is kept here to match the Keras version this notebook was run with -- confirm before upgrading.
classifier.fit_generator(training_set,
                         epochs=num_epochs, # number of times you want to feed all the training images into the CNN
                         validation_data=test_set) # the test set doubles as validation data after each epoch

Found 17558 images belonging to 2 classes.
Found 10000 images belonging to 2 classes.
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x156ef5049b0>

In [14]:
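### save model
# NOTE(review): the 'flowers' filename below looks carried over from another project (this notebook
# trains on malaria cell images) -- confirm the intended filename before uncommenting
#classifier.save('flowers - model.h5')

### load model
#from keras.models import load_model
#classifier = load_model('flowers - model.h5')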