### Load libraries

In [1]:
import os, shutil

### Path for original dataset

In [2]:
# Source folder for the 'infected{}.png' images that are copied into the splits below.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
original_data_dir_inf = '/home/enock/Documents/Tensorflow_Keras_projects/cell_images/Parasitized'

In [3]:
# Source folder for the 'uninfected{}.png' images that are copied into the splits below.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
original_data_dir_uni = '/home/enock/Documents/Tensorflow_Keras_projects/cell_images/Uninfected'

### Path for data splits

In [4]:
# Root folder that will hold the train / validation / test splits.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
data_dir = '/home/enock/Documents/Tensorflow_Keras_projects/malaria_dataset/'
# makedirs with exist_ok=True keeps this cell re-runnable (Restart & Run All):
# os.mkdir raises FileExistsError on a second run and fails if a parent is missing.
os.makedirs(data_dir, exist_ok=True)

### Create data split directories 

In [5]:
### One sub-folder of data_dir per split
# exist_ok=True makes the cell idempotent: re-running it after the folders
# already exist no longer raises FileExistsError as os.mkdir did.
train_dir = os.path.join(data_dir, 'train')
os.makedirs(train_dir, exist_ok=True)

validation_dir = os.path.join(data_dir, 'validation')
os.makedirs(validation_dir, exist_ok=True)

test_dir = os.path.join(data_dir, 'test')
os.makedirs(test_dir, exist_ok=True)

#### Training directory

In [6]:
### training directories for infected and uninfected
# One folder per class inside train_dir; exist_ok=True keeps the cell
# re-runnable instead of raising FileExistsError on a second execution.
train_infected_dir = os.path.join(train_dir, 'infected')
os.makedirs(train_infected_dir, exist_ok=True)

train_uninfected_dir = os.path.join(train_dir, 'uninfected')
os.makedirs(train_uninfected_dir, exist_ok=True)

#### Validation directory

In [7]:
### validation directories for infected and uninfected
# One folder per class inside validation_dir; exist_ok=True keeps the cell
# re-runnable instead of raising FileExistsError on a second execution.
validation_infected_dir = os.path.join(validation_dir, 'infected')
os.makedirs(validation_infected_dir, exist_ok=True)

validation_uninfected_dir = os.path.join(validation_dir, 'uninfected')
os.makedirs(validation_uninfected_dir, exist_ok=True)

#### Test directory

In [8]:
### test directories for infected and uninfected
# One folder per class inside test_dir; exist_ok=True keeps the cell
# re-runnable instead of raising FileExistsError on a second execution.
test_infected_dir = os.path.join(test_dir, 'infected')
os.makedirs(test_infected_dir, exist_ok=True)

test_uninfected_dir = os.path.join(test_dir, 'uninfected')
os.makedirs(test_uninfected_dir, exist_ok=True)

### Copy images to training, validation and test directories

In [9]:
### copy infected images to the train, validation and test directories
# Each entry is (destination directory, first image index, one-past-last index).
# The splits are processed in the order train -> validation -> test.
infected_splits = (
    (train_infected_dir, 0, 9922),
    (validation_infected_dir, 9922, 11024),
    (test_infected_dir, 11024, 13780),
)
for dst_dir, first_idx, end_idx in infected_splits:
    images = ['infected{}.png'.format(i) for i in range(first_idx, end_idx)]
    for img in images:
        img_src = os.path.join(original_data_dir_inf, img)
        img_dst = os.path.join(dst_dir, img)
        shutil.copyfile(img_src, img_dst)

In [10]:
### copy uninfected images to the train, validation and test directories
# Each entry is (destination directory, first image index, one-past-last index).
# The splits are processed in the order train -> validation -> test.
uninfected_splits = (
    (train_uninfected_dir, 0, 9922),
    (validation_uninfected_dir, 9922, 11024),
    (test_uninfected_dir, 11024, 13780),
)
for dst_dir, first_idx, end_idx in uninfected_splits:
    images = ['uninfected{}.png'.format(i) for i in range(first_idx, end_idx)]
    for img in images:
        img_src = os.path.join(original_data_dir_uni, img)
        img_dst = os.path.join(dst_dir, img)
        shutil.copyfile(img_src, img_dst)

#### Check images in each directory

In [11]:
### Report how many files each split directory contains
# Labels are kept verbatim (including their trailing spaces) so the printed
# lines are unchanged.
labelled_dirs = (
    ('Train infected images: ', train_infected_dir),
    ('Validation infected images: ', validation_infected_dir),
    ('Test infected images:', test_infected_dir),
    ('Train uninfected images: ', train_uninfected_dir),
    ('Validation uninfected images: ', validation_uninfected_dir),
    ('Test uninfected images:', test_uninfected_dir),
)
for label, directory in labelled_dirs:
    print(label, len(os.listdir(directory)))

Train infected images:  9922
Validation infected images:  1102
Test infected images: 2756
Train uninfected images:  9922
Validation uninfected images:  1102
Test uninfected images: 2756


#### Load libraries

In [12]:
from keras                                   import models, layers, optimizers
from keras.callbacks                         import ReduceLROnPlateau
from keras.applications.inception_resnet_v2  import InceptionResNetV2, preprocess_input
from keras.preprocessing.image               import ImageDataGenerator

Using TensorFlow backend.


### Data Augmentation
Augment only the training data; the validation and test generators apply preprocessing only.

In [13]:
### Training generator: model preprocessing plus random augmentation
# NOTE(review): both preprocessing_function=preprocess_input and rescale=1/255.0
# are set. preprocess_input already normalises pixel values, so the extra
# rescale presumably shrinks the inputs a second time — confirm against the
# Keras ImageDataGenerator documentation. The validation and test generators
# use the same pair of options, so any change must be applied to all three.
augmentation_options = dict(
    rotation_range=25,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_gen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rescale=1/255.0,
    **augmentation_options)

In [14]:
### Stream shuffled, augmented training batches from train_dir
# The target size matches the (75, 75, 3) input shape given to the model base;
# class_mode='binary' fits the single sigmoid output of the final model.
train_flow_options = dict(
    target_size=(75, 75),
    color_mode="rgb",
    class_mode='binary',
    batch_size=32,
    shuffle=True,
    interpolation='bicubic',
)
train_img = train_gen.flow_from_directory(train_dir, **train_flow_options)

Found 19844 images belonging to 2 classes.


In [15]:
### Validation generator: preprocessing only, no augmentation
# NOTE(review): preprocess_input and rescale=1/255.0 are both applied, as in the
# training generator; this presumably scales the inputs twice — confirm against
# the Keras ImageDataGenerator documentation, and change the train / validation /
# test generators together if so.
validation_gen = ImageDataGenerator(
    rescale=1/255.0,
    preprocessing_function=preprocess_input,
)

In [16]:
### Stream validation batches from validation_dir in a fixed (unshuffled) order
# Same image size, colour mode, batch size and interpolation as the training flow.
validation_flow_options = dict(
    target_size=(75, 75),
    color_mode="rgb",
    class_mode='binary',
    batch_size=32,
    shuffle=False,
    interpolation='bicubic',
)
validation_img = validation_gen.flow_from_directory(
    validation_dir, **validation_flow_options)

Found 2204 images belonging to 2 classes.


### Build and Train Model

#### Instantiate pre-trained model base

In [17]:
### ImageNet-pretrained InceptionResNetV2 without its classification top
# input_shape matches the 75x75 RGB images produced by the data generators.
pmodel_base = InceptionResNetV2(
    include_top=False,
    weights='imagenet',
    input_shape=(75, 75, 3),
)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.7/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5


#### Add a classification head (flatten, dropout, sigmoid dense layer) to the pre-trained model base

In [18]:
### Classification head on top of the pre-trained base
# Flatten the base's feature map, apply 50% dropout, then a single sigmoid
# unit for the binary (infected / uninfected) output.
flattened = layers.Flatten()(pmodel_base.output)
model = layers.Dropout(0.5)(flattened)
model_output = layers.Dense(1, activation='sigmoid')(model)

# End-to-end model: base input -> base layers -> new head.
final_model = models.Model(
    inputs=pmodel_base.input,
    outputs=model_output,
)

final_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 75, 75, 3)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 37, 37, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 37, 37, 32)   96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 37, 37, 32)   0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

__________________________________________________________________________________________________
conv2d_95 (Conv2D)              (None, 3, 3, 160)    143360      activation_94[0][0]              
__________________________________________________________________________________________________
batch_normalization_95 (BatchNo (None, 3, 3, 160)    480         conv2d_95[0][0]                  
__________________________________________________________________________________________________
activation_95 (Activation)      (None, 3, 3, 160)    0           batch_normalization_95[0][0]     
__________________________________________________________________________________________________
conv2d_93 (Conv2D)              (None, 3, 3, 192)    208896      block17_4_ac[0][0]               
__________________________________________________________________________________________________
conv2d_96 (Conv2D)              (None, 3, 3, 192)    215040      activation_95[0][0]              
__________

__________________________________________________________________________________________________
block17_8_mixed (Concatenate)   (None, 3, 3, 384)    0           activation_105[0][0]             
                                                                 activation_108[0][0]             
__________________________________________________________________________________________________
block17_8_conv (Conv2D)         (None, 3, 3, 1088)   418880      block17_8_mixed[0][0]            
__________________________________________________________________________________________________
block17_8 (Lambda)              (None, 3, 3, 1088)   0           block17_7_ac[0][0]               
                                                                 block17_8_conv[0][0]             
__________________________________________________________________________________________________
block17_8_ac (Activation)       (None, 3, 3, 1088)   0           block17_8[0][0]                  
__________

__________________________________________________________________________________________________
block17_19 (Lambda)             (None, 3, 3, 1088)   0           block17_18_ac[0][0]              
                                                                 block17_19_conv[0][0]            
__________________________________________________________________________________________________
block17_19_ac (Activation)      (None, 3, 3, 1088)   0           block17_19[0][0]                 
__________________________________________________________________________________________________
conv2d_154 (Conv2D)             (None, 3, 3, 128)    139264      block17_19_ac[0][0]              
__________________________________________________________________________________________________
batch_normalization_154 (BatchN (None, 3, 3, 128)    384         conv2d_154[0][0]                 
__________________________________________________________________________________________________
activation

#### Fine-tune model

In [19]:
### Freeze the first two layers, leave every later layer trainable
for layer_index, layer in enumerate(final_model.layers):
    # Indices 0 and 1 stay frozen; everything from index 2 onward is fine-tuned.
    layer.trainable = layer_index >= 2

#### Compile model

In [22]:
### Binary cross-entropy with Nesterov-momentum SGD (learning rate left at the optimizer default)
sgd_nesterov = optimizers.SGD(momentum=0.9, nesterov=True)
final_model.compile(
    optimizer=sgd_nesterov,
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

#### Model fit

In [24]:
### Train for up to 40 epochs, lowering the learning rate when val_loss plateaus
# Multiply the learning rate by 0.2 after 5 epochs without val_loss improvement,
# never going below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-5,
)

# Number of whole batches of 32 (the generators' batch size) per pass;
# a trailing partial batch is not counted.
train_steps = train_img.samples // 32
validation_steps = validation_img.samples // 32

history = final_model.fit_generator(
    train_img,
    epochs=40,
    steps_per_epoch=train_steps,
    validation_data=validation_img,
    validation_steps=validation_steps,
    callbacks=[reduce_lr],
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
110/620 [====>.........................] - ETA: 18:15 - loss: 0.0952 - binary_accuracy: 0.9673

KeyboardInterrupt: 

### Evaluate on test data

In [None]:
### Test generator: preprocessing only, no augmentation
# NOTE(review): preprocess_input and rescale=1./255 are both applied, as in the
# training and validation generators; this presumably scales the inputs twice —
# confirm against the Keras ImageDataGenerator documentation, and change all
# three generators together if so.
test_gen = ImageDataGenerator(
    rescale=1./255,
    preprocessing_function=preprocess_input,
)

In [None]:
### Stream test batches from test_dir in a fixed (unshuffled) order
# Same image size, colour mode, batch size and interpolation as the training
# and validation flows.
test_flow_options = dict(
    target_size=(75, 75),
    color_mode="rgb",
    class_mode='binary',
    batch_size=32,
    shuffle=False,
    interpolation='bicubic',
)
test_generator = test_gen.flow_from_directory(test_dir, **test_flow_options)

In [None]:
### Measure loss and accuracy on the held-out test images
# evaluate_generator returns the loss followed by the compiled metrics
# (binary_accuracy here). The previous predict_generator call returns raw
# predictions, which cannot be unpacked into (test_loss, test_acc).
test_generator.reset()
test_loss, test_acc = final_model.evaluate_generator(
        test_generator,
        # len(test_generator) is the number of batches including the final
        # partial one, so no test image is left out (samples//32 dropped it).
        steps=len(test_generator))

#### Print test accuracy

In [None]:
# Show the value bound to test_acc by the test-evaluation cell above.
print('test_acc:', test_acc)