In [38]:
import os, shutil
import time
import matplotlib.pyplot as plt
%matplotlib inline
import scipy
import numpy as np
from PIL import Image
from scipy import ndimage
from keras.preprocessing.image import ImageDataGenerator, array_to_img
import tensorflow as tf
np.random.seed(123)

In [39]:
train_folder = 'data/chest_xray/chest_xray/train'
test_folder = 'data/chest_xray/chest_xray/val'
val_folder = 'data/chest_xray/chest_xray/test'

In [40]:
datagen = ImageDataGenerator(rescale=1./255) 
batch_size = 10

In [41]:
from keras.applications import VGG19
cnn_base = VGG19(weights='imagenet', 
                 include_top=False, 
                 input_shape=(244, 244, 3))

In [42]:
cnn_base.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 244, 244, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 244, 244, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 244, 244, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 122, 122, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 122, 122, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 122, 122, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 61, 61, 128)       0     

In [43]:
def extract_features(directory, sample_amount):
    features = np.zeros(shape=(sample_amount, 2, 2, 512)) 
    labels = np.zeros(shape=(sample_amount))
    generator = datagen.flow_from_directory(
        directory, target_size=(224, 224), 
        batch_size = 10, 
        class_mode='binary')
    i=0
    for inputs_batch, labels_batch in generator:
        features_batch = cnn_base.predict(inputs_batch)
        features[i * batch_size : (i + 1) * batch_size] = features_batch 
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i = i + 1
        if i * batch_size >= sample_amount:
            break
    return features, labels

In [44]:
train_features, train_labels = extract_features(train_folder, 5216) 
validation_features, validation_labels = extract_features(val_folder, 624) 
test_features, test_labels = extract_features(test_folder, 16)

train_features = np.reshape(train_features, (5216, 2 * 2 * 512))
validation_features = np.reshape(validation_features, (624, 2 * 2 * 512))
test_features = np.reshape(test_features, (16, 2 * 2 * 512))

Found 5216 images belonging to 2 classes.


ValueError: could not broadcast input array from shape (10,7,7,512) into shape (10,2,2,512)

In [15]:
weight_for_0 = 3875/1341
weight_for_1 = 1

class_weight = {0: (3875/1341), 1: 1}

print(weight_for_0)
print(weight_for_1)

2.889634601043997
1


In [16]:
from keras import models
from keras import layers
from keras import optimizers

model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=2*2*512))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer=optimizers.RMSprop(lr=1e-4),
              loss='binary_crossentropy',
              metrics=['acc', tf.keras.metrics.Recall()])

history = model.fit(train_features, train_labels,
                    epochs=20,
                    class_weight = class_weight,
                    batch_size=10,
                    validation_data=(validation_features, validation_labels))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [17]:
results_train = model.evaluate(train_features, train_labels)
results_val = model.evaluate(validation_features, validation_labels)



In [18]:
pred_train_y = model.predict_classes(train_features)
pred_val_y = model.predict_classes(validation_features)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [19]:
from sklearn.metrics import classification_report
print(classification_report(train_labels, pred_train_y))
print(classification_report(validation_labels, pred_val_y))

              precision    recall  f1-score   support

         0.0       0.90      0.98      0.94      1341
         1.0       0.99      0.96      0.98      3875

    accuracy                           0.97      5216
   macro avg       0.94      0.97      0.96      5216
weighted avg       0.97      0.97      0.97      5216

              precision    recall  f1-score   support

         0.0       0.94      0.49      0.64       234
         1.0       0.76      0.98      0.86       390

    accuracy                           0.80       624
   macro avg       0.85      0.73      0.75       624
weighted avg       0.83      0.80      0.78       624



**Feature Extraction Method 2**

In [20]:
model = models.Sequential()
model.add(cnn_base)
model.add(layers.Flatten())
model.add(layers.Dense(132, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

**Freezing**

In [21]:
# You can check whether a layer is trainable (or alter its setting) through the layer.trainable attribute
for layer in model.layers:
    print(layer.name, layer.trainable)
    
# Similarly, you can check how many trainable weights are in the model
print(len(model.trainable_weights))

vgg19 True
flatten True
dense_2 True
dense_3 True
36


In [22]:
cnn_base.trainable = False

In [23]:
# You can check whether a layer is trainable (or alter its setting) through the layer.trainable attribute
for layer in model.layers:
    print(layer.name, layer.trainable)
    
# Similarly, we can check how many trainable weights are in the model
print(len(model.trainable_weights))

vgg19 False
flatten True
dense_2 True
dense_3 True
4


In [24]:
# Get all the data in the directory split/train (542 images), and reshape them
train_datagen = ImageDataGenerator(rescale=1./255, 
                                   rotation_range=40, 
                                   width_shift_range=0.2, 
                                   height_shift_range=0.2, 
                                   shear_range=0.2, 
                                   zoom_range=0.2, 
                                   horizontal_flip=True, 
                                   fill_mode='nearest')

train_generator = train_datagen.flow_from_directory(train_folder,  
                                                    target_size=(64, 64),  
                                                    batch_size= 20, 
                                                    class_mode= 'binary') 

# Get all the data in the directory split/validation (200 images), and reshape them
val_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(val_folder,  
                                                                       target_size=(64, 64),  
                                                                       batch_size=20, 
                                                                       class_mode='binary')

# Get all the data in the directory split/test (180 images), and reshape them
test_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(test_folder,  
                                                                        target_size=(64, 64), 
                                                                        batch_size=180,
                                                                        class_mode='binary')

test_images, test_labels = next(test_generator)

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.
Found 16 images belonging to 2 classes.


In [25]:

model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=2e-5),
              metrics=['acc', tf.keras.metrics.Recall()])

In [26]:
history = model.fit_generator(train_generator,
                              steps_per_epoch=27,
                              epochs=10,
                              class_weight = class_weight,
                              validation_data=val_generator,
                              validation_steps=10)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [31]:
results2_train = model.evaluate(train_generator)
results2_val = model.evaluate(val_generator)



In [None]:
train_acc = history.history['acc']
val_acc = history.history['val_acc']
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epch = range(1, len(train_acc) + 1)
plt.plot(epch, train_acc, 'g.', label='Training Accuracy')
plt.plot(epch, val_acc, 'g', label='Validation acc')
plt.title('Accuracy')
plt.legend()
plt.figure()
plt.plot(epch, train_loss, 'r.', label='Training loss')
plt.plot(epch, val_loss, 'r', label='Validation loss')
plt.title('Loss')
plt.legend()
plt.show()

**Fine Tuning**

In [32]:
cnn_base.trainable = True

In [33]:

cnn_base.trainable = True
set_trainable = False
for layer in cnn_base.layers:
    if layer.name == 'block5_conv1':
        set_trainable = True
    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False

In [34]:
model.compile(loss='binary_crossentropy', 
              optimizer=optimizers.RMSprop(lr=1e-4), 
              metrics=['accuracy', tf.keras.metrics.Recall()])

In [35]:
history = model.fit_generator(train_generator,
                              steps_per_epoch=27,
                              epochs=10,
                              class_weight = class_weight,
                              validation_data=val_generator,
                              validation_steps=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [37]:
#results3_train = model.evaluate(train_generator)
results3_val = model.evaluate(val_generator)



In [None]:
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epch = range(1, len(train_acc) + 1)
plt.plot(epch, train_acc, 'g.', label='Training Accuracy')
plt.plot(epch, val_acc, 'g', label='Validation acc')
plt.title('Accuracy')
plt.legend()
plt.figure()
plt.plot(epch, train_loss, 'r.', label='Training loss')
plt.plot(epch, val_loss, 'r', label='Validation loss')
plt.title('Loss')
plt.legend()
plt.show()

In [None]:
pred_train_y = model.predict_classes(train_features)
pred_val_y = model.predict_classes(validation_features)