# Image Classification using CNNs

In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras.preprocessing import image
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # use CPU

Using TensorFlow backend.


In [2]:
# --- Image geometry ---------------------------------------------------------
# The CNN needs a fixed input size, so every image is resized to
# img_width x img_height (256x256 here). Lower these values if the model
# does not fit in GPU memory.
img_width, img_height = 256, 256

# --- Dataset locations ------------------------------------------------------
PATH_DATA = "data/chest_xray"
train_data_dir = "{}/train".format(PATH_DATA)
validation_data_dir = "{}/val".format(PATH_DATA)

# --- Training hyper-parameters ----------------------------------------------
nb_train_samples = 5216      # number of training images
nb_validation_samples = 16   # number of validation images
epochs = 100                 # passes over the training set; lower it for a quick run
batch_size = 16              # examples per forward/backward pass;
                             # larger batches need more memory

# --- Input tensor layout ----------------------------------------------------
# A single channel is used; its position in the shape depends on the
# data format the Keras backend is configured with.
input_shape = (
    (1, img_width, img_height)
    if K.image_data_format() == 'channels_first'
    else (img_width, img_height, 1)
)


In [3]:
# Build the CNN: three convolution stages followed by a small dense classifier.

model = Sequential()

# Each stage is a 3x3 convolution, ReLU, then 2x2 max-pooling.
# Only the first convolution declares the input shape.
for stage_index, n_filters in enumerate((32, 32, 64)):
    conv_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}
    model.add(Conv2D(n_filters, (3, 3), **conv_kwargs))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten -> 64-unit ReLU layer -> dropout -> one sigmoid unit.
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [4]:
# Optimiser: Adam with a learning rate of 1e-4.
ADAM = Adam(lr=1e-4)

# Binary cross-entropy matches the single sigmoid output; switch to
# loss='categorical_crossentropy' when there are more than two classes.
model.compile(
    optimizer=ADAM,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 254, 254, 32)      320       
_________________________________________________________________
activation_1 (Activation)    (None, 254, 254, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 127, 127, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 125, 125, 32)      9248      
_________________________________________________________________
activation_2 (Activation)    (None, 125, 125, 32)      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 62, 62, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 60, 60, 64)        18496     
__________

To make the most of our limited training examples, we "augment" them with a number of random transformations, so that the model never sees exactly the same picture twice. This helps prevent overfitting and helps the model generalize better.

In [5]:
# Training images are multiplied by 1/255 and randomly sheared, zoomed and
# horizontally flipped.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Options shared by both directory readers.
# Use class_mode='categorical' if you have more than two output classes.
# NOTE(review): Keras documents target_size as (height, width); both values
# are equal in this notebook, so the order passed here makes no difference.
flow_options = dict(
    target_size=(img_width, img_height),
    color_mode="grayscale",
    batch_size=batch_size,
    class_mode='binary',
)

train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_options)
validation_generator = test_datagen.flow_from_directory(validation_data_dir, **flow_options)

Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.


In [6]:
# Number of whole batches per epoch for training and for validation.
train_steps = nb_train_samples // batch_size
val_steps = nb_validation_samples // batch_size

# Train from the generators; the returned History object holds the
# per-epoch metrics.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    verbose=1,
    validation_data=validation_generator,
    validation_steps=val_steps,
)


Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100

KeyboardInterrupt: 

In [None]:
# Training curves: one figure for accuracy and one for loss, each showing the
# training and validation series per epoch.
plt.rcParams['figure.figsize'] = (6,5)

# Keras releases before 2.3 record the accuracy metric under 'acc'/'val_acc';
# later releases use 'accuracy'/'val_accuracy'. Use whichever key is present
# so the cell does not raise KeyError on either.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'

# (history key, figure title, y-axis label, legend position)
curve_specs = (
    (acc_key, "Accuracy ", 'Accuracy', 'upper left'),
    ('loss', "Error", 'Loss', 'upper right'),
)

for metric_key, title, ylabel, legend_loc in curve_specs:
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(['train', 'val'], loc=legend_loc)
    plt.show()
    plt.close()

In [None]:
# Let's use our trained model to make some prediction on test examples

%matplotlib inline

plt.rcParams['figure.figsize'] = (16,16) # Make the figures a bit bigger
base_path = PATH_DATA + '/test/'

classes = {0:'Cat', 1:'Dog'} # Keras assigns integer ids (0,1,...) to class labels (cat,dog,...) alphabatecally
test_files = []

for x in os.listdir(base_path): 
    if (x.endswith('.jpg')):
        test_files.append(x)

for i,x in enumerate(test_files):
    img = image.load_img(base_path + x, target_size=(img_width, img_height)) # Our trained network expects input images of 150x150 dimensions
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    output = model.predict(x)
    plt.subplot(6,4,i+1)
    plt.imshow(img)
    plt.title("Predicted: {}".format(classes[int(output[0][0])]))
    plt.axis('off')