In [5]:
import keras
keras.__version__

Using TensorFlow backend.


'2.1.2'

## Getting started 
- Download data from: https://www.kaggle.com/c/whale-categorization-playground
- Rename train.csv to targets.csv
- Rename the train directory to kaggle_train


In [6]:
import os

home_dir = os.getcwd()
fname = os.path.join(home_dir, 'targets.csv') # targets for both train and validation

f = open(fname)
data = f.read()
f.close()

lines = data.split('\n')
header = lines[0].split(',')
lines = lines[1:]
lines = lines[:-1]

print(header)
print(len(lines))

['Image', 'Id']
9850


# Encoding the whale ids


In [7]:
import numpy as np
whale_ids = [line.split(',')[1] for line in lines]
whale_ids = set(whale_ids) # convert to set to remove duplicats
whale_ids = list(whale_ids) # convert back to list to make it ordered


# Am no longer using the whale2vec function, sinc the ImageDataGenerator automatically one-hot-encodes the 
# targets
whale_dict = {}
for i, whale in enumerate(whale_ids):
    vec = np.zeros(len(whale_ids))
    vec[i] = 1
    whale_dict[whale] = vec
    
def whale2vec(whale): # returns a unique one-hot encoded vector given a whale_id
    if whale in whale_dict.keys():
        return whale_dict[whale]
    else:
        print("whale not found. Returning new_whale vector")
        return whale_dict['new_whale']
               
vec = whale2vec(whale_ids[2])
print(len(whale_ids))
print(vec)

4251
[ 0.  0.  1. ...,  0.  0.  0.]


# Preprocessing images

In [8]:
import errno 

def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc:  
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise

First we split the training images into a train and validation set, and then subdivide them into directories for each whale_id

In [9]:
import shutil
original_dataset_dir = os.path.join(home_dir, 'kaggle_train')

train_dir = os.path.join(home_dir, 'train')
mkdir_p(train_dir)

validation_dir = os.path.join(home_dir, 'validation')
mkdir_p(validation_dir)

# Let's then create a subdirectory for each whale_id in both the train and validation directories 
# so we can use the ImageDataGenerator magic function

classes_count = len(whale_ids) # During the development phase of testing models, we dont look at all 4251 classes. 
                     # When we're ready to look at all classes we can set classes_count = len(whale_ids)

for i, whale_id in enumerate(whale_ids):
    if i < classes_count:
        mkdir_p(os.path.join(train_dir, whale_id))
        mkdir_p(os.path.join(validation_dir, whale_id))

train_image_count = 0

# Copy first 7500 files into the appropriate whale directory in train dir (only if their class is included)   
for i in range(7500):
    pic = lines[i].split(',')[0]
    whale_id = lines[i].split(',')[1]
    src = os.path.join(original_dataset_dir, pic)
    whale_id_dir = os.path.join(train_dir, whale_id)
    if os.path.isdir(whale_id_dir):
        dst = os.path.join(whale_id_dir, pic)
        shutil.copyfile(src, dst)
        train_image_count += 1

validation_image_count = 0
# copy the rest into the appropriate whale directory in validation dir    (only if their class is included)  
for i in range(7500,len(lines)):
    pic = lines[i].split(',')[0]
    whale_id = lines[i].split(',')[1]
    src = os.path.join(original_dataset_dir, pic)
    whale_id_dir = os.path.join(validation_dir, whale_id)
    if os.path.isdir(whale_id_dir):
        dst = os.path.join(whale_id_dir, pic)
        shutil.copyfile(src, dst)
        validation_image_count += 1

print(train_image_count)
print(validation_image_count)

7500
2350


In [8]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

image_size = (180,180) #adjustable parameter for processed image_size. Run time should 
batch_size = 10
color_mode='rgb'

train_generator = train_datagen.flow_from_directory(
        # This is the target directory
        train_dir,
        # All images will be resized to image_size - define above
        target_size=image_size,
        color_mode=color_mode, # input images are RGB and grayscale, but we map them all onto 
                                # which should be suitable for whale tails anyway
        batch_size=batch_size,
        # Since we use binary_crossentropy loss, we need binary labels
        class_mode='categorical')

validation_generator = train_datagen.flow_from_directory(
        # This is the target directory
        validation_dir,
        # All images will be resized to image_size - define above
        target_size=image_size,
        color_mode=color_mode,
        batch_size=batch_size,
        # Since we use binary_crossentropy loss, we need binary labels
        class_mode='categorical')

Found 336 images belonging to 200 classes.
Found 95 images belonging to 200 classes.


In [9]:
from keras import layers
from keras import models

model = models.Sequential()
model.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(image_size[0],image_size[1],3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(2*classes_count, activation='relu')) # I use 2*classes_count as a first guess
                                                            # should be tested to see what works best
model.add(layers.Dense(classes_count, activation='softmax'))


In [10]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 178, 178, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 89, 89, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 87, 87, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 43, 43, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 41, 41, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 20, 20, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 18, 18, 128)       147584    
__________

In [11]:
from keras import optimizers

model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])

In [12]:
test_steps = int(round(train_image_count/batch_size))
validation_steps = int(round(validation_image_count/batch_size))

history = model.fit_generator(
      train_generator,
      steps_per_epoch=test_steps, # batch size is 20, 375 steps will get us through 7500 images
      epochs=5,
      validation_data=validation_generator,
      validation_steps=validation_steps)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Using a pre-trained convolutional base


In [73]:
from keras.applications import Xception

conv_base = Xception(weights='imagenet',
                  include_top=False,
                  input_shape=(image_size[0], image_size[1], 3))

In [74]:
conv_base.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 180, 180, 3)  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 89, 89, 32)   864         input_2[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 89, 89, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 89, 89, 32)   0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

The final feature map has shape `(6, 6, 2048)`. That's the feature on top of which we will stick a densely-connected classifier.

At this point, there are two ways we could proceed: 

* Running the convolutional base over our dataset, recording its output to a Numpy array on disk, then using this data as input to a 
standalone densely-connected classifier similar to those you have seen in the first chapters of this book. This solution is very fast and 
cheap to run, because it only requires running the convolutional base once for every input image, and the convolutional base is by far the 
most expensive part of the pipeline. However, for the exact same reason, this technique would not allow us to leverage data augmentation at 
all.
* Extending the model we have (`conv_base`) by adding `Dense` layers on top, and running the whole thing end-to-end on the input data. This 
allows us to use data augmentation, because every input image is going through the convolutional base every time it is seen by the model. 
However, for this same reason, this technique is far more expensive than the first one.

We will cover both techniques. Let's walk through the code required to set-up the first one: recording the output of `conv_base` on our 
data and using these outputs as inputs to a new model.

We will start by simply running instances of the previously-introduced `ImageDataGenerator` to extract images as Numpy arrays as well as 
their labels. We will extract features from these images simply by calling the `predict` method of the `conv_base` model.

In [26]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

home_dir = os.getcwd()
train_dir = os.path.join(home_dir, 'train')
validation_dir = os.path.join(home_dir, 'validation')

datagen = ImageDataGenerator(rescale=1./255)
batch_size = 20

def extract_features(directory, sample_count):
    features = np.zeros(shape=(sample_count, 6, 6, 2048))
    labels = np.zeros(shape=(sample_count, classes_count))
    generator = datagen.flow_from_directory(
        directory,
        target_size=image_size,
        color_mode=color_mode,
        batch_size=batch_size,
        class_mode='categorical')
    i = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        features[i * batch_size : (i + 1) * batch_size] = features_batch
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            # Note that since generators yield data indefinitely in a loop,
            # we must `break` after every image has been seen once.
            break
    return features, labels

train_features, train_labels = extract_features(train_dir, train_image_count)
validation_features, validation_labels = extract_features(validation_dir, validation_image_count)


Found 331 images belonging to 200 classes.
Found 85 images belonging to 200 classes.


In [27]:
train_features = np.reshape(train_features,(train_image_count, 6*6*2048))
validation_features = np.reshape(validation_features, (validation_image_count, 6*6*2048))

In [31]:
from keras import models
from keras import layers
from keras import optimizers

model = models.Sequential()
model.add(layers.Dense(128, activation='relu', input_dim=6*6*2048)) # we can play around with 
model.add(layers.Dropout(0.5))
model.add(layers.Dense(classes_count, activation='softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 128)               9437312   
_________________________________________________________________
dropout_4 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 200)               25800     
Total params: 9,463,112
Trainable params: 9,463,112
Non-trainable params: 0
_________________________________________________________________


In [33]:
model.compile(optimizer=optimizers.RMSprop(lr=2e-5),
              loss='categorical_crossentropy',
              metrics=['acc'])

history = model.fit(train_features, train_labels,
                    epochs=200,
                    batch_size=20,
                    validation_data=(validation_features, validation_labels))

Train on 331 samples, validate on 85 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Ep

In [34]:
model.save('name_that_whale_1.h5')

In [8]:
from keras.models import load_model

model = load_model('name_that_whale_1.h5')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 512)               37749248  
_________________________________________________________________
dropout_5 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 4251)              2180763   
Total params: 39,930,011
Trainable params: 39,930,011
Non-trainable params: 0
_________________________________________________________________


In [36]:
import matplotlib.pyplot as plt

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [41]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(classes_count, activation='softmax'))

In [42]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
xception (Model)             (None, 6, 6, 2048)        20861480  
_________________________________________________________________
flatten_3 (Flatten)          (None, 73728)             0         
_________________________________________________________________
dense_13 (Dense)             (None, 256)               18874624  
_________________________________________________________________
dense_14 (Dense)             (None, 200)               51400     
Total params: 39,787,504
Trainable params: 18,926,024
Non-trainable params: 20,861,480
_________________________________________________________________


Before we compile and train our model, a very important thing to do is to freeze the convolutional base. "Freezing" a layer or set of layers means preventing their weights from getting updated during training. If we don't do this, then the representations that were previously learned by the convolutional base would get modified during training. Since the Dense layers on top are randomly initialized, very large weight updates would be propagated through the network, effectively destroying the representations previously learned.

In Keras, freezing a network is done by setting its trainable attribute to False:

In [43]:
conv_base.trainable = False

With this setup, only the weights from the two Dense layers that we added will be trained. That's a total of four weight tensors: two per layer (the main weight matrix and the bias vector). Note that in order for these changes to take effect, we must first compile the model. If you ever modify weight trainability after compilation, you should then re-compile the model, or these changes would be ignored.

Now we can start training our model, with the same data augmentation configuration that we used in our previous example:

In [48]:
from keras.preprocessing.image import ImageDataGenerator

batch_size = 10
# data augmentation settings
train_datagen = ImageDataGenerator(
      rescale=1./255,
      rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')

# Note that the validation data should not be augmented!
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        # This is the target directory
        train_dir,
        color_mode = color_mode,
        # All images will be resized to 150x150
        target_size=image_size,
        batch_size=batch_size,
        class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        color_mode = color_mode,
        target_size=image_size,
        batch_size=batch_size,
        class_mode='categorical')

model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(lr=2e-5),
              metrics=['acc'])

test_steps = int(round(train_image_count/batch_size))
print('test steps: ' + str(test_steps))
validation_steps = int(round(validation_image_count/batch_size))
print('validation_steps ' + str(validation_steps))

history = model.fit_generator(
      train_generator,
      steps_per_epoch=test_steps,
      epochs=30,
      validation_data=validation_generator,
      validation_steps=validation_steps)


Found 331 images belonging to 200 classes.
Found 85 images belonging to 200 classes.
test steps: 33
validation_steps 8
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [46]:
conv_base.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 180, 180, 3)  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 89, 89, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 89, 89, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 89, 89, 32)   0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

We will unfreeze the layers in block 13 and block 14 

In [47]:
conv_base.trainable = True

set_trainable = False
for layer in conv_base.layers:
    if layer.name == "block13_sepconv1_act":
        set_trainable = True
    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False

Now we can start fine-tuning our network. We will do this with the RMSprop optimizer, using a very low learning rate. The reason for using a low learning rate is that we want to limit the magnitude of the modifications we make to the representations of the 3 layers that we are fine-tuning. Updates that are too large may harm these representations.

Now let's proceed with fine-tuning:


In [49]:
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-5),
              metrics=['acc'])

test_steps = int(round(train_image_count/batch_size))
print('test steps: ' + str(test_steps))
validation_steps = int(round(validation_image_count/batch_size))
print('validation_steps ' + str(validation_steps))

history = model.fit_generator(
      train_generator,
      steps_per_epoch=test_steps,
      epochs=100,
      validation_data=validation_generator,
      validation_steps=validation_steps)

test steps: 33
validation_steps 8
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100


## Running on test data and computing Kaggle performance metric


In [None]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

home_dir = os.getcwd()
test_dir = os.path.join(home_dir, 'test')
test_count = len(os.list_dir(test_dir))
ids_count = len(whale_ids)

datagen = ImageDataGenerator(rescale=1./255)
batch_size = 20

## Want a tensor of length (test, # of whale ids)

def predict_labels(directory):
    labels = np.zeros(shape=(test_count,ids_count))
    generator = datagen.flow_from_directory(
        directory,
        target_size=image_size,
        batch_size=batch_size,
        color_mode = color_mode,
        class_mode='categorical')
    i = 0
    for inputs_batch, labels_batch in generator:
        print('Generating predictions for image batch: ' + i)
        labels_batch = model.predict(inputs_batch)
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= test_count:
            # Note that since generators yield data indefinitely in a loop,
            # we must `break` after every image has been seen once.
            break
    return  labels

test_labels = predict_labels(test_dir)

In [None]:
test_list = os.list_dir(test_dir)

# We want to write something of the form: image_name pred_0 pred_2 ... pred_4
# Need to write a function that takes a label vector as input, and outputs an ordered list of 5 
# most probable whale_ids

# Need to get the index of the top 5 values of the label vector, then convert these into whale_ids

def get_ids(label_vec):
    ids = ''
    for i in range(5):
        max_value = max(label_vec)
        max_index = label_vec.index(max_value)
        ids += whale_ids[max_index]
        ids += ' '
        label_vec[max_index] = -1
    return ids
    

prediction = 'Image,Id \n'
for i in range(test_count):
    prediction += test_list[i]
    prediction += ','
    prediction += get_ids[test_labels[i]]
    prediction += '\n'

    
print(prediction)