### Imports

In [None]:
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, Activation, Dropout, Input, Flatten, Dense, GlobalAveragePooling2D, BatchNormalization, Merge
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras.models import Sequential, Model
from keras.optimizers import SGD
from keras import backend as K
from keras.applications import VGG16, InceptionV3
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import datetime
import cv2
import glob
import os
import sys
# Select the 'tf' image dimension ordering for the Keras backend. The images
# loaded below are stacked as returned by cv2 and are not transposed, so the
# arrays are presumably (samples, rows, cols, channels) — TODO confirm.
K.set_image_dim_ordering('tf')

### Processing

In [None]:
''' Location of the training images and their description file.
'''
train_dir = '../data/Training/'

''' Read the per-image metadata table.
One row per image; the columns used here are image_id, vis_class and basename.
'''
meta = pd.read_csv(os.path.join(train_dir, 'ImageDescription.csv'))

# File names (relative to train_dir) and ids, in CSV row order.
basenames = meta['basename'].values
imgID = meta['image_id'].values

# factorize() returns (integer codes, unique values); the codes are the
# integer class labels, kept as a plain list in `y`.
y_meta = meta['vis_class'].factorize()
y = [code for code in y_meta[0]]

''' Define functions to load the training images.
'''
def resize_img(path, width=None, height=None):
    """Read the image at `path` and resize it with bilinear interpolation.

    Args:
        path: file path handed to cv2.imread.
        width: target width in pixels. When None, the module-level
            `img_width` is read at call time (the original behaviour).
        height: target height in pixels. When None, the module-level
            `img_height` is read at call time (the original behaviour).

    Returns:
        The resized image as returned by cv2.resize.

    Raises:
        IOError: if cv2.imread could not read the file (it returns None
            instead of raising, which would otherwise surface as an
            opaque error inside cv2.resize).
    """
    img = cv2.imread(path)
    if img is None:
        raise IOError('Could not read image: %s' % path)
    if width is None:
        width = img_width
    if height is None:
        height = img_height
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img, (width, height), interpolation=cv2.INTER_LINEAR)

def load_data(data_dir, y, nb_classes=4):
    """Load every image listed in the module-level `basenames` and encode labels.

    Each basename is joined to `data_dir`, expanded with glob, and the first
    match is read and resized by `resize_img` (which uses the current
    module-level img_width / img_height).

    Args:
        data_dir: directory the basenames are relative to.
        y: either a 1-D sequence of integer class ids, or a 2-D array with
            `nb_classes` columns that is already one-hot encoded (returned
            unchanged, so calling this function again with its own label
            output does not corrupt the labels).
        nb_classes: number of classes for the one-hot encoding (default 4).

    Returns:
        (X, y): X is a float32 array of the stacked images scaled to [0, 1];
        y is the one-hot label matrix.

    Raises:
        IOError: if no file matches one of the basenames.
        ValueError: if `y` is neither 1-D nor 2-D with `nb_classes` columns.
    """
    images = []
    for basename in basenames:
        matches = glob.glob(os.path.join(data_dir, basename))
        if not matches:
            raise IOError('No file matching %r found in %r' % (basename, data_dir))
        images.append(resize_img(matches[0]))

    # Stack as uint8 first, then convert to float32 and scale to [0, 1].
    X = np.array(images, dtype=np.uint8).astype('float32') / 255

    labels = np.asarray(y)
    if labels.ndim == 1:
        y = np_utils.to_categorical(np.array(y, dtype=np.uint8), nb_classes)
    elif labels.ndim == 2 and labels.shape[1] == nb_classes:
        # Already one-hot: encoding it a second time would scramble the labels.
        y = labels
    else:
        raise ValueError('y must be 1-D class ids or a one-hot matrix with %d '
                         'columns; got shape %r' % (nb_classes, labels.shape))
    return X, y

''' Load the images at the InceptionV3 input size (299x299x3).
resize_img reads the module-level img_width / img_height at call time,
so they are set right before each load_data call.
'''
img_width = img_height = 299
X_inc, y_onehot = load_data(train_dir, y)

''' Idem at the VGG16 input size (224x224x3).
The labels were already encoded by the call above; the label output of this
second call is discarded so that `y` is never one-hot encoded twice.
'''
img_width = img_height = 224
X_vgg, _ = load_data(train_dir, y)

''' Create a validation split from the last rows.
More or less 20% of the data.
NOTE(review): the split follows CSV row order without shuffling — confirm
the file is not sorted by class.
'''
n_val = 900
X_inc_val = X_inc[-n_val:]
X_vgg_val = X_vgg[-n_val:]
y_val = y_onehot[-n_val:]

# Keep the remaining rows for training. The labels must be cut the same way
# as the images, otherwise X and y have different sample counts at fit time.
X_inc = X_inc[:-n_val]
X_vgg = X_vgg[:-n_val]
y = y_onehot[:-n_val]

### Create model

In [None]:
'''
Load the VGG16 and InceptionV3 ConvNets. Pre-trained Imagenet weights are loaded
into the models.
'''
print('Loading VGG16 and InceptionV3 with pre-trained weights...')
vgg = VGG16(weights='imagenet')
inception = InceptionV3(weights='imagenet')


'''
Cut both networks off below their classifier.
Calling vgg.layers.pop() only removes entries from the Python list of layers;
it does not rewire the model's output tensor, so merging the popped models
would still concatenate the two 1000-way softmax outputs. Instead, build new
models that end at the layer the pops were meant to expose: three layers
removed from VGG16 and one from InceptionV3 (the flatten layer, according to
the earlier version of this cell). The new models share their layer objects
(and weights) with `vgg` and `inception`.
'''
print('Popping layers...')
vgg_features = Model(input=vgg.input, output=vgg.layers[-4].output)
inception_features = Model(input=inception.input, output=inception.layers[-2].output)

'''
Define the fully connected head on top of the concatenated feature vectors
of both networks. The Merge order (VGG16 first, InceptionV3 second) fixes the
order of the input list passed to fit/predict.
'''
stacked_model = Sequential()
stacked_model.add(Merge([vgg_features, inception_features], mode='concat', concat_axis=1))
stacked_model.add(Dense(2048))
stacked_model.add(Activation('relu'))
stacked_model.add(Dropout(0.5))
stacked_model.add(Dense(512))
stacked_model.add(Activation('relu'))
stacked_model.add(Dropout(0.5))
stacked_model.add(Dense(4))
stacked_model.add(Activation('softmax'))

'''
Because we append an untrained fully connected head, we first train it
exclusively, i.e. freeze the weights of VGG16 and InceptionV3. Otherwise the
randomly initialized head would back-propagate large, essentially random
errors into the pre-trained VGG16 and InceptionV3 weights.
The flags are set on the shared layer objects before compile().
'''
for layer in inception.layers:
    layer.trainable = False
for layer in vgg.layers:
    layer.trainable = False

# Compile the stacked neural network.
stacked_model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
'''
Train the fully connected head only.
NOTE(review): an earlier note on this cell said 5 epochs are needed before
the network starts over-fitting, but the call below trains for 3 — confirm
which is intended.
'''
print('Fully connected layer training initialized...')
stacked_model.fit(x=[X_vgg, X_inc], y=y, validation_data=([X_vgg_val, X_inc_val], y_val), nb_epoch=3, batch_size=50)


### Training

In [None]:
'''
Fine-tuning stage. The fully connected head has been trained, so part of
the VGG16 and InceptionV3 weights can now be unfrozen. Only the top layers
are unfrozen: the lower layers hold feature extractors we want to keep as
they are, while the top layers are more specific to the data the weights
were originally trained on (Imagenet). The intent is to fine-tune the top 2
convolutional blocks of VGG16 (block 4 and 5) and the top 2 inception
modules; the cut-off indices below (10 for VGG16, 172 for InceptionV3)
encode that choice.
'''
print('Unfreezing top-layer weights...')
# Layers below the cut-off index stay frozen, layers from it onward train.
for position, layer in enumerate(inception.layers):
    layer.trainable = position >= 172
for position, layer in enumerate(vgg.layers):
    layer.trainable = position >= 10

'''
The model is compiled again after changing the trainable flags. This time
SGD with a very low learning rate is used to fine-tune the model.
'''
fine_tune_sgd = SGD(lr=1e-4, decay=1e-6, momentum=.9, nesterov=True)
stacked_model.compile(
    optimizer=fine_tune_sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


'''
Train the head together with the unfrozen part of VGG16 and InceptionV3.
Training stops early once val_loss has not improved for 7 epochs.
'''
callbacks = [EarlyStopping(monitor='val_loss', patience=7, verbose=0)]

print('Transferlearning top part of VGG16 and InceptionV3 in parallel...')
stacked_model.fit(
    x=[X_vgg, X_inc],
    y=y,
    validation_data=([X_vgg_val, X_inc_val], y_val),
    nb_epoch=100,
    batch_size=50,
    callbacks=callbacks,
)

### Validation

In [None]:
print('Learning done, making predictions..')
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Evaluate the trained two-input model on the held-out split. The input list
# follows the same order used for fit(): VGG16-sized images first, then
# InceptionV3-sized images.
# NOTE(review): this is the same split that drove early stopping during
# fine-tuning, so these scores are not from an untouched test set.
pred = stacked_model.predict([X_vgg_val, X_inc_val])

# Turn class probabilities / one-hot rows into integer class ids once.
y_pred = np.argmax(pred, axis=1)
y_true = np.argmax(y_val, axis=1)

print('Classification report:')
print(classification_report(y_true=y_true, y_pred=y_pred))
print('')
print('Confusion matrix:')
print(confusion_matrix(y_true=y_true, y_pred=y_pred))
print('')
print('Accuracy:')
print(accuracy_score(y_true=y_true, y_pred=y_pred))