In [1]:
import numpy as np
import keras
import pickle
from keras.utils.io_utils import HDF5Matrix
from keras import backend as K
from keras.models import Sequential
from keras.layers import Activation
from keras.layers.core import Dense, Flatten
from keras.optimizers import Adam
from keras.metrics import binary_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import *
from keras.models import load_model
from matplotlib import pyplot as plt
from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight
import itertools
from functions import plot_metrics, plot_images, class_weights_from_labels, f1, plot_f1
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Display the installed Keras version so the environment of this run is recorded in the output.
keras.__version__

'2.1.6'

In [3]:
# One HDF5 file holds the image and label datasets of all three splits;
# each HDF5Matrix wraps a single named dataset of that file.
datapath = '/Users/administrator/Desktop/BENTHOZ-2015/mlb_data.hdf5'

# Image/label pairs, opened in train -> val -> test order.
train_img, train_labels = [HDF5Matrix(datapath, key) for key in ('train_img', 'train_labels')]
val_img, val_labels = [HDF5Matrix(datapath, key) for key in ('val_img', 'val_labels')]
test_img, test_labels = [HDF5Matrix(datapath, key) for key in ('test_img', 'test_labels')]

In [4]:
batch_size = 32


def _rescaled_batches(images, labels):
    """Return a batch iterator over (images, labels) with pixels scaled by 1/255.

    No augmentation is configured; every other `flow` option is left at its
    default. Batches contain `batch_size` examples.
    """
    return ImageDataGenerator(rescale=1./255).flow(images, labels, batch_size=batch_size)


train_batches = _rescaled_batches(train_img, train_labels)
val_batches = _rescaled_batches(val_img, val_labels)
test_batches = _rescaled_batches(test_img, test_labels)

In [5]:
# Despite the .txt extension the file is read in binary mode and unpickled.
# `classes` is later used via len(classes) to size the output layer
# (presumably a sequence of class names -- confirm against the file's producer).
# NOTE(review): pickle.load can execute arbitrary code; only load a trusted file.
classes_path = '/Users/administrator/Desktop/BENTHOZ-2015/classes.txt'
with open(classes_path, "rb") as classes_file:
    classes = pickle.load(classes_file)

In [6]:
# If True, every layer copied from VGG16 is frozen and only the new output
# layer is trained.
train_top_only = True

# Rebuild VGG16 as a Sequential model, keeping all layers except the last one
# so that a new output layer sized for this problem can be attached.
pretrained_vgg16 = keras.applications.vgg16.VGG16()
vgg16_model = Sequential()
for pretrained_layer in pretrained_vgg16.layers[:-1]:
    vgg16_model.add(pretrained_layer)
del pretrained_vgg16  # the original model object is not needed any more

# Freeze the copied layers *before* the new Dense layer is added, so the new
# layer keeps its default (trainable) state.
if train_top_only:
    for copied_layer in vgg16_model.layers:
        copied_layer.trainable = False

# One sigmoid unit per class: use sigmoid in multi-label problems
n_classes = len(classes)
vgg16_model.add(Dense(n_classes, activation='sigmoid'))

In [7]:
# Print the layer-by-layer summary (layer type, output shape, parameter count) of the assembled model.
vgg16_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [8]:
# Optimisation settings
learning_rate = 1e-4
epochs = 100

# Number of full batches in the training set: 198 steps to go through all examples
n_train_examples = train_img.shape[0]
steps_per_epoch = n_train_examples // batch_size

# 32*10 = 320 examples for validation: do not use all examples, to reduce
# the time of computation
val_steps = 10

# Weights derived from the training labels by the project helper; passed to
# training as `class_weight`.
class_weights = class_weights_from_labels(train_labels)

In [9]:
filepath = '/Users/administrator/Desktop/BENTHOZ-2015/model/mlb_classifier_V2.h5'

# Both callbacks watch the same quantity: 'val_f1', where larger is better.
monitor_settings = dict(monitor='val_f1', mode='max', verbose=1)

# Stop training once val_f1 has failed to improve by at least 1e-4 for
# 10 consecutive epochs.
early_stopping = keras.callbacks.EarlyStopping(min_delta=1e-4, patience=10, **monitor_settings)

# Checked every epoch: write the whole model (not only its weights) to
# `filepath` whenever val_f1 reaches a new best value.
checkpoint = keras.callbacks.ModelCheckpoint(filepath, save_best_only=True, save_weights_only=False,
                                             period=1, **monitor_settings)

In [10]:
# Use binary crossentropy in multi-label problems; accuracy and the project's
# f1 function are tracked as metrics.
optimizer = Adam(lr=learning_rate)
vgg16_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy', f1])

In [None]:
# Train from the batch iterators. Validation runs on `val_steps` batches per
# epoch; the checkpoint and early-stopping callbacks run in that order.
fit_options = dict(
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=val_batches,
    validation_steps=val_steps,
    class_weight=class_weights,
    callbacks=[checkpoint, early_stopping],
    verbose=1,
)
history = vgg16_model.fit_generator(train_batches, **fit_options)

Epoch 1/100

Epoch 00001: val_f1 improved from -inf to 0.24457, saving model to /Users/administrator/Desktop/BENTHOZ-2015/model/mlb_classifier_V2.h5
Epoch 2/100

In [None]:
# Plot the per-epoch metrics recorded during training (history.history is the dict kept by Keras' History object).
plot_metrics(history.history)

In [None]:
# Number of test batches to evaluate (at most test_steps*batch_size examples).
test_steps = 20

# Labels and predictions must come from the SAME batches. Drawing the labels
# first and then calling predict_generator on the same iterator would predict
# on *subsequent* batches, so the two arrays would not correspond to each other.
# Predicting inside the loop keeps every prediction row aligned with its label
# row, and concatenating (instead of writing into a pre-sized array) also
# copes with a final batch that is smaller than batch_size.
test_label_batches = []
prediction_batches = []
for _ in range(test_steps):
    test_imgs, test_labels_temp = next(test_batches)
    test_label_batches.append(test_labels_temp)
    prediction_batches.append(vgg16_model.predict_on_batch(test_imgs))

# NOTE: as in the original cell, `test_labels` is rebound here from the
# HDF5Matrix handle to the in-memory array of labels that were actually drawn.
test_labels = np.concatenate(test_label_batches)
predictions = np.concatenate(prediction_batches)

In [None]:
# Plot the sample-averaged F1 of the test predictions against their labels.
# The prediction cell stores its output in `predictions`; the previously used
# name `preds` is never defined and raised a NameError on a fresh kernel.
plot_f1(test_labels, predictions, average='samples')