### Image labelling using CNN

This notebook does a transfer learning based on the VGG-16 pre-trained network.

In [1]:
# Import libraries and modules
import numpy as np
import pandas as pd
np.random.seed(123)  # for reproducibility

import time
from os import listdir
from os.path import isfile, join

import scipy.io
from PIL import Image

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
from keras import Model

from keras.preprocessing.image import load_img, img_to_array
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input

from keras import optimizers

Using Theano backend.


### Utility methods

#### Getting metadata out of Matlab file

In [2]:
# Method to read picture information from a matlab file
def load_img_from_mat(mat_file):

    mat_data = scipy.io.loadmat(mat_file)
    # load annotations for each picture
    dogs_annotation = []
    
    for img_annotation in mat_data['annotation_list']:
        dog_annotation = str(img_annotation[0][0])
        dogs_annotation.append(dog_annotation)

    # load picture filenames and path for each picture contained in mat file
    dogs_full_path = []
    dogs_folder = []
    dogs_file = []

    for img_file in mat_data['file_list']:
        dog_full_path = str(img_file[0][0])
        folder, filen =  dog_full_path.split('/')
        dogs_full_path.append(dog_full_path)
        dogs_folder.append(folder)
        dogs_file.append(filen)

    # Create dataframe with these informations
    file_df = pd.DataFrame({'file' : dogs_file,
                             'folder' : dogs_folder,
                             'full_path' : dogs_full_path,
                             'annotation' : dogs_annotation})
    
    # Load label information and create a complete dataframe
    labels_df = pd.DataFrame(mat_data['labels'], columns = ['label'])
    file_df = file_df.join(labels_df)
    
    return file_df

#### Reading pictures and storing them into an array

In [3]:
# Method to read a file picture and return it as VGG-16 requires
def load_img_file(variant, img_folder, img_file):
    picture_file = join(variant, img_folder, img_file)

    # Load picture and resez all to 224x224 pixels
    img = load_img(picture_file, target_size=(224, 224))
    
    # Picture has to be converted into Numpy array
    img = img_to_array(img)
    
    # Preprocess input as VGG-16 requires it
    img = preprocess_input(img)
    
    return img

In [4]:
# Create picture array
def get_pict_array(pict_df):
    # Variable to hold all pictures
    X_pict_array = []
    first = True
    for dog_folder, dog_file in np.array(pict_df[['folder', 'file']]):
        img = load_img_file(images, dog_folder, dog_file)
        if first:
            X_pict_array = [img]
            first = False
        else:
            X_pict_array += [img]
            
    return np.array(X_pict_array)

### Main execution

In [5]:
# Define data directories
my_data = "../data"
images = my_data + '/Images' # Images folder
bw = my_data + '/BWEqu'

In [6]:
# Parameters

# Number of dogs breeds to include
nb_breeds = 10

# Fine tuning strategy (values: full, partial, feature)
strategy = 'partial'

# number of epochs for fine tuning
epochs = 10

#### Loading data

In [7]:
# Load train data
train_df = load_img_from_mat(my_data + '/train_list.mat')
sample_train_df = train_df[train_df['label'] <= nb_breeds]

X_train = get_pict_array(sample_train_df)
Y_train = sample_train_df['label'] - 1

# Load test data
test_df = load_img_from_mat(my_data + '/test_list.mat')
sample_test_df = test_df[test_df['label'] <= nb_breeds]

X_test = get_pict_array(sample_test_df)
Y_test = sample_test_df['label'] - 1

In [8]:
# Checking imported data. shape returns 
# - the number of pictures
# - The byte depth: 3 = color picture
# - the size in pixels

print(X_train.shape)


(1000, 3, 224, 224)


#### Preprocessing data

In [9]:
# Normalize all values to be between 0 and 1
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
 

In [10]:
# Preprocess class labels to create a matrix
Y_train = np_utils.to_categorical(Y_train, nb_breeds)
Y_test = np_utils.to_categorical(Y_test, nb_breeds)
 

In [11]:
# Import the VGG-16 network pre-trained with Imagenet. Remove upper layer.
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(3, 224, 224))

# get the ouput of the model to build on top of it
x = base_model.output

# Add a Flatten layer to adapt the data format to the Dense layer requirements
x = Flatten()(x)
x = Dense(4096, activation='relu')(x)
x = Dense(4096, activation='relu')(x)

# Add a new fully connected layer with numer of classes depending on the number of labels
predictions = Dense(nb_breeds, activation='softmax')(x)

# Finally define the new model
model = Model(inputs=base_model.input, outputs=predictions)

In [12]:
# Stategy choice : feature extraction only, partial fine tuning or complete fine tuning
# Here : complete fine tuning, all layers are trainanble
for layer in base_model.layers:
    layer.trainable = False

if strategy == 'full':
    for layer in base_model.layers:
        layer.trainable = True
    
if strategy == 'partial':
    for layer in base_model.layers[6:]:
        layer.trainable = True

In [13]:
# Compile model
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
              metrics=['accuracy'])
 

In [14]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 3, 224, 224)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 224, 224)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 224, 224)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 112, 112)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 112, 112)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 112, 112)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 128, 56, 56)       0         
__________

In [15]:
model.layers[19].input_shape

(None, 512, 7, 7)

In [16]:
# Fit model on training data
start = time.time()

model.fit(X_train, Y_train, 
          batch_size=32, epochs=epochs, verbose=1)
end = time.time()
print('Fitting time {0} seconds.'.format(end - start))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fitting time 4261.647929430008 seconds.


In [17]:
# Evaluate model on test data
score = model.evaluate(X_test, Y_test, verbose=0)

In [18]:
print("Accuracy score for {0} breeds, {1} strategy and {2} epochs: {3:.1%}".format(nb_breeds, strategy, epochs, score[1]))


Accuracy score for 10 breeds, partial strategy and 10 epochs: 69.9%


In [19]:
#Pickle objects
import pickle

#dump the model
model_name = 'vgg16-{0}-{1}-{2}.pkl'.format(nb_breeds, strategy, epochs)
print('Dumping model {0}'.format(model_name))
output = open(model_name, 'wb')
pickle.dump(model, output, -1)
output.close()


Dumping model vgg16-10-partial-10.pkl
