In [1]:
import keras
from keras.applications import VGG16
from keras.models import Model
from keras.layers import Dense, Dropout, Input
from keras.regularizers import l2, activity_l2,l1
from keras.utils import np_utils
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.applications.vgg16 import preprocess_input
from scipy import misc
from os import listdir
import numpy as np
import matplotlib.pyplot as plt
import scipy

Using Theano backend.
Using gpu device 1: Tesla K20c (CNMeM is disabled, cuDNN 4007)


### Generate model with VGG16 feature extractors, set trainable false

In [21]:
def getModel( output_dim ):
    # output_dim: the number of classes (int)
    # return: compiled model (keras.engine.training.Model)
    
    vgg_model = VGG16( weights='imagenet', include_top=True )
    vgg_out = vgg_model.layers[-2].output 
    
    vgg_out = Dropout(0.25)(vgg_out)
    softmax = Dense( output_dim, activation="softmax", W_regularizer = l2(0.01))( vgg_out )
    

    tl_model = Model( input=vgg_model.input, output=softmax )
    # Transfer Learning
    for layer in tl_model.layers[0:-1]:
        layer.trainable = False            

    tl_model.compile(loss= "categorical_crossentropy", optimizer="adadelta", metrics=["acc"])
    
    return tl_model

### Loading all images into 'album'

In [3]:
def loadImages(path = '/mnt/cube/UT_/Urban_tribes/'):
    album = {}
    for item in listdir(path):
        category = item[0:4]
        if category == ".ipy":
            continue
        if category not in album:
            album[category] = []
        
        img = load_img(path+item)
        img = img_to_array(img)
        img = misc.imresize(img, (224, 224))
        img = scipy.misc.imrotate(img, 180)
        album[category].append(img)
    return album

In [4]:
album = loadImages()

In [10]:
# plt.imshow(album['goth'][0][:, :, 0])
# plt.show()

### Split 'album' into dataset
Album is split into training and testing input/outputs according to number of examples attribute

In [7]:
def make_dataset(album, n_train, n_test):
    trn_inp = []
    trn_out = []
    tst_inp = []
    tst_out = []    
    keys = album.keys()
    for key in keys:
        examples = album[key]
        l = len(examples)
        idx = np.random.choice(l, n_train+n_test)
        for i in idx[:-n_test]:
            trn_inp.append(examples[i])
            trn_out.append(keys.index(key))
        for i in idx[-n_test:]:
            tst_inp.append(examples[i])
            tst_out.append(keys.index(key))
    return [trn_inp, trn_out, tst_inp, tst_out]      

In [8]:
# plt.imshow(album['bike'][0][:, :, 0])
# # plt.show()
# album.keys()

In [27]:
samCat = 16
dataset = make_dataset(album, samCat, 16)

In [28]:
len(dataset[1])

176

In [29]:
trainX = np.array(dataset[0])
trainY = np.array(dataset[1])

# Now shuffle the training data and swapaxes
idx = np.random.choice(len(trainX), len(trainX))
trainX = trainX[idx]
trainY = trainY[idx]

trainX = preprocess_input(np.float64(trainX)).swapaxes(1, 3).swapaxes(2, 3)
trainY = np_utils.to_categorical(trainY)

# Only swapaxes for testing data
testX = np.array(dataset[2])
testY = np.array(dataset[3])

testX = preprocess_input(np.float64(testX)).swapaxes(1, 3).swapaxes(2, 3)
testY = np_utils.to_categorical(testY)

In [30]:
# plt.imshow(trainX[10][0, :, :])
# plt.show()

### Fit the model

In [31]:
model = getModel(11)

In [32]:
history = model.fit(trainX, trainY, batch_size = 16, nb_epoch = 30, validation_data = (testX, testY), shuffle=True)

Train on 176 samples, validate on 176 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss for %d samples per category' % samCat)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.show()

plt.plot(history.history['val_acc'])
plt.title('model accuracy for %d samples per category' % samCat)
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
val_acc: 0.2273 2 sample
        0.25  4 sample
        0.3182 8 sample
        0.4034 16 sample
        