In [8]:
%matplotlib inline

# Root folder of the dogs-vs-cats dataset; later cells append 'train/' and
# 'valid' to it to reach the image folders.
path = "../data/dl1/dogscats/"
# Alternative dataset root (a 'sample' subfolder); uncomment to use it instead:
# path = "../data/dl1/dogscats/sample/"

In [9]:
from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
import scipy.spatial.distance

import utils; reload(utils)
from utils import plots

from time import time

In [10]:
# Import our class, and instantiate
import vgg16; reload(vgg16)
from vgg16 import Vgg16

In [11]:
from keras.models import Model
# Positions in base.layers of the layers whose activations are extracted.
idx = [0,5,10,17,24,31,33,35,37]
# Names that get_models prints for these positions on the first Vgg16 instance
# (the numeric suffixes differ for a model built later in the same session):
#    0 -> lambda_1
#    5 -> maxpooling2d_1
#   10 -> maxpooling2d_2
#   17 -> maxpooling2d_3
#   24 -> maxpooling2d_4
#   31 -> maxpooling2d_5
#   33 -> dense_1
#   35 -> dense_2
#   37 -> dense_3

def get_models(idx,base):
    """Build one truncated model per selected layer of `base`.

    Parameters
    ----------
    idx : sequence of int
        Positions in `base.layers` of the layers to tap.
    base : model object
        Must expose `.layers`, `.input` and `.get_layer(name)`.

    Returns
    -------
    (layers, models, outshapes)
        `layers` holds the selected layer objects, `models` holds one
        `Model` per layer mapping `base.input` to that layer's output, and
        `outshapes` holds each layer's `output_shape` converted to a list.
        All three follow the order of `idx`.

    The name of every selected layer is printed, followed by 'Done.'.
    """
    layers = []
    for position in idx:
        selected = base.layers[position]
        print(selected.name)
        layers.append(selected)

    # Each sub-model shares the input of `base` and stops at the tapped layer.
    models = [
        Model(input=base.input, output=base.get_layer(tapped.name).output)
        for tapped in layers
    ]
    outshapes = [list(tapped.output_shape) for tapped in layers]

    print('Done.')
    return layers,models,outshapes

In [5]:
# Instantiate the VGG16 wrapper (not finetuned at this point) and build one
# truncated sub-model per position in idx; get_models prints the name of each
# selected layer.
vgg = Vgg16()
base = vgg.model
layers,models,outshapes = get_models(idx,base)

lambda_1
maxpooling2d_1
maxpooling2d_2
maxpooling2d_3
maxpooling2d_4
maxpooling2d_5
dense_1
dense_2
dense_3
Done.


### Extract representations without finetuning

#### Training set

In [None]:
# For every selected layer: run the first nbatches * batch_size training
# images through the truncated model, save the activations, and compute the
# pairwise Euclidean distances between images.
# (Author's note: the training and validation sets are extracted separately.)
datapath = path + 'train/'
repath   = '../data/dl1/objnc/rep/dogscats/nofinetune/'
nbatches = 40
batch_size = 50
print('N.of batches ' + str(nbatches))

# Create the output folder up front so np.save cannot fail, after minutes of
# forward passes, just because the directory is missing.
if not os.path.isdir(repath):
    os.makedirs(repath)

D = []     # per layer: condensed pairwise-distance vector (pdist output)
D_sq = []  # per layer: square pairwise-distance matrix (squareform output)
for layer, model in zip(layers, models):
    print('Processing model : ' + layer.name)
    # A fresh generator with shuffle=False is created for each layer, so each
    # layer restarts from the first batch instead of continuing where the
    # previous layer stopped.
    batches = vgg.get_batches(datapath, batch_size=batch_size, shuffle=False)
    ti = time()
    R = []
    for _batch in range(nbatches):
        # next() works on both Python 2 and Python 3 iterators; the labels
        # yielded next to the images are not needed here.
        imgs = next(batches)[0]
        R.append(model.predict(imgs))
    # dtype=float means float64.
    # NOTE(review): at the (2000, 64, 112, 112) shape logged for
    # maxpooling2d_1 this array alone is ~12.8 GB - confirm that is intended.
    R = np.asarray(R, dtype=float)
    # Merge the (batch, sample-in-batch) axes into a single sample axis.
    R = R.reshape((-1,) + R.shape[2:])
    print(R.shape)
    np.save(repath + layer.name + '_train', R)

    # compute distances on the flattened (n_samples, n_features) view
    R = R.reshape(R.shape[0], -1)
    d = scipy.spatial.distance.pdist(R, 'euclidean')
    d_sq = scipy.spatial.distance.squareform(d, force='no', checks=True)
    D.append(d)
    D_sq.append(d_sq)
    # Elapsed seconds for this layer (prediction, saving and distances).
    te = time() - ti
    print(te)

np.save(repath + 'D'    + '_train', D)
np.save(repath + 'D_sq' + '_train', D_sq)
print('Done.')

Found 23000 images belonging to 2 classes.
N.of batches 40
Processing model : lambda_1
Found 23000 images belonging to 2 classes.
(2000, 3, 224, 224)
395.020223856
Processing model : maxpooling2d_1
Found 23000 images belonging to 2 classes.
(2000, 64, 112, 112)


### Extract representations with finetuning

In [13]:
# Fresh VGG16 wrapper to be finetuned, plus batch generators for the training
# and validation folders.
vgg = Vgg16()
N = 23000  # matches the "Found 23000 images" count reported for the train folder
batch_size = 50
# N/batch_size is a true division here (from __future__ import division), hence
# the int(). The value is not used by the finetune/fit cells and nbatches is
# reassigned before the extraction loop further down.
nbatches = int(N/batch_size)
batches = vgg.get_batches(path+'train', batch_size=batch_size)
val_batches = vgg.get_batches(path+'valid', batch_size=batch_size)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [15]:
# Adapt the network to the dogs-vs-cats batches. Presumably this swaps the
# final dense layer for a new one sized to the classes in `batches` (the last
# layer later yields 2 outputs per image) - TODO confirm in vgg16.py.
vgg.finetune(batches)

In [16]:
# Train for 10 epochs on the training batches; val_batches is presumably used
# for per-epoch validation - confirm in vgg16.py.
vgg.fit(batches, val_batches, nb_epoch=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


##### Observation 
I notice that training is behaving strangely, but this is not important here, since we only need to recompute the representations of the last hidden layer - the new one inserted to replace the last hidden layer of the original VGG.

In [17]:
# Persist the finetuned weights. The folder is created first so that the
# result of the 10-epoch training run is not lost to a missing directory.
repath   = '../data/dl1/objnc/rep/dogscats/finetune/'
if not os.path.isdir(repath):
    os.makedirs(repath)
vgg.model.save_weights(repath + 'w.h5')

### Extract representations of the finetuned last hidden layer

In [20]:
# Rebuild the truncated sub-models on top of the finetuned network; the same
# idx positions are reused, so get_models prints this model's layer names.
base = vgg.model
layers,models,outshapes = get_models(idx,base)

lambda_4
maxpooling2d_16
maxpooling2d_17
maxpooling2d_18
maxpooling2d_19
maxpooling2d_20
dense_10
dense_11
dense_13
Done.


<keras.engine.training.Model at 0x7fe01d4bb710>

In [24]:
# Recompute the representation of the last selected layer only, using the
# finetuned network: run the first nbatches * batch_size training images
# through it, save the activations, and compute the pairwise Euclidean
# distances between images.
datapath = path + 'train/'
repath   = '../data/dl1/objnc/rep/dogscats/finetune/'
nbatches = 40
batch_size = 50
print('N.of batches ' + str(nbatches))

# Make sure the output folder exists before spending time on forward passes.
if not os.path.isdir(repath):
    os.makedirs(repath)

D = []     # condensed pairwise-distance vector (single entry: last layer)
D_sq = []  # square pairwise-distance matrix (single entry: last layer)
layer = layers[-1]
model = models[-1]
print('Processing model : ' + layer.name)

# shuffle=False is requested so the generator does not shuffle the images.
batches = vgg.get_batches(datapath, batch_size=batch_size, shuffle=False)
ti = time()
R = []
for _batch in range(nbatches):
    # next() works on both Python 2 and Python 3 iterators; the labels
    # yielded next to the images are not needed here.
    imgs = next(batches)[0]
    R.append(model.predict(imgs))
R = np.asarray(R, dtype=float)  # dtype=float means float64
# Merge the (batch, sample-in-batch) axes into a single sample axis.
R = R.reshape((-1,) + R.shape[2:])
print(R.shape)
np.save(repath + layer.name + '_train', R)

# compute distances on the flattened (n_samples, n_features) view
R = R.reshape(R.shape[0], -1)
d = scipy.spatial.distance.pdist(R, 'euclidean')
d_sq = scipy.spatial.distance.squareform(d, force='no', checks=True)
D.append(d)
D_sq.append(d_sq)
# Elapsed seconds (prediction, saving and distances).
te = time() - ti
print(te)

np.save(repath + 'D'    + '_train', D)
np.save(repath + 'D_sq' + '_train', D_sq)
print('Done.')

Found 23000 images belonging to 2 classes.
N.of batches 40
Processing model : dense_13
Found 23000 images belonging to 2 classes.
(2000, 2)
70.2854681015
Done.
