### First, we train a CNN to extract the features

In [1]:
import os.path
import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import OneHotEncoder
from model import build_my_cnn

# --- Configuration ----------------------------------------------------------
WEIGHTS_DIR = 'data/weights/'

NB_EPOCH = 30
BATCH_SIZE = 16

# Set to False to feed the training images to the network unmodified.
AUGMENTATION = True

# Make sure the checkpoint target directory exists before training starts, so
# the first checkpoint write cannot fail on a missing folder.
# (`import os.path` at the top of the cell also binds the name `os`.)
os.makedirs(WEIGHTS_DIR, exist_ok=True)

# --- Data -------------------------------------------------------------------
train_x, train_y = np.load('data/train_x.npy'), np.load('data/train_y.npy')
test_x, test_y = np.load('data/test_x.npy'), np.load('data/test_y.npy')

n_subjects = len(set(train_y))
n_train = train_x.shape[0]
n_test = test_x.shape[0]

# One-hot encode the subject labels. The encoder is fitted on the training
# labels only, so the number of columns equals n_subjects.
One = OneHotEncoder()
One.fit(train_y.reshape(-1, 1))

# .toarray() returns a plain ndarray; .todense() would return an np.matrix.
train_y = One.transform(train_y.reshape(-1, 1)).toarray()
test_y = One.transform(test_y.reshape(-1, 1)).toarray()

print('n_train: {}'.format(n_train))
print('n_test: {}'.format(n_test))
print('n_subjects: {}'.format(n_subjects))

# --- Training ---------------------------------------------------------------
# Keep only the weights of the epoch with the highest validation accuracy.
checkpoint = ModelCheckpoint(WEIGHTS_DIR + 'weights.best.h5', monitor='val_acc', verbose=0, save_best_only=True, mode='max')

if AUGMENTATION:
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        zoom_range=0.1,
        horizontal_flip=True)
else:
    # A generator without any transformation simply batches the raw images.
    datagen = ImageDataGenerator()

model = build_my_cnn(227, n_subjects)
model.summary()

# Keras 2 keyword names (steps_per_epoch / epochs), matching the `epochs=`
# usage in the triplet-training cell. The step count is rounded up so the
# final, smaller batch of every epoch is included.
model.fit_generator(datagen.flow(train_x, train_y, batch_size=BATCH_SIZE),
                    steps_per_epoch=int(np.ceil(n_train / BATCH_SIZE)),
                    epochs=NB_EPOCH,
                    validation_data=(test_x, test_y),
                    callbacks=[checkpoint])

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


n_train: 444
n_test: 46
n_subjects: 24
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_1 (Batch (None, 227, 227, 3)       12        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 227, 227, 32)      896       
_________________________________________________________________
activation_1 (Activation)    (None, 227, 227, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 113, 113, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 111, 111, 64)      18496     
_________________________________________________________________
activation_2 (Activation)    (None, 111, 111, 64)      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x24161febe10>

### Then we train a more accurate model with triplet loss

In [3]:
import itertools
import numpy as np
from model import build_my_cnn
from model import build_tpe
from model import Bottleneck
from identification import get_scores, calc_metrics
from sklearn.decomposition import PCA

# Input / output dimensionality handed to build_tpe further down.
n_in = 24
n_out = 24

data_dir = 'data/'

# Rebuild the CNN and restore the checkpoint stored under data/weights/.
cnn = build_my_cnn(227, 24)
cnn.load_weights(data_dir + 'weights/weights.best.h5')
# ~1 evaluates to -2; see model.Bottleneck for how that index is interpreted.
bottleneck = Bottleneck(cnn, ~1)

train_x = np.load(data_dir + 'train_x.npy')
train_y = np.load(data_dir + 'train_y.npy')
test_x = np.load(data_dir + 'test_x.npy')
test_y = np.load(data_dir + 'test_y.npy')

# The test split is appended to the training pool for this stage; evaluation
# below uses the separate dev set.
train_x = np.vstack([train_x, test_x])
train_y = np.hstack([train_y, test_y])

dev_x = np.load(data_dir + 'dev_x.npy')
dev_protocol = np.load(data_dir + 'dev_protocol.npy')

# Bottleneck outputs for every training and dev image.
train_emb = bottleneck.predict(train_x, batch_size=256)
dev_emb = bottleneck.predict(dev_x, batch_size=256)

# The raw images are not needed once the embeddings exist.
del train_x

# PCA components are passed (transposed) to build_tpe.
pca = PCA(n_out)
W_pca = pca.fit(train_emb).components_

tpe, tpe_pred = build_tpe(n_in, n_out, W_pca.T)
# tpe.load_weights('data/weights/weights.tpe.mineer.h5')

train_y = np.array(train_y)
subjects = list(set(train_y))

# Every ordered pair of distinct samples of the same subject becomes one
# (anchor index, positive index, subject label) entry.
same_subject_pairs = [
    (anchor_idx, positive_idx, subj)
    for subj in subjects
    for anchor_idx, positive_idx in itertools.permutations(np.where(train_y == subj)[0], 2)
]
anchors_inds = [entry[0] for entry in same_subject_pairs]
positives_inds = [entry[1] for entry in same_subject_pairs]
labels = [entry[2] for entry in same_subject_pairs]

anchors = train_emb[anchors_inds]
positives = train_emb[positives_inds]
n_anchors = len(anchors_inds)

NB_EPOCH = 100
# With COLD_START equal to NB_EPOCH the `e > COLD_START` check in the training
# loop never becomes true, i.e. negatives are always sampled at random.
COLD_START = NB_EPOCH
BATCH_SIZE = 4
BIG_BATCH_SIZE = 512

# Index range over all anchor/positive pairs; get_batch samples from it.
inds = np.arange(n_anchors)

def get_batch(hard=False):
    """Sample one big batch of (anchor, positive, negative) embedding triplets.

    BIG_BATCH_SIZE anchor/positive pairs are drawn without replacement from
    the precomputed pair list. For each pair a negative (a sample whose label
    differs from the anchor's) is then chosen.

    Args:
        hard: If False, the negative is picked uniformly at random among all
            samples of other subjects. If True, the negative is the
            other-subject sample with the highest dot-product score against
            the anchor under the current `tpe_pred` projection.

    Returns:
        Tuple `(anchors, positives, negatives)` of arrays taken from
        `train_emb`, each with BIG_BATCH_SIZE rows.
    """
    batch_inds = np.random.choice(inds, size=BIG_BATCH_SIZE, replace=False)

    scores = None
    if hard:
        # The pairwise score matrix is only needed for hard mining, so the
        # projection is skipped entirely for random sampling.
        train_emb2 = tpe_pred.predict(train_emb, batch_size=1024)
        scores = train_emb2 @ train_emb2.T

    negative_inds = []

    for i in batch_inds:
        # True for every sample that shares the anchor's subject.
        mask = train_y == labels[i]
        if hard:
            # Rows of `scores` are indexed by sample, so look the anchor up by
            # its sample index, not by its subject label. Same-subject samples
            # are masked out before taking the argmax.
            anchor_scores = scores[anchors_inds[i]]
            negative_inds.append(np.ma.array(anchor_scores, mask=mask).argmax())
        else:
            negative_inds.append(np.random.choice(np.where(np.logical_not(mask))[0], size=1)[0])

    return anchors[batch_inds], positives[batch_inds], train_emb[negative_inds]


def test():
    """Return the first metric from `calc_metrics` (named EER here) on the dev set.

    The dev embeddings are projected with the current `tpe_pred` model, scored
    against `dev_protocol` via `get_scores`, and the two resulting score sets
    are passed to `calc_metrics`.
    """
    projected_dev = tpe_pred.predict(dev_emb)
    # presumably target / impostor scores, going by the original names tsc / isc
    first_scores, second_scores = get_scores(projected_dev, dev_protocol)
    metrics = calc_metrics(first_scores, second_scores)
    # calc_metrics yields four values; only the first one is used here.
    equal_error_rate, _, _, _ = metrics
    return equal_error_rate

# Target array passed to tpe.fit: one zero per triplet of a big batch.
# NOTE(review): presumably the triplet output is itself the loss value, so the
# targets are placeholders; confirm in build_tpe.
z = np.zeros((BIG_BATCH_SIZE,))

# Lowest dev-set EER seen so far; drives when weights are written.
mineer = float('inf')

for epoch in range(NB_EPOCH):
    print('epoch: {}'.format(epoch))

    # Hard-negative mining is requested only after the cold-start phase.
    use_hard_negatives = epoch > COLD_START
    anchor_batch, positive_batch, negative_batch = get_batch(use_hard_negatives)

    tpe.fit([anchor_batch, positive_batch, negative_batch], z,
            batch_size=BATCH_SIZE, epochs=1)

    eer = test()
    print('EER: {:.2f}'.format(eer * 100))

    # Save the weights whenever the dev EER improves on the best so far.
    improved = eer < mineer
    if improved:
        mineer = eer
        tpe.save_weights(data_dir + 'weights/weights.tpe.h5')


  base_model.add(Dense(n_out, input_dim=n_in, bias=False, weights=[W_pca], activation='linear'))
  e = merge([a_emb, p_emb, n_emb], mode=triplet_merge, output_shape=triplet_merge_shape)
  name=name)
  model = Model(input=[a, p, n], output=e)
  predict = Model(input=a, output=a_emb)


epoch: 0
Epoch 1/1
EER: 13.63
epoch: 1
Epoch 1/1
EER: 16.87
epoch: 2
Epoch 1/1
EER: 14.60
epoch: 3
Epoch 1/1
EER: 15.36
epoch: 4
Epoch 1/1
EER: 17.20
epoch: 5
Epoch 1/1
EER: 17.31
epoch: 6
Epoch 1/1
EER: 17.63
epoch: 7
Epoch 1/1
EER: 18.71
epoch: 8
Epoch 1/1
EER: 17.96
epoch: 9
Epoch 1/1
EER: 18.06
epoch: 10
Epoch 1/1
EER: 16.98
epoch: 11
Epoch 1/1
EER: 17.31
epoch: 12
Epoch 1/1
EER: 20.01
epoch: 13
Epoch 1/1
EER: 20.44
epoch: 14
Epoch 1/1
EER: 19.04
epoch: 15
Epoch 1/1
EER: 20.12
epoch: 16
Epoch 1/1
EER: 24.01
epoch: 17
Epoch 1/1
EER: 19.58
epoch: 18
Epoch 1/1
EER: 23.58
epoch: 19
Epoch 1/1
EER: 20.77
epoch: 20
Epoch 1/1
EER: 22.17
epoch: 21
Epoch 1/1
EER: 21.96
epoch: 22
Epoch 1/1
EER: 19.90
epoch: 23
Epoch 1/1
EER: 23.15
epoch: 24
Epoch 1/1
EER: 20.44
epoch: 25
Epoch 1/1
EER: 23.04
epoch: 26
Epoch 1/1
EER: 21.85
epoch: 27
Epoch 1/1
EER: 22.61
epoch: 28
Epoch 1/1
EER: 24.45
epoch: 29
Epoch 1/1
EER: 23.04
epoch: 30
Epoch 1/1
EER: 24.55
epoch: 31
Epoch 1/1
EER: 24.66
epoch: 32
Epoch 1/

EER: 25.64
epoch: 80
Epoch 1/1
EER: 31.15
epoch: 81
Epoch 1/1
EER: 23.04
epoch: 82
Epoch 1/1
EER: 32.23
epoch: 83
Epoch 1/1
EER: 28.45
epoch: 84
Epoch 1/1
EER: 29.10
epoch: 85
Epoch 1/1
EER: 28.77
epoch: 86
Epoch 1/1
EER: 30.61
epoch: 87
Epoch 1/1
EER: 28.01
epoch: 88
Epoch 1/1
EER: 29.75
epoch: 89
Epoch 1/1
EER: 30.39
epoch: 90
Epoch 1/1
EER: 30.07
epoch: 91
Epoch 1/1
EER: 28.88
epoch: 92
Epoch 1/1
EER: 31.48
epoch: 93
Epoch 1/1
EER: 28.88
epoch: 94
Epoch 1/1
EER: 30.40
epoch: 95
Epoch 1/1
EER: 29.31
epoch: 96
Epoch 1/1
EER: 27.69
epoch: 97
Epoch 1/1
EER: 31.58
epoch: 98
Epoch 1/1
EER: 28.77
epoch: 99
Epoch 1/1
EER: 31.26


### Let's test some images

In [7]:
from model import FaceVerificator
from skimage import io

###
# User-editable inputs: the two images to compare and the value handed to
# compare_many as its first argument (presumably the decision threshold).
img_path_0 = 'data/dev/21.jpg'
img_path_1 = 'data/dev/22.jpg'
dist = 0.85
###

extractor = FaceVerificator('model')
extractor.initialize_model()

img_0 = io.imread(img_path_0)
img_1 = io.imread(img_path_1)

# Each entry of faces_* is indexed below as (rectangle, embedding).
faces_0 = extractor.process_image(img_0)
faces_1 = extractor.process_image(img_1)

n_faces_0 = len(faces_0)
n_faces_1 = len(faces_1)

if n_faces_0 == 0 or n_faces_1 == 0:
    # Raise instead of calling exit(): inside a notebook exit() shuts the
    # kernel down, whereas an exception just stops this cell.
    raise RuntimeError('Error: No faces found on the {}!'.format(img_path_0 if n_faces_0 == 0 else img_path_1))

rects_0 = [face[0] for face in faces_0]
rects_1 = [face[0] for face in faces_1]

embs_0 = [face[1] for face in faces_0]
embs_1 = [face[1] for face in faces_1]

# Compare every face of image 0 against every face of image 1.
scores, comps = extractor.compare_many(dist, embs_0, embs_1)

print('Rects on image 0: {}'.format(rects_0))
print('Rects on image 1: {}'.format(rects_1))

print('Embeddings of faces on image 0:')
print(embs_0)

print('Embeddings of faces on image 1:')
print(embs_1)

print('Score matrix:')
print(scores)

print('Decision matrix :')
print(comps)

  base_model.add(Dense(n_out, input_dim=n_in, bias=False, weights=[W_pca], activation='linear'))
  e = merge([a_emb, p_emb, n_emb], mode=triplet_merge, output_shape=triplet_merge_shape)
  name=name)
  model = Model(input=[a, p, n], output=e)
  predict = Model(input=a, output=a_emb)


Rects on image 0: [rectangle(171,234,449,511)]
Rects on image 1: [rectangle(425,272,656,503)]
Embeddings of faces on image 0:
[array([ 0.13418996, -0.6970258 ,  0.3399457 , -0.10224586,  0.19762553,
       -0.4532321 , -0.11152461,  0.10998376,  0.14906865, -0.13990404,
       -0.06401891,  0.02188361, -0.13371314,  0.04735205, -0.04986608,
       -0.01053285,  0.00349552, -0.04953917, -0.08118474, -0.09452056,
        0.07530235,  0.00160445,  0.08239461, -0.03971564], dtype=float32)]
Embeddings of faces on image 1:
[array([ 0.24917145, -0.423666  ,  0.45562866, -0.22780605,  0.31777102,
       -0.52102154,  0.06990661,  0.06385479,  0.07251784, -0.24117978,
       -0.03866799, -0.00900814, -0.11571075,  0.02112562, -0.02454004,
       -0.00770312,  0.02071871, -0.06449758, -0.0772385 , -0.08955176,
        0.0880991 , -0.03264282,  0.07646518, -0.06908865], dtype=float32)]
Score matrix:
[[0.90333]]
Decision matrix :
[[ True]]
