## This is simple training code for a TensorFlow implementation of [Multicolumn networks for face recognition](https://arxiv.org/abs/1807.09192)

In [1]:
import os
import sys
import importlib
import numpy as np
import pandas as pd
import tensorflow as tf

import dataset
import input_pipeline
from evaluation import evaluate

### You can download the training data from the [competition site](https://competition.machinescansee.com/#/task_description)

### You should create a hold-out validation set with the `split_train_data.py` script

In [2]:
# Path to the split_train_data.py output directory.
# Set the MCS2019_SETS_DIR environment variable to point at your own copy
# without editing the notebook; the default keeps the original location.
base_dir = os.environ.get('MCS2019_SETS_DIR', '/mnt/data/Datasets/mcs2019/sets/')

In [3]:
# Hold-out (dev) split: the dataset description is read from the two CSV files,
# and the matching descriptor arrays are loaded from the .npy files next to them.
dev_set = dataset.read_dataset_from_file(
    os.path.join(base_dir, 'dev.csv'),
    os.path.join(base_dir, 'dev_gt.csv'),
)
dev_descr_arr = np.load(os.path.join(base_dir, 'dev_emb.npy'))
dev_gt_descr_arr = np.load(os.path.join(base_dir, 'dev_gt_emb.npy'))

In [4]:
# Training split: same layout as the dev split (two CSV files describing the
# dataset plus two .npy descriptor arrays).
train_set = dataset.read_dataset_from_file(
    os.path.join(base_dir, 'train.csv'),
    os.path.join(base_dir, 'train_gt.csv'),
)
train_descr_arr = np.load(os.path.join(base_dir, 'train_emb.npy'))
train_gt_descr_arr = np.load(os.path.join(base_dir, 'train_gt_emb.npy'))

### Model performance can be significantly improved with standard normalization (per-feature mean and standard deviation)

In [5]:
# Normalization statistics, reduced over the first axis of the training descriptors.
mean = train_descr_arr.mean(axis=0)
std = train_descr_arr.std(axis=0)

In [6]:
# Persist the normalization statistics next to the model code.
np.save(file='models/mean.npy', arr=mean)
np.save(file='models/std.npy', arr=std)

### Create model

In [7]:
# Module (dotted import path) that provides `create_model`.
model_def = 'models.multicolumn'

# Sampling parameters handed to the input pipeline.
combinations_per_person = 3
combination_size = 3
combinations_per_batch = 64

# Sizes derived from the loaded training data.
num_classes = len(train_set)                # width of the classification layer
embedding_size = train_descr_arr.shape[1]   # second dimension of the descriptor array

In [8]:
# Resolve the network implementation at runtime from its dotted module path.
network_module = importlib.import_module(name=model_def)

In [None]:
# Build the training input pipeline; its two results feed the model
# (`__embeddings`) and the loss (`__labels`).
# NOTE(review): `__create_pipelnine` looks like a misspelling of "pipeline" --
# confirm the name against input_pipeline.py before renaming anything.
__embeddings, __labels = input_pipeline.__create_pipelnine(
    train_set,
    train_descr_arr,
    combinations_per_person,
    combination_size,
    combinations_per_batch,
    buffer_size=1,
)

In [None]:
# Instantiate the network from the dynamically imported module.
# Of the three results, `__aggregated_embeddings` feeds the classification head
# and `__gamma` is passed to `evaluate`; `__aggregated_embedding` is not used
# elsewhere in this notebook.
(__aggregated_embeddings,
 __aggregated_embedding,
 __gamma) = network_module.create_model(__embeddings, embedding_size, combination_size)

### Create loss tensor and train operation

In [11]:
# Linear classifier on top of the aggregated embeddings, trained with
# softmax cross-entropy against the integer labels from the input pipeline.
with tf.variable_scope('classification-head'):
    # Bias-free projection with `num_classes` outputs; no activation, so the
    # raw logits go straight into the loss.
    __logits = tf.layers.dense(
        inputs=__aggregated_embeddings,
        units=num_classes,
        activation=None,
        use_bias=False,
        kernel_initializer=tf.initializers.random_normal(stddev=0.01),
        name='fc1',
    )
    # Per-example losses reduced to a single scalar by averaging.
    __cross_entropy_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=__labels, logits=__logits),
        name='cross_entropy_loss',
    )

In [12]:
# Step counter, incremented by the train op each time gradients are applied.
__global_step = tf.Variable(initial_value=0, trainable=False)
# The learning rate is fed at run time so it can be changed without rebuilding the graph.
__learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')

# SGD with Nesterov momentum (0.9) over every trainable variable.
optimizer = tf.train.MomentumOptimizer(learning_rate=__learning_rate,
                                       momentum=0.9,
                                       use_nesterov=True)
__grads_and_vars = optimizer.compute_gradients(loss=__cross_entropy_loss,
                                               var_list=tf.trainable_variables())
__train_op = optimizer.apply_gradients(grads_and_vars=__grads_and_vars,
                                       global_step=__global_step)

In [13]:
# Start a TensorFlow session and initialise the variables of the graph.
# GPU options: memory fraction set to 0.5 per process, with allocation growing on demand.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5, allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
session = tf.Session(config=config)

# NOTE(review): the previous comment here mentioned a `__phase_train` flag, but
# no such tensor is defined in this notebook -- confirm whether the model
# module still needs one.
session.run(tf.global_variables_initializer())
session.run(tf.local_variables_initializer())

### Evaluate randomly initialized multicolumn network

In [14]:
# False-positive rate at which the true-positive rate is reported (tpr@fpr).
required_fpr = 1e-5

In [15]:
%%time
tpr, mean = evaluate(session, 
                     __embeddings, 
                     __gamma,                      
                     person_set=dev_set, 
                     embeddings=dev_descr_arr, 
                     gt_embeddings=dev_gt_descr_arr, 
                     required_fpr=required_fpr)
print('score 1 (tpr@fpr={2}): {0:.4f} score 2 (mean distance): {1:.4f}'.format(tpr, mean, required_fpr)) 

100%|██████████| 233/233 [00:07<00:00, 29.73it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.5640 score 2 (mean distance): 0.8204
CPU times: user 11.3 s, sys: 1.27 s, total: 12.6 s
Wall time: 9.85 s


### Train network and evaluate results

In [16]:
# Training schedule.
max_steps = 10000    # stop once the global step reaches this value
log_every = 100      # print the loss every N steps
eval_every = 1000    # score the hold-out set every N steps

step = 0
feed_dict = {__learning_rate: 0.01}  # constant learning rate for the whole run
while step < max_steps:
    # One optimisation step; `step` is refreshed from the graph's global step.
    # NOTE(review): `__global_step` is fetched in the same run() call as
    # `__train_op`; confirm whether the fetched value is pre- or post-increment
    # if exact alignment of the logging/evaluation steps matters.
    cross_entropy_loss, _, step = session.run(
        [__cross_entropy_loss, __train_op, __global_step],
        feed_dict=feed_dict)

    # Log info
    if step % log_every == 0:
        print('Step: [%d]\tCross entropy loss %2.3f' % (step, cross_entropy_loss))

    if step % eval_every == 0:
        # Bound to `mean_distance` (not `mean`) so the normalization mean
        # computed from the training descriptors is not overwritten.
        tpr, mean_distance = evaluate(session,
                                      __embeddings,
                                      __gamma,
                                      person_set=dev_set,
                                      embeddings=dev_descr_arr,
                                      gt_embeddings=dev_gt_descr_arr,
                                      required_fpr=required_fpr)
        print('score 1 (tpr@fpr={2}): {0:.4f} score 2 (mean distance): {1:.4f}'.format(tpr, mean_distance, required_fpr))

Step: [100]	Cross entropy loss 6.549
Step: [200]	Cross entropy loss 6.541
Step: [300]	Cross entropy loss 6.534
Step: [400]	Cross entropy loss 6.529
Step: [500]	Cross entropy loss 6.522
Step: [600]	Cross entropy loss 6.514
Step: [700]	Cross entropy loss 6.507
Step: [800]	Cross entropy loss 6.499
Step: [900]	Cross entropy loss 6.492


  1%|          | 2/233 [00:00<00:12, 18.89it/s]

Step: [1000]	Cross entropy loss 6.489


100%|██████████| 233/233 [00:14<00:00,  9.32it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.5785 score 2 (mean distance): 0.8202
Step: [1100]	Cross entropy loss 6.483
Step: [1200]	Cross entropy loss 6.472
Step: [1300]	Cross entropy loss 6.465
Step: [1400]	Cross entropy loss 6.457
Step: [1500]	Cross entropy loss 6.458
Step: [1600]	Cross entropy loss 6.449
Step: [1700]	Cross entropy loss 6.442
Step: [1800]	Cross entropy loss 6.437
Step: [1900]	Cross entropy loss 6.426


  1%|          | 2/233 [00:00<00:12, 18.05it/s]

Step: [2000]	Cross entropy loss 6.417


100%|██████████| 233/233 [00:13<00:00, 17.42it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.6106 score 2 (mean distance): 0.8198
Step: [2100]	Cross entropy loss 6.406
Step: [2200]	Cross entropy loss 6.409
Step: [2300]	Cross entropy loss 6.398
Step: [2400]	Cross entropy loss 6.396
Step: [2500]	Cross entropy loss 6.385
Step: [2600]	Cross entropy loss 6.372
Step: [2700]	Cross entropy loss 6.366
Step: [2800]	Cross entropy loss 6.359
Step: [2900]	Cross entropy loss 6.346


  1%|          | 2/233 [00:00<00:11, 19.71it/s]

Step: [3000]	Cross entropy loss 6.352


100%|██████████| 233/233 [00:12<00:00, 24.19it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.6514 score 2 (mean distance): 0.8197
Step: [3100]	Cross entropy loss 6.343
Step: [3200]	Cross entropy loss 6.341
Step: [3300]	Cross entropy loss 6.334
Step: [3400]	Cross entropy loss 6.324
Step: [3500]	Cross entropy loss 6.317
Step: [3600]	Cross entropy loss 6.305
Step: [3700]	Cross entropy loss 6.299
Step: [3800]	Cross entropy loss 6.285
Step: [3900]	Cross entropy loss 6.287


  1%|▏         | 3/233 [00:00<00:11, 20.68it/s]

Step: [4000]	Cross entropy loss 6.289


100%|██████████| 233/233 [00:13<00:00, 17.29it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.6820 score 2 (mean distance): 0.8200
Step: [4100]	Cross entropy loss 6.282
Step: [4200]	Cross entropy loss 6.261
Step: [4300]	Cross entropy loss 6.257
Step: [4400]	Cross entropy loss 6.255
Step: [4500]	Cross entropy loss 6.236
Step: [4600]	Cross entropy loss 6.248
Step: [4700]	Cross entropy loss 6.211
Step: [4800]	Cross entropy loss 6.216
Step: [4900]	Cross entropy loss 6.211


  1%|▏         | 3/233 [00:00<00:11, 19.43it/s]

Step: [5000]	Cross entropy loss 6.203


100%|██████████| 233/233 [00:11<00:00, 19.51it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.7154 score 2 (mean distance): 0.8206
Step: [5100]	Cross entropy loss 6.210
Step: [5200]	Cross entropy loss 6.196
Step: [5300]	Cross entropy loss 6.206
Step: [5400]	Cross entropy loss 6.164
Step: [5500]	Cross entropy loss 6.166
Step: [5600]	Cross entropy loss 6.164
Step: [5700]	Cross entropy loss 6.151
Step: [5800]	Cross entropy loss 6.149
Step: [5900]	Cross entropy loss 6.142


  1%|          | 2/233 [00:00<00:11, 19.34it/s]

Step: [6000]	Cross entropy loss 6.148


100%|██████████| 233/233 [00:12<00:00, 19.35it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.7235 score 2 (mean distance): 0.8213
Step: [6100]	Cross entropy loss 6.137
Step: [6200]	Cross entropy loss 6.116
Step: [6300]	Cross entropy loss 6.142
Step: [6400]	Cross entropy loss 6.099
Step: [6500]	Cross entropy loss 6.100
Step: [6600]	Cross entropy loss 6.105
Step: [6700]	Cross entropy loss 6.105
Step: [6800]	Cross entropy loss 6.072
Step: [6900]	Cross entropy loss 6.083


  1%|          | 2/233 [00:00<00:12, 18.69it/s]

Step: [7000]	Cross entropy loss 6.063


100%|██████████| 233/233 [00:13<00:00, 16.77it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.7332 score 2 (mean distance): 0.8221
Step: [7100]	Cross entropy loss 6.070
Step: [7200]	Cross entropy loss 6.056
Step: [7300]	Cross entropy loss 6.046
Step: [7400]	Cross entropy loss 6.046
Step: [7500]	Cross entropy loss 6.067
Step: [7600]	Cross entropy loss 6.019
Step: [7700]	Cross entropy loss 6.040
Step: [7800]	Cross entropy loss 6.007
Step: [7900]	Cross entropy loss 6.000


  1%|▏         | 3/233 [00:00<00:11, 20.63it/s]

Step: [8000]	Cross entropy loss 6.010


100%|██████████| 233/233 [00:13<00:00, 16.73it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.7394 score 2 (mean distance): 0.8229
Step: [8100]	Cross entropy loss 5.991
Step: [8200]	Cross entropy loss 5.997
Step: [8300]	Cross entropy loss 5.961
Step: [8400]	Cross entropy loss 5.943
Step: [8500]	Cross entropy loss 5.970
Step: [8600]	Cross entropy loss 5.953
Step: [8700]	Cross entropy loss 5.970
Step: [8800]	Cross entropy loss 5.955
Step: [8900]	Cross entropy loss 5.981


  1%|▏         | 3/233 [00:00<00:11, 20.82it/s]

Step: [9000]	Cross entropy loss 5.949


100%|██████████| 233/233 [00:12<00:00, 18.85it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.7427 score 2 (mean distance): 0.8237
Step: [9100]	Cross entropy loss 5.938
Step: [9200]	Cross entropy loss 5.915
Step: [9300]	Cross entropy loss 5.901
Step: [9400]	Cross entropy loss 5.931
Step: [9500]	Cross entropy loss 5.902
Step: [9600]	Cross entropy loss 5.896
Step: [9700]	Cross entropy loss 5.889
Step: [9800]	Cross entropy loss 5.894
Step: [9900]	Cross entropy loss 5.907


  1%|          | 2/233 [00:00<00:11, 19.80it/s]

Step: [10000]	Cross entropy loss 5.865


100%|██████████| 233/233 [00:12<00:00, 18.99it/s]


Negative pair count:  4556480
Positive pair count:  19640
score 1 (tpr@fpr=1e-05): 0.7463 score 2 (mean distance): 0.8246


### Typically, after training, tpr@fpr=1e-5 is in the range 0.74...0.75. This gave me 0.8166 on the [private leaderboard](https://competition.machinescansee.com/#/leaderboard/private)