In [None]:
import json
import os
import pickle
import random
from datetime import datetime

import keras
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K
from keras.applications import MobileNet, ResNet50
from keras.callbacks import Callback, LearningRateScheduler
from keras.layers import Input, Flatten, Dense, Dropout, Lambda, Conv1D, Attention, GlobalAveragePooling1D, BatchNormalization
from keras.models import Model
from keras.optimizers import RMSprop, SGD, Adam
from keras.preprocessing import image
from keras.regularizers import l2
from keras_facenet import FaceNet
from matplotlib import pyplot as plt
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from tensorflow.data import Dataset

# Seed the Python, TensorFlow and NumPy random generators up front.
# Note the TensorFlow seed (12) differs from the other two (123).
random.seed(123)
tf.random.set_seed(12)
np.random.seed(123)

In [None]:
# Turn on memory growth for every GPU that TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# The input embeddings

The data in the input pickle file is stored in a dictionary structure:
```
{
    'FAMILY_ID/PERSON_ID': [EMB_1, EMB_2...EMB_N],
    .
    .
    .
}
```

In [None]:
# Read the pre-computed embeddings of the training images.
# NOTE: pickle.load can execute code stored in the file -- only load trusted files.
with open('data/train_img_embeddings.pkl', 'rb') as f:
    train_embeddings = pickle.load(f)
print(f'The keys examples: {list(train_embeddings.keys())[:5]}')

# Shape of the first stored entry, shown as a sanity check
embedding_shape = next(iter(train_embeddings.values())).shape
print(f'Embeddings shape: {embedding_shape}')

# Training pairs generating

The available training pairs from the csv files are split into training and validation sets. Those pairs are positive (there is a blood relation). For each set (train/valid) we additionally generate negative pairs.

Positive pairs are generated according to the input csv file. For each person in a positive pair we create one negative pair.
In total we'll have twice as many negative pairs as positive pairs.

In [None]:
def make_image_pair(pair, embeddings=None, to_3channel=False):
    '''
    Generate every cross pairing of the embeddings of two persons.

    Arguments:
    pair -- tuple (p1, p2) of person keys written as 'familyID/personID'
    embeddings -- optional mapping from person key to an array holding that
                  person's embeddings along its first axis; when omitted the
                  notebook-level train_embeddings is used
    to_3channel -- when True every embedding is turned into a (32, 32, 3)
                   image-like array (only possible for 512-value embeddings)

    Yields:
    (emb1, emb2) -- one tuple per combination of an embedding of p1 with an
                    embedding of p2; a trailing channel axis is added to each

    Raises:
    KeyError -- on the first iteration, when a person key is not in the mapping
    '''
    def emb_to_3channel(emb):
        # Zero-pad to twice the length, repeat the column for 3 channels and
        # fold the result into a 32x32 "image".
        emb_pad = np.concatenate([emb, np.zeros(emb.shape)], axis=0)
        emb_mc = np.tile(emb_pad, (1, 3))
        return np.reshape(emb_mc, (32, 32, 3))

    if embeddings is None:
        embeddings = train_embeddings

    p1, p2 = pair
    # The lookup is done with backslash-separated keys, so convert the separator.
    img_path1 = p1.replace('/', '\\')
    img_path2 = p2.replace('/', '\\')

    dir1 = np.expand_dims(embeddings[img_path1], axis=-1)
    dir2 = np.expand_dims(embeddings[img_path2], axis=-1)

    for emb1 in dir1:
        for emb2 in dir2:
            if to_3channel:
                yield emb_to_3channel(emb1), emb_to_3channel(emb2)
            else:
                yield emb1, emb2

In [None]:
def pairs_set(input_pairs):
    '''
    Flatten labelled person pairs into labelled embedding pairs.

    Arguments:
    input_pairs -- iterable of (pair, label) items, pair being what
                   make_image_pair accepts

    Yields:
    (emb_pair, label) -- every embedding pair of a person pair with its label
    '''
    for person_pair, related in input_pairs:
        try:
            for embedding_pair in make_image_pair(person_pair):
                yield embedding_pair, related
        except KeyError:
            # A person without stored embeddings: skip this relationship.
            continue

def batched_pairs(input_pairs, batch_size, dataset_period):
    '''
    Group the examples produced by pairs_set into fixed-size batches.

    Arguments:
    input_pairs -- iterable of (pair, label) items, forwarded to pairs_set
    batch_size -- number of examples per yielded batch
    dataset_period -- only every dataset_period-th example is kept

    Yields:
    (inputs, labels) -- inputs is a dict with the keys 'input_1:0' and
                        'input_2:0' holding the stacked embeddings, labels is
                        a float array; a trailing incomplete batch is dropped
    '''
    first_batch, second_batch, label_batch = [], [], []
    for position, ((emb1, emb2), label) in enumerate(pairs_set(input_pairs), start=1):
        # Get every nth sample
        if position % dataset_period != 0:
            continue

        first_batch.append(emb1)
        second_batch.append(emb2)
        label_batch.append(label)
        if len(label_batch) == batch_size:
            yield (
                {'input_1:0': np.array(first_batch), 'input_2:0': np.array(second_batch)},
                np.array(label_batch).astype(float),
            )
            first_batch, second_batch, label_batch = [], [], []

In [None]:
with open('train_val_set.json', 'r') as f:
    train_val_set = json.load(f)

# The file is expected to hold exactly four lists, in this order:
# positive train, negative train, positive validation, negative validation.
train_rlt_list, neg_train_rltshps, valid_rlt_list, neg_valid_rltshps = list(train_val_set.values())

# Attach the label to every pair: True for related, False for unrelated.
train_rlts = [(pair, True) for pair in train_rlt_list] + [(pair, False) for pair in neg_train_rltshps]
val_rlts = [(pair, True) for pair in valid_rlt_list] + [(pair, False) for pair in neg_valid_rltshps]

random.shuffle(train_rlts)
random.shuffle(val_rlts)



# Siamese network

Initial experiments are done with a deep Conv1D neural network; a simple attention module is available as an additional option for experimenting.

In [None]:
def conv1D_model(input_shape, l2_value, dropout):
    '''
    Create the Conv1D base network shared by both branches of the Siamese model.

    Arguments:
    input_shape -- shape of the input layer, (embedding_length, 1)
    l2_value -- L2 regularization factor of the Conv1D and Dense kernels
    dropout -- dropout rate applied after every BatchNormalization

    Returns:
    Model -- Keras model whose output has input_shape[0] // 16 features
    '''
    inputs = Input(shape=input_shape)

    # A single-filter convolution with a kernel of 33 over the embedding.
    x = Conv1D(1, 33, kernel_regularizer=l2(l2_value), activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = Dropout(dropout)(x)
    x = Flatten()(x)

    # Projection head; the Dense layer is named 'inference'.
    x = Dense(inputs.shape[1] // 16, kernel_regularizer=l2(l2_value), activation='relu', name='inference')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout)(x)
    return Model(inputs, x)

def mobilenet(input_shape, l2_value, dropout):
    '''
    Create a base network from a randomly initialised MobileNet (alpha=0.5)
    followed by two tanh Dense layers of 64 and 32 units.

    Arguments:
    input_shape -- shape of the input layer
    l2_value -- L2 regularization factor
    dropout -- dropout rate passed to MobileNet and used after each Dense layer

    Returns:
    Model -- Keras model with a 32-feature output
    '''
    backbone = MobileNet(
        input_shape=input_shape,
        alpha=0.5,
        dropout=dropout,
        include_top=False,
        pooling='avg',
        weights=None,
    )

    # NOTE(review): the regularizer is assigned after the layers were built;
    # confirm that Keras actually adds the penalty to the model losses.
    for layer in backbone.layers:
        layer.trainable = True
        if hasattr(layer, 'kernel_regularizer'):
            layer.kernel_regularizer = keras.regularizers.l2(l2_value)

    features = backbone.output
    for units in (64, 32):
        features = Dense(units, kernel_regularizer=l2(l2_value), activation='tanh')(features)
        features = Dropout(dropout)(features)
    return Model(backbone.input, features)

def resnet50(input_shape, l2_value, dropout=None):
    '''
    Create a base network from a randomly initialised ResNet50 followed by
    two tanh Dense layers of 64 and 32 units.

    Arguments:
    input_shape -- shape of the input layer
    l2_value -- L2 regularization factor
    dropout -- dropout rate used after each Dense layer; when omitted the
               notebook-level `dropout` setting is used, which is what the
               two-argument form of this function did before

    Returns:
    Model -- Keras model with a 32-feature output
    '''
    if dropout is None:
        # Keep the two-argument call working exactly as it did.
        dropout = globals()['dropout']

    backbone = ResNet50(
        input_shape=input_shape,
        include_top=False,
        pooling='avg',
        weights=None
    )

    # NOTE(review): the regularizer is assigned after the layers were built;
    # confirm that Keras actually adds the penalty to the model losses.
    for layer in backbone.layers:
        layer.trainable = True
        if hasattr(layer, 'kernel_regularizer'):
            setattr(layer, 'kernel_regularizer', keras.regularizers.l2(l2_value))

    x = Dense(64, kernel_regularizer=l2(l2_value), activation='tanh')(backbone.output)
    x = Dropout(dropout)(x)
    x = Dense(32, kernel_regularizer=l2(l2_value), activation='tanh')(x)
    x = Dropout(dropout)(x)
    return Model(backbone.input, x)

def attention_model(input_shape, train_mode=True):
    '''
    Create a base network built around a self-attention block.

    Inspired by code example:
    https://www.tensorflow.org/api_docs/python/tf/keras/layers/Attention

    Arguments:
    input_shape -- shape of the input layer; the last axis is squeezed away
    train_mode -- value passed as `training` to the Attention layer

    Returns:
    Model -- Keras model whose output concatenates the pooled query encoding
             and the pooled attention output

    NOTE(review): the signature differs from the other builders
    (input_shape, l2_value, dropout), and the input is declared int32 and used
    as Embedding indices with input_dim=input_shape[1] -- confirm both against
    the way the base network is created and fed.
    '''
    input = Input(shape=input_shape, dtype='int32')
    # Query and value are the same tensor, i.e. this is self-attention.
    query_input = value_input = K.squeeze(input, -1)
    
    # Embedding lookup.
    token_embedding = tf.keras.layers.Embedding(input_dim=input_shape[1], output_dim=64)
    # Query embeddings of shape [batch_size, Tq, dimension].
    query_embeddings = token_embedding(query_input)
    # Value embeddings of shape [batch_size, Tv, dimension].
    value_embeddings = token_embedding(value_input)

    # Two separate Conv1D layers (no shared weights) encode query and value.
    query_seq_encoding = Conv1D(input.shape[1] // 4, 5, activation='relu', padding='same')(
        query_embeddings)
    value_seq_encoding = Conv1D(input.shape[1] // 4, 5, activation='relu', padding='same')(
        value_embeddings)
    
    query_value_attention_seq = tf.keras.layers.Attention()(
        [query_seq_encoding, value_seq_encoding], training=train_mode)
    
    # Reduce over the sequence axis to produce encodings of shape
    # [batch_size, filters].
    query_encoding = tf.keras.layers.GlobalAveragePooling1D()(
        query_seq_encoding)
    query_value_attention = tf.keras.layers.GlobalAveragePooling1D()(
        query_value_attention_seq)
    
    # Concatenate query and document encodings to produce a DNN input layer.
    attn_out_layer = tf.keras.layers.Concatenate()([query_encoding, query_value_attention])
    return Model(input, attn_out_layer)

# Loss and metrics functions

In [None]:
# Margin of contrastive_loss: unrelated pairs are penalised only while their
# predicted distance is below this value.
MARGIN = 3.0

def euclidean_distance(vectors):
    '''
    Euclidean distance between the paired rows of two batches.

    Arguments:
    vectors -- pair (x, y) of tensors that are reduced over axis 1

    Returns:
    Tensor with one distance per row (axis 1 is reduced away)
    '''
    left, right = vectors
    squared_distance = K.sum(K.square(left - right), axis=1)
    # The sum is clamped to epsilon before the square root is taken.
    return K.sqrt(K.maximum(squared_distance, K.epsilon()))

def cosine_similarity(vectors):
    '''
    Cosine distance between the paired rows of two batches.

    Despite its name this function returns 1 - cosine similarity, so that a
    smaller value means more similar vectors.

    Arguments:
    vectors -- pair (x, y) of 2-D tensors of equal shape

    Returns:
    Tensor with one value per row
    '''
    left, right = vectors
    dot_product = tf.einsum('ij,ij->i', left, right)
    norm_product = tf.norm(left, axis=1) * tf.norm(right, axis=1)
    # Epsilon keeps the division defined for zero vectors.
    return 1. - dot_product / (norm_product + K.epsilon())

def cos_euc_dist(vectors):
    '''
    Euclidean distance scaled by the cosine distance of the same vectors.

    Arguments:
    vectors -- pair (x, y) of 2-D tensors of equal shape

    Returns:
    Tensor with one value per row
    '''
    euc = euclidean_distance(vectors)
    # cosine_similarity() already returns 1 - cos(x, y). Subtracting it from 1
    # again would multiply by the similarity instead, giving a "distance" of 0
    # for orthogonal vectors and a negative one for opposite vectors.
    cos_dist = cosine_similarity(vectors)
    return cos_dist * euc

def eucl_dist_output_shape(shapes):
    '''
    Output shape declared for the distance Lambda layer.

    Arguments:
    shapes -- pair of input shapes

    Returns:
    (batch, 1), with batch taken from the first input shape
    '''
    first_shape, _ = shapes
    batch_dim = first_shape[0]
    return (batch_dim, 1)

def contrastive_loss(y_true, y_pred):
    '''
    Class-weighted contrastive loss, after Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    Arguments:
    y_true -- labels, 1 for related pairs and 0 for unrelated pairs
    y_pred -- predicted distances

    Returns:
    Mean over the batch of the weighted loss: related pairs pay the squared
    distance, unrelated pairs pay the squared shortfall to MARGIN
    '''
    # Related pairs are weighted 30 times as much as unrelated pairs.
    positive_weight = 30.
    negative_weight = 1.

    square_pred = K.square(y_pred)
    margin_square = K.square(K.maximum(MARGIN - y_pred, 0))
    weight = positive_weight * y_true + negative_weight * (1 - y_true)
    return K.mean(weight * (y_true * square_pred + (1 - y_true) * margin_square))

# Run training

In [None]:
# Training settings; every name below is also stored in train_config.
learning_rate = 1e-3
l2_value = 1e-4
dropout = 0.3
epochs = 2000
batch_size = 64
# Keep every n-th generated embedding pair (1 keeps all of them)
dataset_period = 1
# Name of the base-network builder function, resolved with eval()
model_name = 'conv1D_model'
# Name of the distance function, resolved with eval():
# 'euclidean_distance', 'cosine_similarity' or 'cos_euc_dist'
distance_type = 'euclidean_distance'
# Name of the optimizer class, resolved with eval(): 'RMSprop', 'SGD' or 'Adam'
optimizer = 'Adam'

# Learning rate scheduler
def scheduler(epoch, lr, base_lr=None):
    '''
    Piecewise-constant learning-rate schedule for LearningRateScheduler.

    The callback hands in the *current* learning rate at every epoch, so
    dividing `lr` itself would shrink the rate again and again, epoch after
    epoch. The steps are therefore taken relative to the base rate.

    Arguments:
    epoch -- index of the epoch that is about to start
    lr -- current learning rate (part of the callback signature, not used)
    base_lr -- rate the steps are relative to; defaults to the notebook-level
               learning_rate

    Returns:
    base_lr up to epoch 19, base_lr / 1.5 up to epoch 99, base_lr / 2 up to
    epoch 1999 and base_lr / 10 afterwards
    '''
    if base_lr is None:
        base_lr = learning_rate

    if epoch < 20:
        return base_lr
    elif epoch < 100:
        return base_lr / 1.5
    elif epoch < 2000:
        return base_lr / 2
    else:
        return base_lr / 10
    
# Callback wrapping the schedule; it is left out of the callbacks list that
# is handed to model.fit.
lr_callback = LearningRateScheduler(scheduler)

# Snapshot of the training settings, saved alongside the run configuration
train_config = {
    'learning_rate': learning_rate,
    'l2_value': l2_value,
    'dropout': dropout,
    'epochs': epochs,
    'batch_size': batch_size,
    'model_name': model_name,
    'dataset_period': dataset_period,
    'distance_type': distance_type,
    'optimizer': optimizer,
}

In [None]:
# Input shape per builder: (512, 1) for conv1D_model, (32, 32, 3) for mobilenet.
# The builder is looked up by the name stored in model_name and is called
# with (input_shape, l2_value, dropout).
input_shape = (512, 1)
base_network = globals()[model_name](input_shape, l2_value, dropout)
base_network.count_params()

In [None]:
# Siamese network: both inputs go through the same base network, so the two
# branches share their weights.
# NOTE(review): batched_pairs feeds these inputs through a dict keyed
# 'input_1:0' / 'input_2:0'; confirm the keys match the names Keras gives to
# these two Input layers (the base network has already created an Input).
input1, input2 = Input(shape=input_shape), Input(shape=input_shape)
processed1, processed2 = base_network(input1), base_network(input2)

In [None]:
# Look up the configured distance function by its name.
dist_function = globals()[distance_type]
distance = Lambda(dist_function,
                  output_shape=eucl_dist_output_shape)([processed1, processed2])

model = Model([input1, input2], distance)
# `optimizer` keeps holding the configured class name: binding the created
# object to a name of its own lets this cell be executed more than once.
optimizer_instance = globals()[optimizer](learning_rate=learning_rate)
model.compile(loss=contrastive_loss, optimizer=optimizer_instance)

## Run the TensorBoard plugin to track the training progress

In [None]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [None]:
%tensorboard --logdir=./logs --port=7007

## Training Callbacks

In [None]:
# Collect the label of every generated example (batches of size 1), used by
# the metrics callback and to derive the number of steps per epoch.
val_labels = [labels[0] for _, labels in batched_pairs(val_rlts, 1, dataset_period)]
train_labels = [labels[0] for _, labels in batched_pairs(train_rlts, 1, dataset_period)]

val_len = len(val_labels)
train_len = len(train_labels)

In [None]:
def val_distance_stats(predictions, labels):
    '''
    Mean and standard deviation of the predicted distances per class.

    Arguments:
    predictions -- predicted distances, one per example (array or sequence)
    labels -- binary labels aligned with predictions, truthy for related pairs

    Returns:
    (pos_mean, pos_std, neg_mean, neg_std) -- statistics of the related
    (positive) and of the unrelated (negative) examples
    '''
    predictions = np.asarray(predictions)
    # The builtin bool replaces the np.bool alias, which NumPy no longer has.
    related_mask = np.asarray(labels).astype(bool)

    val_pos = predictions[related_mask]
    val_neg = predictions[~related_mask]
    val_pos_m, val_pos_s = np.mean(val_pos), np.std(val_pos)
    val_neg_m, val_neg_s = np.mean(val_neg), np.std(val_neg)

    return val_pos_m, val_pos_s, val_neg_m, val_neg_s
    
    
class MetricCallback(keras.callbacks.Callback):
    '''
    Write the losses and the validation metrics to TensorBoard after an epoch.

    The validation set is read from the notebook-level val_rlts through
    batched_pairs (with the notebook-level batch_size and dataset_period).
    The decision threshold is the midpoint between the mean distance of the
    related and of the unrelated validation pairs.
    '''
    def __init__(self, logdir):
        # Run the base-class initialiser (super(Callback, self) skipped it).
        super().__init__()
        os.makedirs(logdir, exist_ok=True)
        self.train_writer = tf.summary.create_file_writer(logdir + '/train')
        self.valid_writer = tf.summary.create_file_writer(logdir + '/valid')
        self.step_number = 0

    def tb_writer(self, items_to_write, wtype):
        '''Write the scalars of items_to_write with the 'train' or the validation writer.'''
        writer = self.train_writer if wtype == 'train' else self.valid_writer

        with writer.as_default():
            for name, value in items_to_write.items():
                tf.summary.scalar(name, value, self.step_number)
            writer.flush()

    def on_epoch_end(self, epoch, logs=None):
        '''Compute the validation metrics and log them with the epoch losses.'''
        logs = logs or {}
        class_encoded = {
            0: 'not_related',
            1: 'related'
        }

        # Take the labels from the very batches that are predicted, so that
        # predictions and labels always have the same length. Slicing a
        # separate label list with [:-(n % batch_size)] returns an empty
        # array whenever the remainder is 0.
        val_pred = []
        val_true = []
        for inputs, labels in batched_pairs(val_rlts, batch_size, dataset_period):
            # Only the inputs go to predict, not the (inputs, labels) tuple.
            val_pred.append(self.model.predict(inputs, verbose=0))
            val_true.append(labels)

        val_pred = np.concatenate(val_pred, axis=0)
        val_true = np.concatenate(val_true, axis=0).astype(int)
        val_pos_m, val_pos_s, val_neg_m, val_neg_s = val_distance_stats(val_pred, val_true)
        threshold = (val_pos_m + val_neg_m) / 2

        # Precision and recall: a distance below the threshold means "related"
        val_pred = (val_pred.squeeze() < threshold).astype(int)
        valid_precision, valid_recall, _, _ = precision_recall_fscore_support(val_true, val_pred, labels=[0, 1])
        valid_accuracy = accuracy_score(val_true, val_pred)

        self.tb_writer({'train/loss': logs['loss']}, wtype='train')

        valid_items = {'valid/loss': logs['val_loss']}
        for k, v in class_encoded.items():
            valid_items['valid/precision/' + v] = valid_precision[k]
            valid_items['valid/recall/' + v] = valid_recall[k]
            valid_items['valid/dist_mean/' + v] = val_pos_m if k else val_neg_m
            valid_items['valid/dist_std/' + v] = val_pos_s if k else val_neg_s

        valid_items['valid/accuracy'] = valid_accuracy

        self.tb_writer(valid_items, wtype='valid')
        self.step_number += 1

In [None]:
# Name of this run, used for the config, log, checkpoint and submission files.
# NOTE: this rebinds model_name, which up to here held the name of the
# base-network builder; the saved train_config still holds the builder name.
model_name = 'mobile_007_euc'

#Save training configuration
os.makedirs('configs', exist_ok=True)
with open(f'configs/{model_name}.json', 'w') as f:
    json.dump(train_config, f)

logdir = os.path.join('logs', model_name)
ckpt_dir = os.path.join('checkpoints', model_name)
# exist_ok lets the cell be run again for an already existing run directory
os.makedirs(ckpt_dir, exist_ok=True)
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)
# NOTE(review): `period` may be deprecated in the installed Keras version in
# favour of `save_freq` -- confirm.
ckpt_callback = keras.callbacks.ModelCheckpoint(
    filepath=os.path.join(ckpt_dir, 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'),
    save_weights_only=True,
    period=5
)
metric_callback = MetricCallback(logdir)

In [None]:
def repeat_generator(rlts, batch_size, dataset_period):
    '''
    Endless stream of batches: batched_pairs is started over every time it
    is exhausted.

    Arguments:
    rlts -- labelled person pairs, forwarded to batched_pairs
    batch_size -- number of examples per batch
    dataset_period -- only every dataset_period-th example is kept

    Yields:
    The batches produced by batched_pairs
    '''
    while True:
        yield from batched_pairs(rlts, batch_size, dataset_period)
            
# Train on endless generators; the numbers of steps are the numbers of full
# batches, as batched_pairs drops a trailing incomplete batch.
# The learning-rate callback is left out of the callbacks list.
model.fit(
    repeat_generator(train_rlts, batch_size, dataset_period),
    epochs=epochs,
    steps_per_epoch=train_len//batch_size,
    validation_data=repeat_generator(val_rlts, batch_size, dataset_period),
    validation_steps=val_len//batch_size,
    callbacks=[metric_callback, ckpt_callback]#, lr_callback]
)

# Submission

In [None]:
# Load submission pairs
# The columns 'img_pair' and 'is_related' of this file are used further down.
submission_path = 'data/sample_submission.csv'
submission_df = pd.read_csv(submission_path)

In [None]:
# Load models
# NOTE(review): the checkpoint path is hard-coded to the run 'model_6' and is
# independent of model_name / ckpt_dir -- confirm it is the intended run.
ckpt_path = 'checkpoints/model_6/weights.70-0.11.hdf5'
model.load_weights(ckpt_path)
# FaceNet embedder used to embed the test images
embedder = FaceNet()

In [None]:
# Get the threshold according to validation ds
# The validation batches are produced the same way as during training; the
# labels come from the very batches that are predicted.
val_pred, val_true = [], []
for inputs, labels in batched_pairs(val_rlts, batch_size, dataset_period):
    val_pred.append(model.predict(inputs, verbose=0))
    val_true.append(labels)
val_pred = np.concatenate(val_pred, axis=0)
val_true = np.concatenate(val_true, axis=0).astype(int)
val_pos_m, val_pos_s, val_neg_m, val_neg_s = val_distance_stats(val_pred, val_true)
# Midpoint between (positive mean + std) and (negative mean - std)
threshold = ((val_pos_m + val_pos_s) + (val_neg_m - val_neg_s)) / 2

# Iterate over submission pairs
predictions = []
for idx, row in submission_df.iterrows():
    # Load images
    img_pair = row['img_pair']
    img1_name, img2_name = img_pair.split('-')
    img1_path = os.path.join('data/test', img1_name)
    img2_path = os.path.join('data/test', img2_name)
    img1 = np.array(image.load_img(img1_path)).astype('float32')
    img2 = np.array(image.load_img(img2_path)).astype('float32')

    # Get FaceNet embeddings and bring them to the input shape of the model
    # (assumes one embedding of prod(input_shape) values per image -- TODO confirm)
    embedding1 = np.reshape(embedder.embeddings([img1]), (-1,) + tuple(input_shape))
    embedding2 = np.reshape(embedder.embeddings([img2]), (-1,) + tuple(input_shape))

    # Do an inference; the predicted distance is stored as a plain float
    y_pred = model.predict([embedding1, embedding2], verbose=0)
    predictions.append(float(np.squeeze(y_pred)))

    # Print step
    if idx % 100 == 0:
        print(f'Processed rows: {idx}')

# If the distance is smaller than the threshold then there is the relation.
# The whole column is assigned at once: writing single elements through a
# Series taken from the frame is chained assignment and may not reach the
# frame.
submission_df['is_related'] = [int(p < threshold) for p in predictions]
is_related = submission_df['is_related']

submission_df.to_csv(f'submission_{model_name}.csv', index=False)

In [None]:
# Histogram (20 bins) of the distances predicted for the submission pairs
plt.hist(predictions, 20)
plt.show()

In [None]:
# Label the submission again with a manually chosen distance threshold:
# a distance below the threshold means "related" (1), otherwise 0.
thr = 0.85
# Assign the whole column at once instead of writing single elements through
# a Series taken from the frame (chained assignment).
submission_df['is_related'] = [int(np.squeeze(p) < thr) for p in predictions]
submission_df.to_csv('submission_test.csv', index=False)