In [1]:
import json
import os
import pickle
import random
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
import xgboost as xgb
from matplotlib import pyplot as plt
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Seed Python's and NumPy's global RNGs so that the random.shuffle calls on
# the training/validation pairs further down are repeatable between runs.
random.seed(123)
np.random.seed(123)

2021-09-18 17:39:16.037002: I tensorflow/stream_executor/platform/default/dso_loader.cc:54] Successfully opened dynamic library libcudart.so.11.0


In [None]:
import numpy as np
from tensorflow.keras import layers

# Hyperparameters of the ViT feature extractor built by `vit` below.
input_shape = (32, 32, 3)
# NOTE(review): no resize to `image_size` is visible in this notebook, yet
# `num_patches` is derived from `image_size` (72) rather than from
# `input_shape` (32) — confirm which one is intended.
image_size = 72  # Intended size to resize input images to
patch_size = 6  # Size of the patches to be extracted from the input images
num_patches = (image_size // patch_size) ** 2
projection_dim = 64
num_heads = 4
transformer_units = [
    projection_dim * 2,
    projection_dim,
]  # Widths of the Dense layers inside each Transformer block
transformer_layers = 8
mlp_head_units = [2048, 1024]  # Widths of the Dense layers of the final MLP head

def mlp(x, hidden_units, dropout_rate):
    """Apply a stack of GELU-activated Dense layers, each followed by Dropout.

    Arguments:
    x -- input tensor
    hidden_units -- iterable with the number of units of each Dense layer
    dropout_rate -- rate passed to every Dropout layer

    Returns:
    The tensor produced by the last Dropout layer (``x`` itself when
    ``hidden_units`` is empty).
    """
    out = x
    for width in hidden_units:
        hidden = layers.Dense(width, activation=tf.nn.gelu)(out)
        out = layers.Dropout(dropout_rate)(hidden)
    return out

class Patches(layers.Layer):
    """Keras layer that cuts images into square, non-overlapping patches."""

    def __init__(self, patch_size):
        super().__init__()
        self.patch_size = patch_size

    def call(self, images):
        """Return the flattened patches of every image in the batch.

        The patch window and the stride are both ``patch_size``, and "VALID"
        padding is used. The result is reshaped to
        ``[batch, -1, values_per_patch]``.
        """
        window = [1, self.patch_size, self.patch_size, 1]
        extracted = tf.image.extract_patches(
            images=images,
            sizes=window,
            strides=window,
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        # The batch size is only known at run time, hence tf.shape.
        n_images = tf.shape(images)[0]
        values_per_patch = extracted.shape[-1]
        return tf.reshape(extracted, [n_images, -1, values_per_patch])

class PatchEncoder(layers.Layer):
    """Project patches linearly and add a learned position embedding."""

    def __init__(self, num_patches, projection_dim):
        super().__init__()
        self.num_patches = num_patches
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        """Return ``projection(patch)`` plus the embeddings of positions 0..num_patches-1."""
        position_ids = tf.range(start=0, limit=self.num_patches, delta=1)
        projected = self.projection(patch)
        position_vectors = self.position_embedding(position_ids)
        return projected + position_vectors

def vit(input_shape):
    """Build a Vision Transformer feature extractor as a Keras model.

    The input is split into patches, the patches are embedded, and the result
    goes through `transformer_layers` Transformer blocks (layer normalisation
    before the attention and before the MLP, with a skip connection around
    each). The encoded patches are then flattened and passed through an MLP
    head. No classification layer is attached: the model outputs the
    activations of the last MLP-head layer.

    Arguments:
    input_shape -- (height, width, channels) of a single input image

    Returns:
    model -- keras.Model mapping a batch of images to a
             [batch_size, mlp_head_units[-1]] feature tensor
    """
    inputs = layers.Input(shape=input_shape)

    # Create patches.
    patches = Patches(patch_size)(inputs)

    # Size the position embedding from the patch grid this input actually
    # produces. The images are not resized inside the model, so the global
    # `num_patches` (derived from `image_size`) does not match a 32x32 input
    # and adding the position embedding would fail on mismatched shapes.
    patch_rows = input_shape[0] // patch_size
    patch_cols = input_shape[1] // patch_size
    patch_count = patch_rows * patch_cols

    # Encode patches.
    encoded_patches = PatchEncoder(patch_count, projection_dim)(patches)

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        # Create a multi-head attention layer (self-attention: query == value).
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, encoded_patches])
        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # MLP.
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
        # Skip connection 2.
        encoded_patches = layers.Add()([x3, x2])

    # Create a [batch_size, patch_count * projection_dim] tensor.
    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.5)(representation)
    # Add MLP.
    features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
    # Create the Keras model; the MLP-head features are the output.
    model = keras.Model(inputs=inputs, outputs=features)
    return model

# Instantiate the ViT feature extractor for `input_shape`.
# NOTE(review): `vit_classifier` is not referenced by any later visible cell.
vit_classifier = vit(input_shape)

# The input embeddings

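The data in the input pickle file is stored as a nested dictionary: each `FAMILY_ID/PERSON_ID` key maps to a dictionary holding that person's embeddings (one entry per embedding):
```
{
    'FAMILY_ID/PERSON_ID': {KEY_1: EMB_1, KEY_2: EMB_2, ..., KEY_N: EMB_N},
    .
    .
    .
}
```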

In [2]:
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open('../data/train_img_embeddings.pkl', 'rb') as embeddings_file:
    train_embeddings = pickle.load(embeddings_file)

# Peek at a few keys to show the 'FAMILY_ID/PERSON_ID' naming.
example_keys = list(train_embeddings.keys())[:5]
print(f'The keys examples: {example_keys}')

# Every entry is itself a dict of embeddings; inspect the first one.
first_person = list(train_embeddings.values())[0]
embedding_shape = list(first_person.values())[0].shape
print(f'Embeddings shape: {embedding_shape}')

The keys examples: ['F0475/MID3', 'F0475/MID7', 'F0475/MID6', 'F0475/MID4', 'F0475/MID2']
Embeddings shape: (512,)


# Training pairs generating

Available training pairs from the csv files are split into train and validation sets. Those pairs are positive (there is a blood relation). For each set (train/valid) we additionally generate negative pairs.

Positive pairs are generated according to the input csv file. For each person of a positive pair we create one negative pair.
In total we'll have twice as many negative pairs as positive pairs.

In [3]:
def make_image_pair(pair, embeddings=None):
    '''
    Lazily generate concatenated embeddings for every combination of two persons' embeddings.

    Arguments:
    pair -- two-item sequence (p1, p2) of keys ('familyID/personID') into the
            embeddings dictionary
    embeddings -- optional dict mapping each key to a dict of embeddings;
                  defaults to the module-level `train_embeddings`

    Yields:
    One array per (embedding of p1, embedding of p2) combination: the two
    embeddings concatenated along axis 0. Every embedding of p1 is combined
    with every embedding of p2.

    Raises:
    KeyError -- when p1 or p2 is missing from the dictionary. Because this is a
                generator, the error surfaces on the first iteration, not at
                call time.
    '''
    if embeddings is None:
        embeddings = train_embeddings

    p1, p2 = pair

    first_embs = embeddings[p1].values()
    second_embs = embeddings[p2].values()

    for e1 in first_embs:
        for e2 in second_embs:
            yield np.concatenate([e1, e2], axis=0)

In [4]:
def pairs_set(input_pairs):
    """Yield (embedding, label) for every embedding combination of every pair.

    Arguments:
    input_pairs -- iterable of (pair, label) items, where `pair` is passed
                   unchanged to `make_image_pair`

    Pairs for which `make_image_pair` raises KeyError are skipped.
    """
    for pair, label in input_pairs:
        try:
            yield from ((emb, label) for emb in make_image_pair(pair))
        except KeyError:
            # Deliberate best-effort: ignore pairs that cannot be looked up.
            continue

def batched_pairs(input_pairs, batch_size, dataset_period, drop_remainder=True):
    """Yield fixed-size batches of sub-sampled (embeddings, labels) arrays.

    Arguments:
    input_pairs -- iterable of (pair, label) items, forwarded to `pairs_set`
    batch_size -- number of examples per yielded batch
    dataset_period -- sub-sampling period: only every `dataset_period`-th
                      example produced by `pairs_set` is kept
    drop_remainder -- when True (default, the historical behaviour) a trailing
                      batch with fewer than `batch_size` examples is discarded;
                      pass False to receive it as a final, shorter batch

    Yields:
    (embs, labels) -- two NumPy arrays of equal length; labels are cast to int
    """
    embs, labels = [], []
    for counter, (emb, label) in enumerate(pairs_set(input_pairs), start=1):
        # Keep every nth sample only.
        if counter % dataset_period:
            continue

        embs.append(emb)
        labels.append(np.array(label, dtype=int))
        if len(labels) == batch_size:
            yield np.array(embs), np.array(labels)
            embs, labels = [], []

    # Optionally hand out what is left instead of silently dropping it.
    if labels and not drop_remainder:
        yield np.array(embs), np.array(labels)

In [5]:
with open('../train_val_set.json', 'r') as split_file:
    train_val_set = json.load(split_file)

# The four groups are taken positionally, so this relies on the order of the
# entries in the JSON object.
(train_rlt_list, neg_train_rltshps,
 valid_rlt_list, neg_valid_rltshps) = train_val_set.values()

# Label positive pairs True and negative pairs False.
train_rlts = (
    [(pair, True) for pair in train_rlt_list]
    + [(pair, False) for pair in neg_train_rltshps]
)
val_rlts = (
    [(pair, True) for pair in valid_rlt_list]
    + [(pair, False) for pair in neg_valid_rltshps]
)

random.shuffle(train_rlts)
random.shuffle(val_rlts)

## Training Callbacks

In [6]:
def val_distance_stats(predictions, labels):
    """Mean and standard deviation of predictions for positive and negative examples.

    Arguments:
    predictions -- NumPy array of predicted values
    labels -- NumPy array of 0/1 labels aligned with `predictions`

    Returns:
    (pos_mean, pos_std, neg_mean, neg_std)
    """
    # The builtin `bool` replaces the `np.bool` alias, which was removed in
    # NumPy 1.24.
    pos_mask = labels.astype(bool)
    neg_mask = (1 - labels).astype(bool)

    val_pos = predictions[pos_mask]
    val_neg = predictions[neg_mask]
    val_pos_m, val_pos_s = np.mean(val_pos), np.std(val_pos)
    val_neg_m, val_neg_s = np.mean(val_neg), np.std(val_neg)

    return val_pos_m, val_pos_s, val_neg_m, val_neg_s
  
class MetricCallback(xgb.callback.TrainingCallback):
    """XGBoost training callback that logs validation metrics to TensorBoard.

    After every boosting round the current model is evaluated on batches drawn
    from the module-level `val_rlts` (using the module-level `batch_size` and
    `eval_dataset_period`), and per-class precision/recall plus overall
    accuracy are written as TensorBoard scalars.
    """

    def __init__(self, logdir):
        """Create `logdir` if needed and open the train/valid summary writers."""
        super().__init__()
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(logdir, exist_ok=True)
        self.train_writer = tf.summary.create_file_writer(logdir + '/train')
        self.valid_writer = tf.summary.create_file_writer(logdir + '/valid')
        # Maps the integer class label to the name used in the scalar tags.
        self.class_encoded = {
            0: 'not_related',
            1: 'related'
        }

    def tb_writer(self, items_to_write, wtype, epoch):
        """Write every name -> value of `items_to_write` as a scalar at step `epoch`.

        `wtype == 'train'` selects the train writer; any other value selects
        the validation writer.
        """
        writer = self.train_writer if wtype == 'train' else self.valid_writer

        with writer.as_default():
            for name, value in items_to_write.items():
                tf.summary.scalar(name, value, epoch)
            writer.flush()

    def after_iteration(self, model, epoch, evals_log):
        """Evaluate `model` on the validation pairs and log the metrics.

        Returns:
        False -- this callback never requests early stopping
        """
        val_true = []
        val_pred = []
        for batch_x, batch_y in batched_pairs(val_rlts, batch_size, eval_dataset_period):
            val_pred.append(model.predict(xgb.DMatrix(batch_x)))
            val_true.extend(list(batch_y))

        if not val_pred:
            # No validation batch was produced; np.concatenate would raise on
            # an empty list and abort training, so skip logging instead.
            return False

        val_true = np.array(val_true).astype(int)
        val_pred = np.concatenate(val_pred, axis=0)
        # Round the raw predictions to the nearest integer to get class labels.
        # NOTE(review): assumes `model.predict` returns values in [0, 1] — confirm.
        val_pred = np.around(val_pred)

        # Precision and recall
        valid_precision, valid_recall, _, _ = precision_recall_fscore_support(val_true, val_pred, labels=[0, 1])
        valid_accuracy = accuracy_score(val_true, val_pred)

        logs = {}
        for k, v in self.class_encoded.items():
            logs['valid/precision/' + v] = valid_precision[k]
            logs['valid/recall/' + v] = valid_recall[k]

        logs['valid/accuracy'] = valid_accuracy

        self.tb_writer(logs, wtype='valid', epoch=epoch)
        return False

## Run the TensorBoard plugin to track training progress

In [None]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [None]:
%tensorboard --logdir=./logs/xgboost --port=7008

# Run training

In [7]:
# Training configuration shared by the cells below.
epochs = 2000  # passed to XGBClassifier as n_estimators
batch_size = 32  # number of examples per batch produced by batched_pairs
dataset_period = 2  # keep every 2nd generated training example
eval_dataset_period = 12  # keep every 12th generated validation example
lr = 1e-3  # passed to XGBClassifier as learning_rate

In [None]:
# Name of this run; it selects the TensorBoard log sub-directory and is also
# used in the submission file name further down.
model_name = 'mobile_xgboost_002'
logdir = os.path.join('logs/xgboost', model_name)
# Creates the log directory (if missing) and opens the summary writers.
metric_callback = MetricCallback(logdir)

In [8]:
# Materialise the sub-sampled training batches and stack them into two arrays.
train_batches = list(batched_pairs(train_rlts, batch_size, dataset_period))
train_x = np.concatenate([features for features, _ in train_batches], axis=0)
train_y = np.concatenate([targets for _, targets in train_batches])

print(f'Original dataset length: {train_y.shape}')
print(f'Number of negatives: {(1 - train_y).sum()}')

Original dataset length: (166432,)
Number of negatives: 133346


In [None]:
# Collect the classifier settings in one place before building the model.
xgb_params = dict(
    n_estimators=epochs,
    max_depth=70,
    learning_rate=lr,
    use_label_encoder=False,
    scale_pos_weight=50,
)
xgb_cls = xgb.XGBClassifier(**xgb_params)

# The metric callback logs validation metrics after every boosting round.
xgb_cls.fit(train_x, train_y, verbose=True, callbacks=[metric_callback])

# Submission

In [None]:
# Load submission pairs
# NOTE(review): this path is relative to the working directory, whereas the
# training embeddings are read from '../data/' — confirm which is intended.
submission_path = 'data/sample_submission.csv'
submission_df = pd.read_csv(submission_path)

In [None]:
# Load models
# NOTE(review): `model` and `FaceNet` are not defined or imported in any
# visible cell, and this section does not use the `xgb_cls` trained above —
# it looks carried over from a different, distance-based model. Confirm
# before running.
ckpt_path = 'checkpoints/model_6/weights.70-0.11.hdf5'
model.load_weights(ckpt_path)
embedder = FaceNet()

In [None]:
# Get the threshold according to validation ds
# NOTE(review): `val_pairs` and `val_labels` are not defined in any visible
# cell — confirm where they come from.
val_pred = model.predict([val_pairs[:, 0], val_pairs[:, 1]])
# The builtin `int` replaces the `np.int` alias, which was removed in NumPy 1.24.
val_pos_m, val_pos_s, val_neg_m, val_neg_s = val_distance_stats(val_pred, val_labels.astype(int))
# Midpoint between (positive mean + std) and (negative mean - std).
threshold = ((val_pos_m + val_pos_s) + (val_neg_m - val_neg_s)) / 2

# Iterate over submission pairs
# Column that receives the predicted labels.
is_related = submission_df['is_related']
predictions = []
for idx, row in submission_df.iterrows():
    # Load images
    # NOTE(review): `image` is not imported in any visible cell — confirm.
    img_pair = row['img_pair']
    img1_name, img2_name = img_pair.split('-')
    img1_path = os.path.join('data/test', img1_name)
    img2_path = os.path.join('data/test', img2_name)
    img1 = image.load_img(img1_path)
    img2 = image.load_img(img2_path)
    img1 = np.array(img1).astype('float32')
    img2 = np.array(img2).astype('float32')

    # Get FaceNet embeddings
    embedding1 = embedder.embeddings([img1])
    embedding2 = embedder.embeddings([img2])

    # Do an inference, if distance is smaller than threshold
    # then there is the relation
    y_pred = model.predict([embedding1, embedding2])
    predictions.append(y_pred[0])
    if y_pred.squeeze() < threshold:
        # Write through the frame with .loc: assigning into a Series taken
        # from the frame is chained assignment and may update only a copy.
        submission_df.loc[idx, 'is_related'] = 1

    # Print step
    if idx % 100 == 0:
        print(f'Processed rows: {idx}')

submission_df.to_csv(f'submission_{model_name}.csv', index=False)

In [None]:
# Flatten first so that all predictions form ONE dataset. `predictions` is a
# list of per-row model outputs; given a list of 1-element arrays, matplotlib
# would treat every element as its own dataset.
plt.hist(np.ravel(predictions), 20)
plt.xlabel('Predicted distance')
plt.ylabel('Number of pairs')
plt.show()

In [None]:
# Re-label every submission row with a manually chosen threshold:
# 1 when the predicted value is below `thr`, 0 otherwise.
thr = 0.85
scores = np.ravel(predictions)
# One vectorised write to the frame replaces the per-row chained assignment,
# which may update only a copy of the column.
submission_df['is_related'] = (scores < thr).astype(int)
submission_df.to_csv('submission_test.csv', index=False)