# **Large-Scale Kinship Recognition Data Challenge: Kinship Verification STARTER NOTEBOOK**

We provide framework code to get you started on the competition. The notebook is broken up into three main sections. 
1. Data Loading & Visualizing
2. Data Generator & Model Building
3. Training & Testing Model

We have done the majority of the heavy lifting by making the data easily and readily accessible through Google Drive. Furthermore, we have made the task easier by creating a dataloader and a fully trained end-to-end model that predicts a binary label (0 or 1) denoting whether two faces share a kinship relation.

**WARNING: If you have not already done so, complete the following steps before running the notebook.**

Change to GPU:

Runtime --> Change Runtime Type --> GPU

Mount to Google Drive

Install Libraries

In [1]:
%%capture
!pip install mxnet
!pip install mxnet-cu101
!pip install insightface
!pip install gluonfr
!pip install onnxruntime
!pip install opencv-python

Mount your google drive

In [2]:
# Mount Google Drive at /content/drive so the paths defined below resolve.
# force_remount=True is passed so an existing mount is re-established.
from google.colab import drive
drive.mount('/content/drive', force_remount= True)

Mounted at /content/drive


In [3]:
import mxnet as mx
import numpy as np
from mxnet import gluon
from mxnet import autograd as ag
from mxnet.gluon.data.vision import transforms
from insightface.utils.face_align import norm_crop
import cv2
from mxnet.gluon.data import Dataset

import time
import gluonfr
from pathlib import Path
import os
from tqdm import tqdm

import random
from pathlib import Path
from typing import Tuple, Optional, Generator, Any, List, Union, Callable

**All Paths used for training and testing**

In [4]:
# Base folder of the competition data on the mounted Google Drive.
root = '/content/drive/My Drive/DeepLearning/KaggleCompetition'
# Training images (iterated below as family folder -> person folder -> image).
train_path = Path(root) / 'train-faces-processed'
# Folder holding the pre-trained model files loaded by train().
pre_trained_models_path = Path(root) / 'pre_trained_models'

In [5]:
# Type aliases used only in annotations; they document the expected layout of
# each value and do not enforce anything at runtime.
Img = np.ndarray  # HxWxC image in numpy (read with cv2.imread)
MxImg = mx.nd.NDArray  # HxWxC image in mxnet (read with mx.img.imread or converted from Img)
Embedding = mx.nd.NDArray  # 1x512 image embedding (CNN output given input image)
MxImgArray = mx.nd.NDArray  # NxCxHxW batch of input images
Labels = mx.nd.NDArray  # Nx1 float unscaled kinship relation labels
ImgOrPath = Union[Img, Path]  # either an in-memory image or a path to one
ImgPairOrPath = Tuple[ImgOrPath, ImgOrPath]  # pair of images and/or paths
PairPath = Tuple[Path, Path]  # pair of image paths

**Generate your own train_ds.csv**

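train_ds.csv has three columns, listed below. Note that the cells that follow build the same information in memory as a list of tuples (`seq`) ordered as (image path, family index, person index).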


1.   Path of the image
2.   Person Id
3.   Family Id





In [6]:
# One list of person directories per family directory under train_path.
# Non-directory entries are skipped at both levels so a stray file does not
# raise NotADirectoryError, and both levels are sorted because Path.iterdir()
# yields entries in arbitrary order: positions in these lists become the
# family/person indices, so they should be stable between runs.
families = [
    sorted(person_dir for person_dir in family_dir.iterdir()
           if person_dir.is_dir())
    for family_dir in sorted(train_path.iterdir())
    if family_dir.is_dir()
]

In [7]:
# Flat list of training samples: one (image path, family index, person index)
# tuple per file found in each person directory. The person index is the
# position of the person inside its own family list.
seq = [
    (img_path, family_idx, person_idx)
    for family_idx, person_dirs in enumerate(families)
    for person_idx, person_dir in enumerate(person_dirs)
    for img_path in person_dir.iterdir()
]

In [8]:
# Sanity check: total number of (image path, family index, person index) samples.
print(len(seq))

5037


In [9]:
class FamiliesDataset(Dataset):
  """Dataset of face images labelled with a family index and a person index.

  Each item is ``(image, family_idx, person_idx)``; the image is read from
  disk with ``mx.img.imread`` when the item is requested.
  """

  def __init__(self, samples: Optional[List[Tuple[Path, int, int]]] = None):
    """Create the dataset.

    Args:
      samples: ``(img_path, family_idx, person_idx)`` tuples to serve. When
        omitted, the module-level ``seq`` list is used, so existing
        ``FamiliesDataset()`` calls keep working.
    """
    super().__init__()
    self._samples = seq if samples is None else samples

  def __getitem__(self, idx : int) -> Tuple[MxImg, int, int]:
    """Read the image of sample ``idx`` and return it with its two labels."""
    img_path, family_idx, person_idx = self._samples[idx]
    img = mx.img.imread(str(img_path))
    return img, family_idx, person_idx

  def __len__(self) -> int:
    """Return the number of samples served by this dataset."""
    return len(self._samples)

In [10]:
def train(normalize : bool = False):
  """Fine-tune the pre-trained ``arcface_r100`` network as a family classifier.

  Loads the symbol and weights found under ``pre_trained_models_path``,
  appends a fully connected layer with one output per entry of ``families``
  and trains it with softmax cross-entropy on ``FamiliesDataset``.

  Learning-rate schedule: linear warm-up over the first ``warmup``
  iterations, multiplication by ``lr_factor`` at the epochs in ``lr_steps``,
  and a linear decay towards ``end_lr`` over the last ``cooldown``
  iterations. Every 10th epoch the network is exported to
  ``<root>/my_trained_models``.

  Args:
    normalize: If True, divide the backbone output by its L2 norm (computed
      along axis 1) and scale it by 32 before the classification layer.

  Raises:
    RuntimeError: If a NaN appears in the network outputs or in a loss.
  """
  train_dataset = FamiliesDataset()
  jitter_param = 0.15
  lighting_param = 0.15
  batch_size = 48
  num_workers = 12
  net_name = 'arcface_families_ft'

  arc_r_100_path = os.path.join(pre_trained_models_path, 'arcface_r100')
  sym_path = os.path.join(arc_r_100_path, 'model-symbol.json')
  weight_path = os.path.join(arc_r_100_path, 'model-0000.params')

  snapshots_path = Path(os.path.join(root, 'my_trained_models'))
  # exist_ok avoids the check-then-create race of exists() + makedirs().
  snapshots_path.mkdir(parents=True, exist_ok=True)
  num_families = len(families)

  warmup = 200
  lr = 1e-4
  cooldown = 400
  lr_factor = 0.75
  num_epoch = 50
  momentum = 0.9
  wd = 1e-4
  clip_gradient = 1.0
  lr_steps = [8, 14, 25, 35, 40, 50, 60]
  ctx_list = [mx.gpu()]

  transform_img_train = transforms.Compose([
      transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                   saturation=jitter_param),
      transforms.RandomLighting(lighting_param),
      transforms.ToTensor()
  ])

  train_data = mx.gluon.data.DataLoader(
      train_dataset.transform_first(transform_img_train),
      shuffle=True,
      batch_size=batch_size,
      num_workers=num_workers,
      pin_memory=True
  )
  ctx = ctx_list[0]
  print('Load symbols')
  sym = mx.sym.load(str(sym_path))
  print('Loaded symbols')
  if normalize:
    # Must be the symbolic reduction: the builtin sum() accepts neither
    # `axis` nor `keepdims` and raised TypeError here.
    norm_sym = mx.sym.sqrt(mx.sym.sum(sym ** 2, axis=1, keepdims=True) + 1e-06)
    sym = mx.sym.broadcast_div(sym, norm_sym, name='fc_normed') * 32
  sym = mx.sym.FullyConnected(sym, num_hidden=num_families, name='fc_classification', lr_mult=1)
  net = gluon.SymbolBlock([sym], [mx.sym.var('data')])
  # allow_missing=True: the new classification layer has no pre-trained
  # weights; it is initialised by the initialize() call below.
  net.load_parameters(str(weight_path), ctx=ctx, cast_dtype=True,
                      allow_missing=True, ignore_extra=False)
  net.initialize(mx.init.Normal(), ctx=mx.cpu())
  net.collect_params().reset_ctx(ctx)
  net.hybridize()

  all_losses = [
      ('softmax', gluon.loss.SoftmaxCrossEntropyLoss()),
      #('arc', gluonfr.loss.ArcLoss(num_families, m=0.7, s=32, easy_margin=False)),
      #('center', gluonfr.loss.CenterLoss(num_families, 512, 1e-1))
  ]

  start_lr = 1e-10
  end_lr = 1e-10
  num_batch = len(train_data)
  end_iter = num_epoch * num_batch
  cooldown_start = end_iter - cooldown
  global_iter = 0  # iterations completed so far, across all epochs
  cooldown_base_lr = None  # learning rate in effect when the cooldown begins
  param_dict = net.collect_params()
  trainer = mx.gluon.Trainer(param_dict, 'sgd', {
      'learning_rate': start_lr, 'momentum': momentum, 'wd': wd, 'clip_gradient': clip_gradient})
  lr_counter = 0

  for epoch in range(num_epoch):
    # Bounds guard: without it a num_epoch beyond the last entry of lr_steps
    # would raise IndexError.
    if lr_counter < len(lr_steps) and epoch == lr_steps[lr_counter]:
      trainer.set_learning_rate(trainer.learning_rate * lr_factor)
      lr_counter += 1

    tic = time.time()
    losses = [0] * len(all_losses)
    metric = mx.metric.Accuracy()
    print(' > training', epoch)
    for batch in tqdm(train_data, total=num_batch):
      if global_iter < warmup:
        cur_lr = (global_iter + 1) * (lr - start_lr) / warmup + start_lr
        trainer.set_learning_rate(cur_lr)
      elif global_iter > cooldown_start:
        # Interpolate from the rate in effect when the cooldown started.
        # Re-reading trainer.learning_rate every iteration would multiply the
        # decay factors together and collapse the rate almost immediately.
        if cooldown_base_lr is None:
          cooldown_base_lr = trainer.learning_rate
        cur_lr = (end_iter - global_iter) * (cooldown_base_lr - end_lr) / cooldown + end_lr
        trainer.set_learning_rate(cur_lr)
      global_iter += 1

      # ToTensor scaled the pixels to [0, 1]; multiply back to the 0..255 range.
      data = mx.gluon.utils.split_and_load(batch[0] * 255, ctx_list=ctx_list, even_split=False)
      gts = mx.gluon.utils.split_and_load(batch[1], ctx_list=ctx_list, even_split=False)

      with ag.record():
        outputs = [net(X) for X in data]
        # One host copy per device output instead of one per sample row.
        if any(np.isnan(out.asnumpy()).any() for out in outputs):
          print('OOps!')
          raise RuntimeError('NaN detected in network outputs')
        cur_losses = [[cur_loss(o, l) for (o, l) in zip(outputs, gts)] for _, cur_loss in all_losses]
        # NOTE(review): only the first entry of all_losses is back-propagated;
        # enabling further losses requires summing them per device here.
        combined_losses = [per_ctx[0] for per_ctx in zip(*cur_losses)]
        if any(np.isnan(l.asnumpy()).any() for per_loss in cur_losses for l in per_loss):
          print('OOps2!')
          raise RuntimeError('NaN detected in loss values')
      metric.update(gts, outputs)
      for combined_loss in combined_losses:
        combined_loss.backward()

      # Normalise by the real number of samples: the last batch of an epoch
      # can be smaller than batch_size.
      trainer.step(batch[0].shape[0], ignore_stale_grad=True)
      for idx, cur_loss in enumerate(cur_losses):
        losses[idx] += sum([l.mean().asscalar() for l in cur_loss]) / len(cur_loss)

    if (epoch + 1) % 10 == 0:
      net.export(f'{str(snapshots_path)}/export_{net_name}', epoch = epoch + 1)

    losses = [l / num_batch for l in losses]
    losses_str = [f'{l_name}: {losses[idx]:.3f}' for idx, (l_name, _) in enumerate(all_losses)]
    losses_str = '; '.join(losses_str)
    m_name, m_val = metric.get()
    losses_str += f'| {m_name}: {m_val}'
    print(f'[Epoch {epoch:03d}] {losses_str} | time: {time.time() - tic:.1f}')

In [None]:
if __name__ == '__main__':
    # Seed the numpy and mxnet random generators with the same fixed value
    # before starting a training run without embedding normalisation.
    seed = 100
    np.random.seed(seed)
    mx.random.seed(seed)
    train(normalize=False)

100%|██████████| 105/105 [01:14<00:00,  1.40it/s]
  0%|          | 0/105 [00:00<?, ?it/s]

[Epoch 010] softmax: 4.725| accuracy: 0.3527893587452849 | time: 74.9
 > training 11
 > training 11


100%|██████████| 105/105 [01:14<00:00,  1.40it/s]
  0%|          | 0/105 [00:00<?, ?it/s]

[Epoch 011] softmax: 4.672| accuracy: 0.39031169346833433 | time: 74.8
 > training 12
 > training 12


100%|██████████| 105/105 [01:14<00:00,  1.40it/s]
  0%|          | 0/105 [00:00<?, ?it/s]

[Epoch 012] softmax: 4.616| accuracy: 0.42306928727417115 | time: 74.9
 > training 13
 > training 13


 73%|███████▎  | 77/105 [00:55<00:19,  1.41it/s]