In [None]:
!pip install mxnet
!pip install mxnet-cu101
!pip install insightface

In [None]:
!pip install gluonfr
!pip install onnxruntime
!pip install opencv-python

In [25]:
import numpy as np
import mxnet as mx
from mxnet import gluon
from pathlib import Path
import os, os.path
from tqdm import tqdm
from typing import Tuple, Optional, Generator, Any, List, Union, Callable

In [5]:
import pandas as pd

In [26]:
# Type aliases used purely for annotation in the rest of the notebook.
# Several names map to the same runtime type; they differ only in intended meaning.
Img = np.ndarray # HxWxC image in numpy (read with cv2.imread)
MxImg = mx.nd.NDArray  # HxWxC image in mxnet (read with mx.img.imread or converted from Img)
Embedding = mx.nd.NDArray  # 1x512 image embedding (CNN output given input image)
MxImgArray = mx.nd.NDArray  # NxCxHxW batch of input images
Labels = mx.nd.NDArray  # Nx1 float unscaled kinship relation labels
ImgOrPath = Union[Img, Path]  # either an already-loaded image or the path to one
ImgPairOrPath = Tuple[ImgOrPath, ImgOrPath]  # pair of images and/or paths
PairPath = Tuple[Path, Path]  # pair of image file paths

In [27]:
# Mount Google Drive at /content/drive; force_remount=True re-mounts even if it
# is already mounted. Every path derived from `root` below lives under this mount.
from google.colab import drive
drive.mount('/content/drive', force_remount= True)

Mounted at /content/drive


In [28]:
# Project folder on the mounted Drive, and the sub-folder with the test images.
root = '/content/drive/My Drive/DeepLearning/KaggleCompetition'
test_path = Path(root) / 'test-processed'

In [69]:
def norm(emb):
    """Return the Euclidean (L2) length of ``emb``.

    Every element is squared, the squares are summed over the whole array
    (no axis is given), and the square root of that total is returned.
    """
    sum_of_squares = np.sum(emb * emb)
    return np.sqrt(sum_of_squares)

def cosine(emb1, emb2):
    """Return the cosine similarity of two embedding vectors.

    The dot product of the inputs is divided by the product of their
    lengths as computed by ``norm``. No guard is applied for zero-length
    vectors, so the division is performed as-is in that case.
    """
    dot_product = np.dot(emb1, emb2)
    length_product = norm(emb1) * norm(emb2)
    return dot_product / length_product

In [29]:
def predict(model: Callable[[Path, Path], Labels], pair_list: List[PairPath]) -> Labels:
  """Score every image pair with ``model`` and collect the results.

  Args:
    model: callable invoked as ``model(path1, path2)`` for each pair.
    pair_list: list of ``(path1, path2)`` tuples, scored in order.

  Returns:
    The per-pair results stacked along a new leading axis with ``np.stack``
    (i.e. a numpy array, one entry per pair). An empty ``pair_list`` yields
    an empty 1-D array instead of the ``ValueError`` that ``np.stack`` raises
    when given nothing to stack.
  """
  # tqdm only adds a progress bar; the pairs are consumed in their original order.
  predictions = [
    model(path1, path2)
    for path1, path2 in tqdm(pair_list, total = len(pair_list))
  ]
  if not predictions:
    return np.empty((0,))
  return np.stack(predictions, axis = 0)

In [67]:
class CompareModel(object):
  """Pairwise image comparer built on an MXNet checkpoint.

  The checkpoint's ``fc1_output`` internal output is used as the embedding of
  an image. Calling the instance with two image paths returns
  ``self.metric(embedding1, embedding2)``. Embeddings are cached per path in
  ``self.embeddings`` so each image goes through the network only once.
  """

  def __init__(self, model_name : Union[str, Path], ctx : mx.Context = mx.cpu(), epoch : int = 20):
    """Load the checkpoint and bind it for inference.

    Args:
      model_name: checkpoint prefix handed to ``mx.model.load_checkpoint``
        (a ``Path`` is accepted and converted to ``str``).
      ctx: MXNet context the module is bound to; defaults to CPU.
      epoch: checkpoint epoch number to load; defaults to 20, the value
        that used to be hard-coded.
    """
    sym, arg_params, aux_params = mx.model.load_checkpoint(str(model_name), epoch)
    # Cut the graph at the embedding layer so the module outputs embeddings.
    sym = sym.get_internals()['fc1_output']
    model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
    data_shape = (1,3,112,112)
    model.bind(data_shapes=[('data', data_shape)], for_training=False)
    model.set_params(arg_params, aux_params)
    # Warm-up: one forward pass on an all-zero batch; the output is fetched
    # and discarded.
    warmup_batch = mx.io.DataBatch(data=(mx.nd.zeros(shape=data_shape),))
    model.forward(warmup_batch, is_train=False)
    model.get_outputs()[0].asnumpy()
    self.model = model
    self.embeddings = dict()  # cache: image path -> embedding (numpy array)
    self.metric = cosine

  def get_embedding(self, im_path : Path) -> Embedding:
    """Return the embedding of the image at ``im_path``, computing it on first use.

    The image is read with ``mx.img.imread``, rearranged from HxWxC to
    1xCxHxW float32 and passed through the network without any resizing
    (assumes the file already has the 112x112 size the module was bound
    with — TODO confirm). The stored and returned value is a numpy array
    (the result of ``asnumpy()``).
    """
    if im_path not in self.embeddings:
      img = mx.img.imread(str(im_path)).transpose((2, 0, 1)).expand_dims(0).astype(np.float32)
      batch = mx.io.DataBatch([img])
      self.model.forward(batch, is_train=False)
      self.embeddings[im_path] = self.model.get_outputs()[0][0].asnumpy()
    return self.embeddings[im_path]

  def __call__(self, path1 : Path, path2 : Path) -> Labels:
    """Return ``self.metric`` applied to the embeddings of the two images."""
    emb1 = self.get_embedding(path1)
    emb2 = self.get_embedding(path2)
    return self.metric(emb1, emb2)


In [None]:
# Inputs and outputs all live under `root` on the mounted Drive.
test_csv_path = Path(os.path.join(root, 'test_ds.csv'))
test_output_path = Path(os.path.join(root, 'test-processed'))
model_name = Path(os.path.join(root, 'my_trained_models', 'export_arcface_families_ft'))
# The submission is written under `root` as well: Drive is mounted at
# /content/drive in this notebook, so the previous '/gdrive/...' location is
# not a directory this notebook creates.
submission_path = Path(os.path.join(root, 'vn2302.csv'))

# Read the "idx,img1,img2" rows; rows whose first field is not numeric
# (e.g. a header line) are reported and skipped.
pairs = []
with open(test_csv_path, 'r') as f:
  for line in f:
    line = line.strip()
    if len(line) < 1:
      continue
    idx, img1, img2 = line.split(',')
    if not idx.isnumeric():
      print(f'idx is not a number : {idx}')
      continue
    face1_path = test_output_path / img1
    face2_path = test_output_path / img2
    pairs.append((face1_path, face2_path))

# Use the GPU when one is visible, otherwise fall back to the CPU, and actually
# hand the context to the model (it used to be computed but never passed on).
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()

model = CompareModel(model_name, ctx)
model.metric = cosine
predictions = predict(model, pairs)

print('predictions length', len(predictions))

# The index column follows the number of predictions instead of a fixed 3000,
# so the frame can be built for any number of valid CSV rows.
d = {'index': np.arange(len(predictions)), 'label': predictions}
submissionfile = pd.DataFrame(data=d)
# NOTE(review): cosine similarity can be negative, so rounding may produce -1
# as well as 0 and 1 — confirm the label range the competition expects.
submissionfile = submissionfile.round()

submissionfile.astype("int64").to_csv(submission_path, index=False)
