**Face Alignment**

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read and write files under that path.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!git clone https://github.com/davisking/dlib-models

Cloning into 'dlib-models'...
remote: Enumerating objects: 186, done.[K
remote: Counting objects: 100% (32/32), done.[K
remote: Compressing objects: 100% (31/31), done.[K
remote: Total 186 (delta 17), reused 11 (delta 1), pack-reused 154[K
Receiving objects: 100% (186/186), 408.51 MiB | 37.71 MiB/s, done.
Resolving deltas: 100% (89/89), done.


In [None]:
!bzip2 -d '/content/dlib-models/shape_predictor_5_face_landmarks.dat.bz2'

In [None]:
# install dlib (will take 10 or more minutes)
!apt update
!apt install -y cmake
!pip install dlib

[33m0% [Working][0m            Get:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]
Ign:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Ign:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Get:5 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease [15.9 kB]
Hit:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Release
Hit:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Hit:8 http://archive.ubuntu.com/ubuntu bionic InRelease
Get:9 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Get:10 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ Packages [69.5 kB]
Hit:11 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease
Hit:13 http://ppa.launchpad.net/deadsnakes/

In [None]:
!pip install mtcnn
!pip install keras-facenet

Collecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 5.2 MB/s 
Installing collected packages: mtcnn
Successfully installed mtcnn-0.1.1
Collecting keras-facenet
  Downloading keras-facenet-0.3.2.tar.gz (10 kB)
Building wheels for collected packages: keras-facenet
  Building wheel for keras-facenet (setup.py) ... [?25l[?25hdone
  Created wheel for keras-facenet: filename=keras_facenet-0.3.2-py3-none-any.whl size=10386 sha256=9b7a91a084530d392b8cf3266e416f9e56282eecaf295ffd60048e2e33bd8737
  Stored in directory: /root/.cache/pip/wheels/2b/76/1a/a29ceb07bfca098e2b0bc8690d550753a0384cbed26d5f47ee
Successfully built keras-facenet
Installing collected packages: keras-facenet
Successfully installed keras-facenet-0.3.2


In [None]:
import cv2
import numpy as np
from PIL import Image
import dlib
from google.colab.patches import cv2_imshow


# Location of the 5-point landmark model inside the cloned dlib-models directory.
LANDMARK_MODEL_PATH = 'dlib-models/shape_predictor_5_face_landmarks.dat'

# Face detector and landmark predictor shared by image_alignment.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(LANDMARK_MODEL_PATH)

def shape_to_normal(shape):
    """Convert a landmark result into a plain list of indexed points.

    Args:
        shape: object exposing ``part(i)`` whose result has ``x`` and ``y``
            attributes (here, the value returned by ``predictor``).

    Returns:
        A list of five ``(index, (x, y))`` tuples for landmark indices 0-4.
    """
    landmarks = (shape.part(idx) for idx in range(5))
    return [(idx, (point.x, point.y)) for idx, point in enumerate(landmarks)]

def get_eyes_nose_dlib(shape):
    """Derive the nose point and both eye centres from the indexed landmarks.

    Args:
        shape: list of ``(index, (x, y))`` tuples as built by ``shape_to_normal``.

    Returns:
        ``(nose, left_eye, right_eye)``: ``nose`` is the coordinate pair of
        landmark 4, ``left_eye`` the integer midpoint of landmarks 3 and 2,
        and ``right_eye`` the integer midpoint of landmarks 1 and 0.
    """
    def midpoint(first, second):
        # Per axis: truncate the coordinate sum to int, then floor-divide by 2.
        p, q = shape[first][1], shape[second][1]
        return int(p[0] + q[0]) // 2, int(p[1] + q[1]) // 2

    return shape[4][1], midpoint(3, 2), midpoint(1, 0)

def distance(a, b):
    """Return the Euclidean distance between two 2-D points.

    Args:
        a: indexable pair ``(x, y)``.
        b: indexable pair ``(x, y)``.

    Returns:
        The distance, as produced by ``np.sqrt``.
    """
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    return np.sqrt(dx ** 2 + dy ** 2)

def cosine_formula(length_line1, length_line2, length_line3):
    """Apply the law of cosines to three side lengths.

    Args:
        length_line1: length of one side adjacent to the angle.
        length_line2: length of the other side adjacent to the angle.
        length_line3: length of the side opposite the angle.

    Returns:
        The cosine of the angle enclosed by sides 1 and 2. No guard is applied
        against zero-length adjacent sides.
    """
    a_sq, b_sq, c_sq = length_line1 ** 2, length_line2 ** 2, length_line3 ** 2
    numerator = -(c_sq - b_sq - a_sq)
    denominator = 2 * length_line2 * length_line1
    return numerator / denominator

def rotate_point(origin, point, angle):
    """Rotate ``point`` about ``origin`` with the standard 2-D rotation formula.

    Args:
        origin: ``(x, y)`` pivot of the rotation.
        point: ``(x, y)`` point to rotate.
        angle: rotation angle in radians (passed to ``np.cos`` / ``np.sin``).

    Returns:
        The rotated coordinates as a ``(qx, qy)`` tuple.
    """
    ox, oy = origin
    px, py = point
    dx = px - ox
    dy = py - oy
    cos_t = np.cos(angle)
    sin_t = np.sin(angle)
    return ox + cos_t * dx - sin_t * dy, oy + sin_t * dx + cos_t * dy


def is_between(point1, point2, point3, extra_point):
    """Tell whether ``extra_point`` lies strictly inside a triangle.

    For each directed edge (point1->point2, point2->point3, point3->point1)
    the 2-D cross product with the vector to ``extra_point`` is computed; the
    point is inside when all three have the same strict sign.

    Args:
        point1: ``(x, y)`` triangle vertex.
        point2: ``(x, y)`` triangle vertex.
        point3: ``(x, y)`` triangle vertex.
        extra_point: ``(x, y)`` point to test.

    Returns:
        ``True`` when the point is strictly inside, ``False`` otherwise
        (including when it lies exactly on an edge).
    """
    def side(start, end):
        return (end[0] - start[0]) * (extra_point[1] - start[1]) - (end[1] - start[1]) * (extra_point[0] - start[0])

    signs = [side(point1, point2), side(point2, point3), side(point3, point1)]
    all_negative = all(value < 0 for value in signs)
    all_positive = all(value > 0 for value in signs)
    return bool(all_negative or all_positive)

def image_alignment(img):
    """Rotate ``img`` by the in-plane tilt estimated from facial landmarks.

    The angle is measured at the nose between two lines: one to the point
    midway between the eyes, and one to the top-centre of the detected face
    rectangle. When several faces are detected, the last detection is used.

    Args:
        img: BGR image array (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The rotated image as a NumPy array, or ``img`` unchanged when no face
        is detected or the landmark geometry is degenerate.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    rects = detector(gray, 0)
    if len(rects) == 0:
        return img

    # Only the last detection ever influenced the result, so run the landmark
    # predictor once on that rectangle instead of on every detected face.
    rect = list(rects)[-1]
    shape = shape_to_normal(predictor(gray, rect))
    nose, left_eye, right_eye = get_eyes_nose_dlib(shape)
    center_of_forehead = ((left_eye[0] + right_eye[0]) // 2, (left_eye[1] + right_eye[1]) // 2)
    # Reference point: horizontal centre of the face box, on its top edge.
    center_pred = (int((rect.left() + rect.right()) / 2), int(rect.top()))

    length_line1 = distance(center_of_forehead, nose)
    length_line2 = distance(center_pred, nose)
    length_line3 = distance(center_pred, center_of_forehead)
    if length_line1 == 0 or length_line2 == 0:
        # A side adjacent to the angle has zero length, so the angle is
        # undefined (division by zero); leave the image untouched.
        return img

    # Rounding can push the ratio marginally outside [-1, 1], which would make
    # arccos return NaN and propagate into the rotation.
    cos_a = np.clip(cosine_formula(length_line1, length_line2, length_line3), -1.0, 1.0)
    angle = np.arccos(cos_a)

    # The triangle test on the rotated forehead point decides the sign of the rotation.
    rotated_point = rotate_point(nose, center_of_forehead, angle)
    rotated_point = (int(rotated_point[0]), int(rotated_point[1]))
    if is_between(nose, center_of_forehead, center_pred, rotated_point):
        angle = np.degrees(-angle)
    else:
        angle = np.degrees(angle)

    return np.array(Image.fromarray(img).rotate(angle))

**Face Extraction**

In [None]:
from mtcnn.mtcnn import MTCNN
import time
detector1 = MTCNN()


def face_extract(img):
    """Crop the first face that MTCNN detects in a BGR image.

    Args:
        img: BGR image array.

    Returns:
        The BGR crop of the first detected face, or ``img`` unchanged when no
        face is detected or the clamped bounding box is empty.
    """
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    faces = detector1.detect_faces(img_rgb)
    if len(faces) == 0:
        return img
    x, y, width, height = faces[0]['box']
    # The box can start at a negative coordinate for faces near the image
    # border; NumPy would read that as an index from the end and return an
    # empty or wrong crop, so clamp the corners to the frame.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = max(x + width, 0), max(y + height, 0)
    face_rgb = img_rgb[y0:y1, x0:x1]
    if face_rgb.size == 0:
        return img
    return cv2.cvtColor(face_rgb, cv2.COLOR_RGB2BGR)

**Reading the input CSV and preprocessing to create the test data**

In [2]:
import pandas as pd

# Load the list of image pairs; each row holds the file names of two images.
df = pd.read_csv('/content/drive/MyDrive/ES_FaceMatch_Dataset/test.csv')
df.head()

Unnamed: 0,image1,image2
0,84770376235978.jpg,70098827925517.jpg
1,51285551988172.jpg,40352160634341.jpg
2,92104157409800.jpg,43993355472481.jpg
3,17278801258247.jpg,39952763632406.jpg
4,69669552075188.jpg,11563244873988.jpg


In [None]:
from tqdm import tqdm
from keras_facenet import FaceNet

IMAGE_DIR = '/content/drive/MyDrive/ES_FaceMatch_Dataset/dataset_images'


def embed_face(filename):
    """Read one image, align and crop the face, and return its FaceNet embedding."""
    image = cv2.imread('{}/{}'.format(IMAGE_DIR, filename))
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        raise FileNotFoundError('could not read image {}'.format(filename))
    face = face_extract(image_alignment(image))
    # Add a leading batch axis: (H, W, 3) -> (1, H, W, 3).
    batch = np.reshape(face, (1, face.shape[0], -1, 3))
    return embedder.embeddings(batch)


x_test = []
problematic = []
embedder = FaceNet()
for row_index, row in tqdm(df.iterrows(), total=len(df)):
    try:
        # Columns are read by position: first image, then second image.
        pair = np.concatenate((embed_face(row.iloc[0]), embed_face(row.iloc[1])), axis=-1)
    except Exception as exc:
        # Record the failing row and show why it failed; unlike a bare
        # `except:`, this still lets KeyboardInterrupt stop the loop.
        problematic.append(row_index)
        print(row_index, repr(exc))
    else:
        x_test.append(pair)

# Rows listed in `problematic` have no entry in x_test, so `xx` has fewer rows
# than `df` whenever a pair fails.
xx = np.array(x_test)
np.save('/content/drive/MyDrive/face_match_data/xtest.npy', xx)

4997it [2:00:55,  1.45s/it]


In [3]:
import cv2
import numpy as np
from PIL import Image
import dlib
from google.colab.patches import cv2_imshow

**Loading training data and test features**

In [4]:
# Saved feature (x*) and label (y*) arrays stored as three .npy parts, plus the
# test-pair features from xtest.npy.
x_train1 = np.load('/content/drive/MyDrive/face_match_data/x15000.npy') 
y_train1 = np.load('/content/drive/MyDrive/face_match_data/y15000.npy')
x_train2 = np.load('/content/drive/MyDrive/face_match_data/x20000.npy')
y_train2 = np.load('/content/drive/MyDrive/face_match_data/y20000.npy')
x_train3 = np.load('/content/drive/MyDrive/face_match_data/x23916.npy')
y_train3 = np.load('/content/drive/MyDrive/face_match_data/y23916.npy')
xtest = np.load('/content/drive/MyDrive/face_match_data/xtest.npy')

In [5]:
# Stack the three feature parts along the first (sample) axis.
tt = np.concatenate((x_train1, x_train2, x_train3))
tt.shape

(23917, 1, 1024)

In [6]:
# Stack the three label parts in the same order as the features.
yy = np.concatenate((y_train1, y_train2, y_train3))
yy.shape

(23917,)

In [7]:
# Flatten each sample to a 1024-long vector; -1 lets NumPy infer the number of
# samples instead of hard-coding it.
xx = tt.reshape(-1, 1024)
xx.shape

(23917, 1024)

In [11]:
# Flatten each test sample to a 1024-long vector; -1 infers the number of
# samples, and re-running the cell leaves the shape unchanged.
xtest = xtest.reshape(-1, 1024)
xtest.shape

(4997, 1024)

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers import Dense, Activation

# Hold out 20% of the labelled samples for evaluation; random_state fixes the split.
# Note: X_test is this held-out split, not the `xtest` array loaded from xtest.npy.
X_train, X_test, y_train, y_test = train_test_split(xx, yy, test_size=0.2, random_state = 1)

In [60]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import *
from keras.regularizers import l2

# Fully connected binary classifier over 1024-long input vectors.
# Each hidden block is Dense -> BatchNormalization -> ReLU, optionally followed
# by Dropout; entries are (units, dropout rate or None).
hidden_blocks = [(512, 0.2), (256, 0.2), (256, 0.1), (64, None), (32, None)]

model = Sequential()
for block_index, (units, dropout_rate) in enumerate(hidden_blocks):
    if block_index == 0:
        # Only the first layer declares the input shape.
        model.add(Dense(units, input_shape=(1024,)))
    else:
        model.add(Dense(units))
    model.add(BatchNormalization())
    model.add(ReLU())
    if dropout_rate is not None:
        model.add(Dropout(dropout_rate))

# Single sigmoid unit as the output layer.
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 512)               524800    
_________________________________________________________________
batch_normalization_9 (Batch (None, 512)               2048      
_________________________________________________________________
re_lu (ReLU)                 (None, 512)               0         
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 256)               131328    
_________________________________________________________________
batch_normalization_10 (Batc (None, 256)               1024      
_________________________________________________________________
re_lu_1 (ReLU)               (None, 256)              

In [61]:
import tensorflow as tf
import keras 
# Keep only the weights of the epoch with the best validation accuracy.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "model2_weights.h5", monitor='val_accuracy', mode='max',
    save_best_only=True, save_weights_only=True, save_freq='epoch', verbose=0)
# Scale the learning rate by 0.3 after 3 epochs without val_accuracy improvement
# (floor 1e-5). Taken from tf.keras, like the model and the checkpoint callback,
# rather than from the standalone `keras` package.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.3, patience=3, min_lr=0.00001, mode='max')
# 20% of X_train is used as the validation set that both callbacks monitor.
history = model.fit(X_train, y_train, epochs=100, batch_size=64,
                    validation_split=0.2, callbacks=[reduce_lr, checkpoint])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [62]:
# Load saved weights into the model.
# NOTE(review): presumably the file written by the ModelCheckpoint callback
# ("model2_weights.h5", relative path) — this holds only if the working directory is /content.
model.load_weights('/content/model2_weights.h5')

In [63]:
# Threshold the model's sigmoid outputs at 0.5 and report metrics on the held-out split.
pred = model.predict(X_test) > 0.5
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.96      0.93      0.95      3044
           1       0.89      0.94      0.91      1740

    accuracy                           0.93      4784
   macro avg       0.93      0.94      0.93      4784
weighted avg       0.94      0.93      0.93      4784



In [37]:
from xgboost import XGBClassifier
# Gradient-boosted tree classifier fitted on the same training split.
# NOTE(review): the estimator repr printed by this cell shows both eta=1e-05 and
# learning_rate=0.1 — confirm which step size is in effect for the installed xgboost version.
clasifier = XGBClassifier(objective= "binary:logistic",
     eval_metric= "error",
     tree_method= "gpu_hist",
     max_depth= 10,
     subsample= 0.8,
     n_estimators = 2000,
     eta = 0.00001)
clasifier.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eta=1e-05,
              eval_metric='error', gamma=0, learning_rate=0.1, max_delta_step=0,
              max_depth=10, min_child_weight=1, missing=None, n_estimators=2000,
              n_jobs=1, nthread=None, objective='binary:logistic',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              seed=None, silent=None, subsample=0.8, tree_method='gpu_hist',
              verbosity=1)

In [38]:
# Evaluate the XGBoost classifier on the held-out split.
# NOTE(review): predict() presumably already returns class labels, in which case
# `> 0.5` only converts them to booleans — confirm.
y_pred = clasifier.predict(X_test) >0.5
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96      3044
           1       0.93      0.93      0.93      1740

    accuracy                           0.95      4784
   macro avg       0.94      0.94      0.94      4784
weighted avg       0.95      0.95      0.95      4784



In [64]:
# Predict on the test features and threshold the sigmoid outputs at 0.5.
ypred = model.predict(xtest) >0.5

In [65]:
# Number of test samples predicted as positive.
cnt = int(np.count_nonzero(ypred))

cnt

1879

In [66]:
# Attach the thresholded predictions to the pair table (modifies df in place).
df['pred'] = ypred


In [67]:
# Flag rows where the two prediction columns disagree (1 = differ, 0 = agree).
# NOTE(review): no cell visible in this notebook creates `pred2`; presumably it
# holds predictions from an earlier model or run — confirm and add the cell that
# builds it, otherwise this line raises KeyError on a fresh kernel.
df['ttyl'] = (df['pred'] != df['pred2']).astype(int)
df

Unnamed: 0,image1,image2,pred,pred2,ttyl
0,84770376235978.jpg,70098827925517.jpg,False,False,0
1,51285551988172.jpg,40352160634341.jpg,False,False,0
2,92104157409800.jpg,43993355472481.jpg,True,True,0
3,17278801258247.jpg,39952763632406.jpg,False,False,0
4,69669552075188.jpg,11563244873988.jpg,False,False,0
...,...,...,...,...,...
4992,30470682361984.jpg,31915544799803.jpg,True,False,1
4993,38633214403572.jpg,17760910482671.jpg,False,False,0
4994,66870453878736.jpg,76054148475027.jpg,False,False,0
4995,25801526428363.jpg,47928524608159.jpg,False,False,0


In [68]:
# Total number of rows on which `pred` and `pred2` disagree.
df['ttyl'].sum()

237

In [69]:
# Build the submission table: drop the comparison columns, store the prediction
# as an integer and expose it under the name `label_pred`.
ds = (
    df.drop(columns=['pred2', 'ttyl'])
      .astype({'pred': 'int'})
      .rename(columns={'pred': 'label_pred'})
)
ds

Unnamed: 0,image1,image2,label_pred
0,84770376235978.jpg,70098827925517.jpg,0
1,51285551988172.jpg,40352160634341.jpg,0
2,92104157409800.jpg,43993355472481.jpg,1
3,17278801258247.jpg,39952763632406.jpg,0
4,69669552075188.jpg,11563244873988.jpg,0
...,...,...,...
4992,30470682361984.jpg,31915544799803.jpg,1
4993,38633214403572.jpg,17760910482671.jpg,0
4994,66870453878736.jpg,76054148475027.jpg,0
4995,25801526428363.jpg,47928524608159.jpg,0


In [71]:
# Write the submission file to the current working directory, without the index column.
ds.to_csv('submission.csv',index=False)