In [1]:
# Mount Google Drive at /content/drive; the dataset and output paths used in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
# Install keras-vggface straight from its GitHub repository. No tag or commit is pinned, so the installed code can differ between runs.
!pip install git+https://github.com/rcmalli/keras-vggface.git

Collecting git+https://github.com/rcmalli/keras-vggface.git
  Cloning https://github.com/rcmalli/keras-vggface.git to /tmp/pip-req-build-9spayvfa
  Running command git clone -q https://github.com/rcmalli/keras-vggface.git /tmp/pip-req-build-9spayvfa
Building wheels for collected packages: keras-vggface
  Building wheel for keras-vggface (setup.py) ... [?25l[?25hdone
  Created wheel for keras-vggface: filename=keras_vggface-0.6-cp36-none-any.whl size=8311 sha256=153702d8b32665440849dfff0c243120664fa9f127753787a8f1236190db5e6b
  Stored in directory: /tmp/pip-ephem-wheel-cache-fup5o0ta/wheels/36/07/46/06c25ce8e9cd396dabe151ea1d8a2bc28dafcb11321c1f3a6d
Successfully built keras-vggface
Installing collected packages: keras-vggface
Successfully installed keras-vggface-0.6


In [0]:
import warnings
# Suppress everything raised through the warnings module for the rest of the session.
# NOTE(review): this is a blanket filter, so it can also hide warnings that point at real problems.
warnings.filterwarnings('ignore')

In [0]:
import h5py
from collections import defaultdict
from glob import glob
from random import choice,sample
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

In [5]:
from keras.callbacks import ModelCheckpoint,ReduceLROnPlateau
from keras.layers import Input,Dense,GlobalMaxPool2D,GlobalAvgPool2D,Concatenate,Multiply,Dropout,Subtract,Lambda
from keras.models import Model
from keras.optimizers import Adam
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace
from keras import backend as K
from keras.models import load_model

Using TensorFlow backend.


In [0]:
# CSV of relationship pairs; its p1 and p2 columns are read further down.
train_file_path='/content/drive/My Drive/Recognizing_Faces_in_the_Wild/train_relationships.csv'
# Root folder of the training images, searched two directory levels deep for *.jpg files.
train_folders_path='/content/drive/My Drive/Recognizing_Faces_in_the_Wild/train/'
# Marker of the held-out split: it is matched as a plain substring against image paths and pair keys.
val_families='F09' #families that have F09*** in their folder name

In [7]:
%%time
# Collect every image path once, then route each path in a single pass:
# paths containing the held-out family marker go to validation, the rest to training.
all_images = glob(train_folders_path + '*/*/*.jpg')

train_images, val_images = [], []
for image_path in all_images:
  if val_families in image_path:
    val_images.append(image_path)
  else:
    train_images.append(image_path)

CPU times: user 368 ms, sys: 263 ms, total: 631 ms
Wall time: 2min 54s


In [0]:
# One key per image, built from the two directory names that precede the file name
# and joined with '/', which is the format used in the relationships CSV.
ppl = []
for image_path in all_images:
  path_parts = image_path.split('/')
  ppl.append(path_parts[-3] + '/' + path_parts[-2])

In [0]:
# Map each person to the list of their face images, separately for the training
# and validation splits. A person's key is the last two directory names of the
# image path joined with '/'.
train_person_to_images_map = defaultdict(list)
val_person_to_images_map = defaultdict(list)

for image_paths, person_map in (
    (train_images, train_person_to_images_map),
    (val_images, val_person_to_images_map),
):
  for image_path in image_paths:
    path_parts = image_path.split('/')
    person_map[path_parts[-3] + '/' + path_parts[-2]].append(image_path)

In [0]:
# Read the relationship pairs and keep them as (p1, p2) tuples, dropping every
# pair in which either person has no image on disk.
# `ppl` holds one entry per image, so testing membership against the list rescans
# it for every pair; a set gives the same answer with constant-time lookups.
known_people = set(ppl)
relationships_df = pd.read_csv(train_file_path)
relationships = [
  (p1, p2)
  for p1, p2 in zip(relationships_df.p1.values, relationships_df.p2.values)
  if p1 in known_people and p2 in known_people
]

In [0]:
# Divide the relationship tuples into training and validation sets: a pair is
# held out when the key of its first member contains the validation family marker.
train, val = [], []
for pair in relationships:
  if val_families in pair[0]:
    val.append(pair)
  else:
    train.append(pair)

In [0]:
#Reads an image, converts it to a float NumPy array and returns it preprocessed as required by VGGFace
def img2arr(path):
  """Load one image file and run it through the VGGFace (version 2) preprocessing.

  Args:
    path: Path of the image file to read with OpenCV.

  Returns:
    The array produced by ``preprocess_input(img, version=2)`` for the image.

  Raises:
    ValueError: If OpenCV cannot read an image from ``path``.
  """
  img=cv2.imread(path)
  # cv2.imread reports a missing or unreadable file by returning None rather than
  # raising; fail here with the offending path instead of an obscure conversion error.
  if img is None:
    raise ValueError('Could not read image: {!r}'.format(path))
  # np.float was only an alias of the builtin float (i.e. float64) and has been
  # removed from NumPy, so the concrete dtype is spelled out.
  img=np.array(img).astype(np.float64)
  # NOTE(review): cv2.imread returns channels in BGR order - confirm that this is
  # the order preprocess_input(version=2) expects for its input.
  return preprocess_input(img,version=2)

In [0]:
#Generator to use with fit_generator to generate data in batches
def data_generator(list_tuples,person_to_images_map,batch_size=16):
  """Yield an endless stream of balanced batches of face-image pairs.

  Every batch starts with ``batch_size//2`` pairs sampled from ``list_tuples``
  (label 1) and is then filled up to ``batch_size`` with random pairs of two
  different people that do not appear in ``list_tuples`` in either order (label 0).
  One image is picked at random for each person of each pair.

  Args:
    list_tuples: Sequence of (person, person) tuples treated as positive pairs.
    person_to_images_map: Mapping from a person key to the list of that
      person's image paths.
    batch_size: Number of pairs per batch.

  Yields:
    A tuple ``([X1, X2], labels)`` where ``X1`` and ``X2`` stack the preprocessed
    images of the first and second member of every pair, and ``labels`` is a
    NumPy array holding 1 for sampled positive pairs and 0 for random pairs.
  """
  ppl=list(person_to_images_map.keys())
  # Set lookups replace a scan of the whole pair list for every candidate
  # negative pair; the accept/reject decision itself is unchanged.
  known_pairs={tuple(pair) for pair in list_tuples}
  while True:
    batch_tuples=sample(list_tuples,batch_size//2)
    labels=[1]*len(batch_tuples)
    while len(batch_tuples)<batch_size:
      p1=choice(ppl)
      p2=choice(ppl)

      if p1!=p2 and (p1,p2) not in known_pairs and (p2,p1) not in known_pairs:
        batch_tuples.append((p1,p2))
        labels.append(0)
    # Diagnostic: name any first member without images before choice() fails on the empty list.
    for x in batch_tuples:
      if not len(person_to_images_map[x[0]]):
        print(x[0])
    X1=[choice(person_to_images_map[x[0]]) for x in batch_tuples]
    X1=np.array([img2arr(x) for x in X1])
    X2=[choice(person_to_images_map[x[1]]) for x in batch_tuples]
    X2=np.array([img2arr(x) for x in X2])

    # Labels are handed over as an array, like the inputs, so the consumer does
    # not have to interpret a bare Python list of ints.
    yield [X1,X2],np.array(labels)

<h4>Model Architecture</h4>
<img src='model_architecture.png' />

In [0]:
#Model architecture
def build_model():
  """Build, compile and print the summary of the two-input face-pair classifier.

  Both 224x224x3 inputs go through one VGGFace ResNet50 backbone without its top.
  Each branch is pooled into a vector (global max and global average pooling,
  concatenated), the two vectors are combined into (x1-x2)^2, x1^2-x2^2 and x1*x2,
  and a small dense head with a sigmoid output scores the pair.

  Returns:
    The compiled Keras model taking ``[input1, input2]``.
  """
  def pooled_features(feature_map):
    # Concatenate the global max-pooled and average-pooled features of one branch.
    return Concatenate(axis=-1)([GlobalMaxPool2D()(feature_map),GlobalAvgPool2D()(feature_map)])

  left_input=Input(shape=(224,224,3))
  right_input=Input(shape=(224,224,3))

  # One backbone instance is applied to both inputs, so both branches use the same weights.
  backbone=VGGFace(model='resnet50',include_top=False)

  left_maps=backbone(left_input)
  right_maps=backbone(right_input)

  left=pooled_features(left_maps)
  right=pooled_features(right_maps)

  # Layers are created in the same order as before so Keras assigns the same auto-generated names.
  # (x1 - x2)^2
  difference=Subtract()([left,right])
  squared_difference=Multiply()([difference,difference])

  # x1^2 - x2^2
  left_squared=Multiply()([left,left])
  right_squared=Multiply()([right,right])
  difference_of_squares=Subtract()([left_squared,right_squared])

  # x1 * x2
  product=Multiply()([left,right])

  merged=Concatenate(axis=-1)([squared_difference,difference_of_squares,product])

  hidden=Dense(100,activation='relu')(merged)
  hidden=Dropout(0.01)(hidden)
  score=Dense(1,activation='sigmoid')(hidden)

  siamese_model=Model([left_input,right_input],score)
  siamese_model.compile(loss='binary_crossentropy',metrics=['acc'],optimizer=Adam(0.00001))
  siamese_model.summary()

  return siamese_model

In [0]:
# Target file of the ModelCheckpoint callback defined below.
# NOTE(review): this is vgg_face.h5, whereas the model is later saved to and reloaded from vggface.h5 - confirm which file is meant to feed the predictions.
file_path='/content/drive/My Drive/Recognizing_Faces_in_the_Wild/20190824/vgg_face.h5'

In [0]:
#Training callbacks
# Save the model to file_path whenever val_acc reaches a new maximum.
checkpoint=ModelCheckpoint(
  file_path,
  monitor='val_acc',
  mode='max',
  save_best_only=True,
  verbose=1,
)

# Reduce the learning rate (factor 0.1, patience 20) when val_acc is not improving.
reduce_lr_on_plateau=ReduceLROnPlateau(
  monitor='val_acc',
  mode='max',
  patience=20,
  factor=0.1,
  verbose=1,
)

callbacks_list=[checkpoint,reduce_lr_on_plateau]

In [18]:
# Build the network, then train it from two endless generators: one over the
# training pairs and one over the held-out validation pairs.
model=build_model()

train_batches=data_generator(train,train_person_to_images_map,batch_size=16)
val_batches=data_generator(val,val_person_to_images_map,batch_size=16)

history=model.fit_generator(
  train_batches,
  validation_data=val_batches,
  steps_per_epoch=200,
  validation_steps=100,
  epochs=100,
  callbacks=callbacks_list,
  use_multiprocessing=True,
  workers=4,
  verbose=1,
)

W0824 16:28:16.535361 140109749909376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0824 16:28:16.583397 140109749909376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0824 16:28:16.593642 140109749909376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0824 16:28:16.634713 140109749909376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0824 16:28:16.636055 1401097499

Downloading data from https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_tf_notop_resnet50.h5


W0824 16:28:37.291864 140109749909376 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
W0824 16:28:37.336609 140109749909376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0824 16:28:37.348724 140109749909376 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
vggface_resnet50 (Model)        multiple             23561152    input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
global_max_pooling2d_1 (GlobalM (None, 2048)         0           vggface_resnet50[1][0]           
__________

In [0]:
import pickle
# Persist only the per-epoch metrics dictionary. Pickling the History callback
# itself would also try to serialise the model it references, which bloats the
# file or fails, and makes the pickle unreadable without Keras.
with open('/content/drive/My Drive/Recognizing_Faces_in_the_Wild/20190824/history.pkl','wb') as f:
  pickle.dump(history.history,f)

In [0]:
# Save the in-memory model, i.e. its state after the training call above, to Drive as vggface.h5.
model.save('/content/drive/My Drive/Recognizing_Faces_in_the_Wild/20190824/vggface.h5')

In [0]:
# Reload the model from vggface.h5, the file written by the model.save call above.
# NOTE(review): the ModelCheckpoint callback stores the best-val_acc model in a different file (vgg_face.h5) - confirm the end-of-training model is the one intended for prediction.
model=load_model('/content/drive/My Drive/Recognizing_Faces_in_the_Wild/20190824/vggface.h5')

In [0]:
# Folder holding the test images; the file names from the submission pairs are appended to it directly, hence the trailing slash.
test_path='/content/drive/My Drive/Recognizing_Faces_in_the_Wild/test/'

In [0]:
# Load the sample submission; its img_pair column lists the pairs to score and is_related is overwritten with the predictions below.
submission=pd.read_csv('/content/drive/My Drive/Recognizing_Faces_in_the_Wild/sample_submission.csv',header=0)

In [24]:
# Preview the first rows: each img_pair is "<image file>-<image file>".
submission.head()

Unnamed: 0,img_pair,is_related
0,face05508.jpg-face01210.jpg,0
1,face05750.jpg-face00898.jpg,0
2,face05820.jpg-face03938.jpg,0
3,face02104.jpg-face01172.jpg,0
4,face02428.jpg-face05611.jpg,0


In [0]:
#generates test data in batches
def test_batch(test_pairs,size=32):
  return (test_pairs[pos:pos+size] for pos in range(0,len(test_pairs),size))

In [26]:
# Score every test pair in batches, keeping the predictions in submission order.
test_pairs = submission.img_pair.values
batch_size = 32
# Ceiling division: the last batch may be partial.
num_batches = (len(test_pairs) + batch_size - 1) // batch_size

predictions=[]

# test_batch returns a generator, which has no length, so the number of batches
# is passed explicitly to let tqdm show a progress bar with an ETA.
for batch in tqdm(test_batch(test_pairs, batch_size), total=num_batches):
    # Each entry looks like "<first image>-<second image>"; split it once and reuse both parts.
    pair_files = [x.split("-") for x in batch]

    X1 = np.array([img2arr(test_path + files[0]) for files in pair_files])
    X2 = np.array([img2arr(test_path + files[1]) for files in pair_files])

    pred = model.predict([X1, X2]).ravel().tolist()
    predictions += pred

submission['is_related'] = predictions

166it [33:40, 10.38s/it]


In [0]:
# Write the filled-in submission to Drive; index=False keeps the DataFrame index out of the CSV.
submission.to_csv("/content/drive/My Drive/Recognizing_Faces_in_the_Wild/20190824/predictions.csv", index=False)

<strong>Note</strong>

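The classifier head is fed three element-wise combinations of the two face embeddings $x_1$ and $x_2$: $(x_1 - x_2)^2$, $x_1^2 - x_2^2$ and $x_1 x_2$.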

Five models were trained, each with a different validation set, and their predictions were averaged to improve the score.

