# Image similarity using Siamese network

In [1]:
# Install the keras-vggface library. Its pretrained VGGFace model is reused as-is, except for the last few layers, which are fine-tuned for this use case.
# ! pip install git+https://github.com/rcmalli/keras-vggface.git

In [2]:
import os
import glob
from keras_vggface.utils import preprocess_input

import numpy as np
import pandas as pd

from keras.preprocessing import image
import tensorflow as tf
from sklearn.metrics import roc_auc_score

import cv2

In [3]:
# Sanity check: evaluating the bare module name displays which pandas installation the kernel imported.
pd

<module 'pandas' from '/Users/gveni/opt/anaconda3/envs/tensorflowkeras_learning/lib/python3.6/site-packages/pandas/__init__.py'>

In [4]:
# Ask pip for the metadata of the installed cvxopt package.
# NOTE(review): cvxopt is not referenced anywhere else in the visible notebook — confirm this check is still needed.
!pip show cvxopt



In [5]:
# data location
# The dataset root can be overridden per machine through the FIW_DATA_DIR
# environment variable; when it is unset the original default location is used.
parent_dir = os.environ.get(
    'FIW_DATA_DIR',
    '/home/ec2-user/ebs/data/cv_data/recognizing-faces-in-the-wild',
)
# CSV listing the related (p1, p2) person pairs
trainfile_path = os.path.join(parent_dir, 'train_relationships.csv')
# Keep the trailing slash: the image glob pattern is built from this value by
# plain string concatenation.
trainfolder_path = os.path.join(parent_dir, 'train/')
testfolder_path = os.path.join(parent_dir, 'test')
# Family-id prefix; anything whose path/id contains it (e.g. F0900, F0901) is
# held out for validation.
val_families = 'F09'

In [7]:
# Training images are laid out as <train>/<family>/<member>/<file>.jpg
image_pattern = trainfolder_path + '*/*/*.jpg'
all_images = glob.glob(image_pattern)
# Preview the first few matches
all_images[:5]

['/Users/gveni/Documents/data/cv_data/recognizing-faces-in-the-wild/train/F0832/MID1/P08797_face2.jpg',
 '/Users/gveni/Documents/data/cv_data/recognizing-faces-in-the-wild/train/F0832/MID1/P08791_face1.jpg',
 '/Users/gveni/Documents/data/cv_data/recognizing-faces-in-the-wild/train/F0832/MID1/P08795_face1.jpg',
 '/Users/gveni/Documents/data/cv_data/recognizing-faces-in-the-wild/train/F0832/MID1/P08793_face1.jpg',
 '/Users/gveni/Documents/data/cv_data/recognizing-faces-in-the-wild/train/F0832/MID1/P08799_face2.jpg']

In [10]:
# Hold out every image whose path contains the validation family prefix;
# all remaining images are used for training.
train_images, val_images = [], []
for img_path in all_images:
    if val_families in img_path:
        val_images.append(img_path)
    else:
        train_images.append(img_path)

print('#(training images)', len(train_images))
print('#(validation images)', len(val_images))

#(training images) 11232
#(validation images) 1147


In [15]:
# A person id is "<family>/<member>": the two directory names directly above
# each image file.
ppl = ['/'.join(img_path.split('/')[-3:-1]) for img_path in all_images]
print(ppl[:10])

['F0832/MID1', 'F0832/MID1', 'F0832/MID1', 'F0832/MID1', 'F0832/MID1', 'F0832/MID1', 'F0832/MID1', 'F0832/MID2', 'F0832/MID2', 'F0832/MID2']


In [17]:
from collections import defaultdict


def _group_images_by_person(image_paths):
    """Group image paths by person id.

    The person id is "<family>/<member>", taken from the two directory names
    directly above each image file.

    Args:
        image_paths: iterable of '/'-separated image paths.

    Returns:
        A plain dict mapping each person id to the list of that person's
        image paths, in the order they were encountered.
    """
    grouped = defaultdict(list)
    for img_path in image_paths:
        family, member = img_path.split('/')[-3:-1]
        grouped[family + '/' + member].append(img_path)
    # Return a plain dict so a later lookup of an unknown person raises
    # KeyError instead of silently creating an empty entry.
    return dict(grouped)


train_person_to_images_map = _group_images_by_person(train_images)
# Bug fix: this map was previously filled from `train_images`, so it held the
# training people and none of the held-out validation families.
val_person_to_images_map = _group_images_by_person(val_images)
        
    

In [18]:
# QA: report any person that ended up with an empty image list,
# checking the training map first and the validation map second.
qa_targets = (
    ("Train Damn", train_person_to_images_map),
    ("VAL Damn", val_person_to_images_map),
)
for banner, person_map in qa_targets:
    for person, person_images in person_map.items():
        if len(person_images) == 0:
            print(banner)
            print(person)

In [23]:
# Load the related (p1, p2) person pairs. The DataFrame gets its own name so
# `relationships` only ever refers to the list of pairs.
relationships_df = pd.read_csv(trainfile_path)
relationships = list(zip(relationships_df['p1'].values, relationships_df['p2'].values))
print('Total number of relationnship pairs:', len(relationships))

# Keep only pairs where both people have at least one image on disk.
# `ppl` has one entry per image, so testing membership against the list
# rescanned it for every pair; a set gives the same answer in O(1).
known_people = set(ppl)
relationships = [
    pair for pair in relationships
    if pair[0] in known_people and pair[1] in known_people
]
print('After filtering, total number of relationnship pairs:', len(relationships))

Total number of relationnship pairs: 3598
After filtering, total number of relationnship pairs: 3362


In [24]:
# Split the relationship pairs: a pair whose first person id contains the
# validation family prefix goes to validation, every other pair to training.
train, val = [], []
for pair in relationships:
    if val_families in pair[0]:
        val.append(pair)
    else:
        train.append(pair)

In [31]:
# QA: peek at the first few training pairs and report how many there are
train_preview = train[:5]
print(train_preview)
print('Total training pairs', len(train))

[('F0002/MID1', 'F0002/MID3'), ('F0002/MID2', 'F0002/MID3'), ('F0005/MID1', 'F0005/MID2'), ('F0005/MID3', 'F0005/MID2'), ('F0009/MID1', 'F0009/MID4')]
Total training pairs 3066


In [32]:
# QA: same check for the held-out validation pairs
val_preview = val[:5]
print(val_preview)
print('Total validation pairs', len(val))

[('F0900/MID2', 'F0900/MID1'), ('F0900/MID3', 'F0900/MID1'), ('F0901/MID1', 'F0901/MID4'), ('F0901/MID2', 'F0901/MID1'), ('F0901/MID2', 'F0901/MID4')]
Total validation pairs 296


In [35]:
def auroc(y_true, y_pred):
    """Keras metric: ROC AUC of a batch, computed with scikit-learn.

    ``roc_auc_score`` is a plain Python/NumPy function, so it is wrapped in a
    TensorFlow op that calls back into Python.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predicted scores.

    Returns:
        A ``tf.double`` tensor holding the ROC AUC of the batch.
    """
    # `tf.py_func` is deprecated and absent from the TF 2.x top-level
    # namespace; `tf.numpy_function` is its documented replacement. Fall back
    # to `tf.py_func` only on old TensorFlow versions that lack the new name.
    numpy_op = getattr(tf, 'numpy_function', None) or tf.py_func
    return numpy_op(roc_auc_score, (y_true, y_pred), tf.double)

In [43]:
def read_img(path):
    """Load one image file as a preprocessed float array.

    The image is loaded at a target size of 197x197, converted to a float
    NumPy array and passed through keras_vggface's ``preprocess_input`` with
    ``version=2``.

    Args:
        path: location of the image file.

    Returns:
        Whatever ``preprocess_input`` returns for the float image array.
    """
    pil_img = image.load_img(path, target_size=(197, 197))
    pixels = np.array(pil_img).astype('float')
    return preprocess_input(pixels, version=2)

In [49]:
from random import choice, sample

def gen(list_tuples, person_to_images_map, batch_size=16, img_loader=None):
    """Endlessly yield balanced batches of image pairs for the Siamese model.

    Each batch starts with ``batch_size // 2`` positive pairs sampled without
    replacement from ``list_tuples`` (label 1). It is then topped up to
    ``batch_size`` with random pairs of distinct people that do not appear in
    ``list_tuples`` in either order (label 0). One image is picked at random
    for every person in every pair.

    Args:
        list_tuples: sequence of ``(person_1, person_2)`` related pairs.
        person_to_images_map: dict mapping a person id to a list of image
            paths.
        batch_size: number of pairs per batch.
        img_loader: optional callable turning an image path into an array;
            defaults to ``read_img``.

    Yields:
        ``([X0, X1], labels)`` where ``X0`` / ``X1`` stack the loaded images
        of the first / second person of each pair and ``labels`` is a NumPy
        array of 0/1 values, positives first.

    Raises:
        ValueError: from ``random.sample`` when ``list_tuples`` holds fewer
            than ``batch_size // 2`` pairs.
    """
    if img_loader is None:
        img_loader = read_img

    ppl = list(person_to_images_map.keys())
    # Build the lookup once: a set makes the "is this pair related?" test O(1)
    # instead of scanning the whole pair list for every negative candidate.
    related = {tuple(pair) for pair in list_tuples}

    while True:
        # sample(): chooses k unique random elements from a population sequence
        # first half of the batch: known related pairs, label 1
        batch_tuples = sample(list_tuples, batch_size // 2)
        labels = [1] * len(batch_tuples)

        # second half: random unrelated pairs, label 0
        while len(batch_tuples) < batch_size:
            p1 = choice(ppl)
            p2 = choice(ppl)
            if p1 != p2 and (p1, p2) not in related and (p2, p1) not in related:
                batch_tuples.append((p1, p2))
                labels.append(0)

        # diagnostic: name any first-person id that has no images before
        # choice() fails on the empty list
        for x in batch_tuples:
            if not len(person_to_images_map[x[0]]):
                print(x[0])

        # select a single image out of the many provided for each person
        X0 = [choice(person_to_images_map[x[0]]) for x in batch_tuples]
        X0 = np.array([img_loader(x) for x in X0])

        X1 = [choice(person_to_images_map[x[1]]) for x in batch_tuples]
        X1 = np.array([img_loader(x) for x in X1])

        # Bug fix: the original had `return` after the infinite loop, which
        # was unreachable, so no batch was ever produced. Yield inside the
        # loop, with labels as an array rather than a Python list.
        yield [X0, X1], np.array(labels)
        

In [40]:
from keras.layers import Input, Dense, Flatten, GlobalMaxPool2D, GlobalAvgPool2D, Concatenate, Multiply, Dropout, Subtract, Add, Conv2D
from keras.models import Model
from keras.optimizers import Adam
from keras_vggface.vggface import VGGFace

def baseline_model():
    """Build and compile the Siamese kinship classifier.

    Both 197x197x3 inputs go through one shared VGGFace ResNet50 backbone
    (``include_top=False``). Each backbone output is reduced to a vector by
    concatenating global max and global average pooling. The two vectors are
    combined as ``(x1 - x2)^2`` and ``x1^2 - x2^2``, concatenated, and fed to
    a small dense head ending in a single sigmoid unit.

    Returns:
        The compiled Keras ``Model`` taking ``[input_1, input_2]`` and
        producing one probability per pair. The model summary is printed as a
        side effect.
    """
    input_1 = Input(shape=(197, 197, 3))
    input_2 = Input(shape=(197, 197, 3))

    base_model = VGGFace(model='resnet50', include_top=False)

    # Keep the pretrained weights fixed for every backbone layer except the
    # last three, which stay trainable for fine-tuning.
    # Bug fix: the attribute was misspelled `trainiable`, which only created
    # an unused attribute and left every layer trainable.
    for layer in base_model.layers[:-3]:
        layer.trainable = False

    # the same backbone instance (shared weights) embeds both images
    x1 = base_model(input_1)
    x2 = base_model(input_2)

    x1 = Concatenate(axis=-1)([GlobalMaxPool2D()(x1), GlobalAvgPool2D()(x1)])
    x2 = Concatenate(axis=-1)([GlobalMaxPool2D()(x2), GlobalAvgPool2D()(x2)])

    # (x1 - x2)^2
    x3 = Subtract()([x1, x2])
    x3 = Multiply()([x3, x3])

    # x1^2 - x2^2
    x1_ = Multiply()([x1, x1])
    x2_ = Multiply()([x2, x2])
    x4 = Subtract()([x1_, x2_])
    x = Concatenate(axis=-1)([x4, x3])

    x = Dense(100, activation='relu')(x)
    x = Dropout(0.01)(x)
    out = Dense(1, activation='sigmoid')(x)

    model = Model([input_1, input_2], out)

    model.compile(loss='binary_crossentropy', metrics=['acc', auroc], optimizer=Adam(0.0001))

    model.summary()

    return model

In [41]:
import h5py
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# File that receives the best checkpoint seen so far
file_path = 'vgg_face.h5'

# Write a checkpoint only when validation accuracy reaches a new maximum
checkpoint = ModelCheckpoint(
    file_path,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Multiply the learning rate by 0.1 after 20 epochs without a better val_acc.
# NOTE(review): patience (20) is larger than the 10 epochs requested in the
# training cell, so this callback cannot trigger in that run — confirm intent.
reduce_on_plateau = ReduceLROnPlateau(
    monitor='val_acc',
    mode='max',
    factor=0.1,
    patience=20,
    verbose=1,
)

callbacks_list = [checkpoint, reduce_on_plateau]

model = baseline_model()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 197, 197, 3) 0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            [(None, 197, 197, 3) 0                                            
__________________________________________________________________________________________________
vggface_resnet50 (Model)        multiple             23561152    input_7[0][0]                    
                                                                 input_8[0][0]                    
__________________________________________________________________________________________________
global_max_pooling2d_4 (GlobalM (None, 2048)         0           vggface_resnet50[1][0]       

In [50]:
# Train on endlessly generated, class-balanced batches.
# `gen` returns a plain Python generator, which is not safe to share between
# worker processes: with use_multiprocessing=True and several workers, Keras
# warns that the data may be duplicated. Feed it from a single worker.
model.fit_generator(
    gen(train, train_person_to_images_map, batch_size=16),
    validation_data=gen(val, val_person_to_images_map, batch_size=16),
    steps_per_epoch=200,
    validation_steps=100,
    epochs=10,
    verbose=1,
    callbacks=callbacks_list,
    workers=1,
    use_multiprocessing=False,
)

KeyboardInterrupt: 

In [None]:
def chunker(seq, size=32):
    """Lazily split ``seq`` into consecutive slices of at most ``size`` items.

    Args:
        seq: any object supporting ``len()`` and slicing.
        size: maximum length of each slice.

    Returns:
        A generator producing ``seq[0:size]``, ``seq[size:2*size]``, ... ; the
        final slice may be shorter, and an empty ``seq`` produces nothing.
    """
    chunk_starts = range(0, len(seq), size)
    return (seq[start:start + size] for start in chunk_starts)


In [None]:
from tqdm import tqdm

# Bug fix: this cell referenced `parent_path` and `test_path`, neither of
# which is defined in the notebook; the configured names are `parent_dir` and
# `testfolder_path`. NOTE(review): assumes the sample submission lives in, and
# the result should be written to, the dataset root — confirm.
submission = pd.read_csv(os.path.join(parent_dir, 'sample_submission.csv'))

predictions = []

# Score the test pairs in chunks so only one chunk of images is held in memory
for batch in tqdm(chunker(submission.img_pair.values)):
    # each entry is "<first image>-<second image>"
    X1 = [x.split("-")[0] for x in batch]
    # os.path.join supplies the separator that `testfolder_path` lacks
    X1 = np.array([read_img(os.path.join(testfolder_path, x)) for x in X1])

    X2 = [x.split("-")[1] for x in batch]
    X2 = np.array([read_img(os.path.join(testfolder_path, x)) for x in X2])

    pred = model.predict([X1, X2]).ravel().tolist()
    predictions += pred

submission['is_related'] = predictions

submission.to_csv(os.path.join(parent_dir, "vgg_face.csv"), index=False)