In [16]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from glob import glob
import cv2
import csv
# Switch matplotlib over to seaborn's default theme for the rest of the notebook.
sns.set()

In [2]:
# Root directory of the image dataset; load_image() globs '<DATA_DIR>/<name>/*'.
DATA_DIR = 'lfw2'

In [3]:
# Read the tab-separated pairs file into a list of string lists, dropping the
# first line ([1:]).
# newline='' is what the csv module asks for on files handed to csv.reader, so
# that the parser (not the file object) decides how line endings are handled.
with open('pairsDevTrain.txt', 'r', newline='') as csvfile:
    trainrows = list(csv.reader(csvfile, delimiter='\t'))[1:]

In [4]:
# Load the raw rows into a DataFrame with positional column labels (0, 1, 2, ...).
# The following cell relies on missing trailing fields of shorter rows showing
# up as None in column 3.
df = pd.DataFrame(trainrows)

In [5]:
# Expand every row to [name1, num1, name2, num2].
# When column 3 is None the row carries one name plus two image numbers, so the
# name in column 0 is reused for the second image; otherwise the row already
# has both names and is copied through unchanged.
trainrows = [
    [pair[0], pair[1], pair[0], pair[2]] if pair[3] is None
    else [pair[0], pair[1], pair[2], pair[3]]
    for _, pair in df.iterrows()
]

In [6]:
# Rebuild the frame from the normalised rows, this time with named columns,
# and display it as the cell output.
df = pd.DataFrame(trainrows, columns=['name1', 'num1', 'name2', 'num2'])
df

Unnamed: 0,name1,num1,name2,num2
0,Aaron_Peirsol,1,Aaron_Peirsol,2
1,Aaron_Peirsol,3,Aaron_Peirsol,4
2,Aaron_Sorkin,1,Aaron_Sorkin,2
3,Abdel_Nasser_Assidi,1,Abdel_Nasser_Assidi,2
4,Abdullah,1,Abdullah,3
...,...,...,...,...
2195,Tom_Vilsack,1,Wayne_Ferreira,5
2196,Trisha_Meili,1,Vladimiro_Montesinos,3
2197,Ty_Votaw,1,Wayne_Allard,1
2198,Vytas_Danelius,1,Zaini_Abdullah,1


In [187]:
def load_image(name, num, shape):
    """Load one image of ``name`` from ``DATA_DIR`` with pixel values divided by 255.

    The files matching ``{DATA_DIR}/{name}/*`` are ordered by the integer that
    follows the last underscore in the path (extension stripped), and the
    ``num``-th file in that order (1-based) is read with OpenCV.

    Args:
        name: Sub-directory of ``DATA_DIR`` that holds the images.
        num: 1-based position of the wanted image; anything ``int()`` accepts.
        shape: Target size handed to ``cv2.resize`` (OpenCV expects
            ``(width, height)``), or ``None`` to keep the original size.

    Returns:
        The image as a float array, or ``None`` if loading failed. Failures are
        reported on stdout and never raised, which keeps the original
        best-effort contract of this function.
    """
    try:
        num = int(num)
        file_lists = glob(f'{DATA_DIR}/{name}/*')
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if not file_lists:
            raise FileNotFoundError(f"no files found under {DATA_DIR}/{name}/")
        # Strip whatever extension is present rather than assuming exactly
        # three characters plus the dot.
        file_lists.sort(key=lambda path: int(path.split('_')[-1].rsplit('.', 1)[0]))
        # Without this guard num == 0 (or a negative num) would wrap around via
        # negative indexing and silently return the wrong image.
        if not 1 <= num <= len(file_lists):
            raise IndexError(f"image number {num} is outside 1..{len(file_lists)}")
        path = file_lists[num - 1]
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        # cv2.imread signals an unreadable file by returning None, not by raising.
        if img is None:
            raise OSError(f"cv2.imread could not read {path!r}")
        img = img / 255
        if shape is None:
            return img
        return cv2.resize(img, shape)
    except Exception as exc:
        # `Exception` (not a bare except) so that KeyboardInterrupt / SystemExit
        # still propagate, e.g. when a long training run is interrupted.
        print(f"Error in image loading ({name!r}, {num!r}): {exc}")
        return None

In [171]:
def load_pairs(name_1, num_1, name_2, num_2, shape=None):
    """Load the two images of a pair through ``load_image``.

    Args:
        name_1, num_1: Identify the first image.
        name_2, num_2: Identify the second image.
        shape: Forwarded unchanged to ``load_image`` for both images.

    Returns:
        Tuple ``(first, second)`` holding whatever ``load_image`` returned for
        each image, in that order.
    """
    first = load_image(name_1, num_1, shape)
    second = load_image(name_2, num_2, shape)
    return first, second

In [9]:
# Smoke check: load the first pair resized to 105x105 and show the shape of its first image.
load_pairs(*df.values[0], (105, 105))[0].shape

(105, 105, 3)

In [172]:
from tensorflow.keras import datasets, layers, models
from tensorflow.keras import backend as K

def get_siamese_model(input_shape):
    """Build the convolutional network applied to each image of a pair.

    Layer stack, in order: Conv2D(64, 10x10) -> MaxPooling2D ->
    Conv2D(128, 7x7) -> MaxPooling2D -> Conv2D(128, 4x4) -> MaxPooling2D ->
    Conv2D(256, 4x4) -> Flatten -> Dense(4096). Every convolution uses ReLU
    and the final dense layer uses a sigmoid.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    stack = [
        layers.Conv2D(64, (10, 10), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D(),
        layers.Conv2D(128, (7, 7), activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(128, (4, 4), activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(256, (4, 4), activation='relu'),
        layers.Flatten(),
        layers.Dense(4096, activation='sigmoid'),
    ]
    return models.Sequential(stack)

In [173]:
def euclidean_dist(vect):
    """Euclidean distance between two batches of vectors.

    Args:
        vect: Pair ``(x, y)`` of tensors; their squared differences are summed
            over axis 1.

    Returns:
        Tensor holding one distance per row (axis 1 is kept with size 1).
    """
    x, y = vect
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    # The previous version returned the clamped sum of squares, i.e. the
    # *squared* distance. The epsilon clamp exists to keep sqrt away from 0,
    # where its gradient is unbounded, so the square root belongs right after it.
    return K.sqrt(K.maximum(sum_square, K.epsilon()))

In [174]:
def euclidean_distance(vecs):
    """Square root of the sum of ``vecs`` over axis 1.

    Fed with element-wise squared differences (e.g. the output of
    ``subs_square``) this yields a Euclidean distance per row.

    Args:
        vecs: Tensor whose axis 1 is summed.

    Returns:
        Tensor with axis 1 reduced to size 1.
    """
    total = K.sum(vecs, axis=1, keepdims=True)
    # Clamp before sqrt, as euclidean_dist does: the gradient of sqrt is
    # unbounded at 0, which would otherwise poison training with inf/NaN
    # whenever a row sums to exactly zero.
    return K.sqrt(K.maximum(total, K.epsilon()))

def subs_square(vecs):
    """Element-wise squared difference of a pair of tensors.

    Args:
        vecs: Pair ``(first, second)`` of tensors.

    Returns:
        ``(first - second) ** 2`` computed element-wise via the Keras backend.
    """
    first, second = vecs
    difference = first - second
    return K.square(difference)

In [201]:
# Shape of every image fed to the network: 105 x 105 with 3 channels.
img_shape = (105, 105, 3)

# One Input tensor per image of the pair.
input_img1 = layers.Input(img_shape)
input_img2 = layers.Input(img_shape)

    
# A single network instance is called on both inputs, so both branches use the
# same layers and therefore the same weights.
model = get_siamese_model(img_shape)
siamese_model_img1 = model(input_img1)
siamese_model_img2 = model(input_img2)


# Disabled alternative head (kept for reference):
# x = layers.Dense(1, activation='sigmoid')([siamese_model_img1, siamese_model_img2])
# Merge the two branch outputs into one value per pair via euclidean_dist.
x = layers.Lambda(euclidean_dist)([siamese_model_img1, siamese_model_img2])
# Disabled alternative (kept for reference):
# x = layers.Lambda(euclidean_distance)(x)
# Single sigmoid unit on top of that value; this is the model output.
x = layers.Dense(1,activation='sigmoid')(x)
siamese_net = models.Model(inputs=[input_img1,input_img2],outputs=x)
siamese_net.summary()

Model: "functional_43"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_59 (InputLayer)           [(None, 105, 105, 3) 0                                            
__________________________________________________________________________________________________
input_60 (InputLayer)           [(None, 105, 105, 3) 0                                            
__________________________________________________________________________________________________
sequential_28 (Sequential)      (None, 4096)         38960448    input_59[0][0]                   
                                                                 input_60[0][0]                   
__________________________________________________________________________________________________
lambda_25 (Lambda)              (None, 1)            0           sequential_28[0][0]  

In [202]:
# Shuffle the rows once before training. A fixed random_state makes the order
# (and hence the batches) reproducible under Restart Kernel -> Run All.
df = df.sample(frac=1, random_state=42)

In [203]:
# Number of pairs per batch; read by train_generator() and by the steps_per_epoch computation.
batch_size = 64
def train_generator():
    """Endlessly yield training batches built from the global ``df``.

    ``df`` is walked in consecutive slices of ``batch_size`` rows. Each row is
    unpacked into ``load_pairs`` (images resized to 105x105) and labelled 1 when
    the first and third fields (the two names) are equal, else 0. After the
    last slice the walk starts again from the top.

    Pairs for which either image failed to load (``None``) are skipped so that a
    single bad file does not produce an unusable batch.

    Yields:
        ``([left_images, right_images], labels)`` as NumPy arrays.

    Raises:
        RuntimeError: If a complete pass over ``df`` produced no batch at all
            (empty ``df`` or every image failed to load). Without this the
            generator would spin forever without yielding.
    """
    while True:
        produced_batch = False
        for start in range(0, len(df), batch_size):
            x_batch_left = []
            x_batch_right = []
            y_batch = []
            # Slicing past the end is clamped, so the last batch is simply shorter.
            for row in df[start:start + batch_size].values:
                img1, img2 = load_pairs(*row, (105, 105))
                if img1 is None or img2 is None:
                    continue
                label = 1 if row[0] == row[2] else 0
                x_batch_left.append(img1)
                x_batch_right.append(img2)
                y_batch.append(label)
            if not y_batch:
                # Nothing usable in this slice; do not hand Keras an empty batch.
                continue
            produced_batch = True
            yield [np.asarray(x_batch_left), np.asarray(x_batch_right)], np.asarray(y_batch)
        if not produced_batch:
            raise RuntimeError("train_generator: no image pair could be loaded from df")

In [204]:
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
# Stop once the training loss has failed to improve by at least 1e-4 for
# 3 epochs, and multiply the learning rate by 0.1 after a single epoch without
# improvement (never going below 1e-8).
callbacks = [EarlyStopping(monitor='loss', patience=3, verbose=1, min_delta=1e-4),
             ReduceLROnPlateau(monitor='loss', factor=0.1, patience=1, cooldown=0, min_lr=1e-8, verbose=1)]


# NOTE(review): this SGD optimizer is not passed to compile() below (Adam is);
# the assignment is kept only so the name stays defined.
# `learning_rate` replaces the deprecated `lr` keyword on both optimizers.
optimizer = SGD(learning_rate=0.001, momentum=0.5)
siamese_net.compile(optimizer=Adam(learning_rate=0.003), loss="binary_crossentropy", metrics=['accuracy'])
# steps_per_epoch uses ceiling division so that one epoch is exactly one pass
# over df; `len(df) // batch_size + 1` scheduled an extra step whenever len(df)
# was an exact multiple of batch_size.
# `shuffle=` is dropped because Keras ignores it when x is a generator.
history = siamese_net.fit(x=train_generator(), callbacks=callbacks, epochs=10, verbose=1,
                          steps_per_epoch=(len(df) + batch_size - 1) // batch_size)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.00030000000260770325.
Epoch 5/10
Epoch 00005: ReduceLROnPlateau reducing learning rate to 3.000000142492354e-05.
Epoch 6/10
Epoch 00006: ReduceLROnPlateau reducing learning rate to 3.000000106112566e-06.
Epoch 00006: early stopping
