In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from glob import glob
import cv2
import time
import csv
sns.set()
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tqdm import tqdm

In [2]:
# Root directory that load_image globs as '<DATA_DIR>/<name>/*' to find a person's images.
DATA_DIR = 'lfw2'
# Tab-separated pair-list files handed to get_rows for the train and test splits.
TRAIN_PATH = 'pairsDevTrain.txt'
TEST_PATH = 'pairsDevTest.txt'

In [3]:
def get_rows(path_train, path_test):
    """
    Read the train and test pair files and return their data rows.

    Both files are parsed as tab-separated text. The first line of each file
    is skipped as a header, and completely empty lines are dropped so they do
    not turn into rows without any fields.

    :param path_train: path of the tab-separated training pairs file
    :param path_test: path of the tab-separated test pairs file
    :return: tuple ``(trainrows, testrows)``; each element is a list of rows
        and every row is a list of string fields
    """
    def _read_rows(path):
        # The csv module asks for newline='' when it is given a file object,
        # so that line endings are interpreted by the reader itself.
        with open(path, 'r', newline='') as csvfile:
            reader = csv.reader(csvfile, delimiter='\t')
            next(reader, None)  # skip the header line; tolerate an empty file
            return [row for row in reader if row]

    return _read_rows(path_train), _read_rows(path_test)

def return_fixed_dataset(df):
    """
    Normalise every row of the pairs table to ``[name1, num1, name2, num2]``.

    Rows are read positionally. A row whose fourth field is missing (``None``,
    NaN, or the column does not exist at all) is treated as a three-field row
    ``name, num1, num2``: the name is repeated for the second image. Rows with
    four fields are passed through unchanged.

    :param df: DataFrame with positional columns 0..2 and an optional column 3
    :return: list of ``[name1, num1, name2, num2]`` lists, one per input row
    """
    rows = []
    # itertuples gives plain positional tuples, which is faster than iterrows
    # and does not depend on the column labels being the integers 0..3.
    for record in df.itertuples(index=False, name=None):
        first_name, first_num, third = record[0], record[1], record[2]
        fourth = record[3] if len(record) > 3 else None
        # pd.isna covers both None and NaN fill values for the missing field.
        if pd.isna(fourth):
            rows.append([first_name, first_num, first_name, third])
        else:
            rows.append([first_name, first_num, third, fourth])
    return rows

def split_train_test_val(trainrows, testrows, train_frac=0.85, random_state=None):
    """
    Build the train, validation and test DataFrames from the raw pair rows.

    Both row lists are normalised with ``return_fixed_dataset`` into the
    columns ``name1, num1, name2, num2``. The training rows are then shuffled
    and cut: the first ``train_frac`` share stays in the training set and the
    remainder becomes the validation set. The test rows are not shuffled.
    Intermediate shapes are printed along the way.

    :param trainrows: raw training rows (lists of fields)
    :param testrows: raw test rows (lists of fields)
    :param train_frac: share of the shuffled training rows kept for training;
        the rest is used for validation (default 0.85)
    :param random_state: forwarded to ``DataFrame.sample``; pass an int to get
        the same split on every run (default ``None`` keeps a fresh shuffle)
    :return: tuple ``(df_train, df_val, df_test)``
    :raises ValueError: if ``train_frac`` is outside ``[0, 1]``
    """
    if not 0 <= train_frac <= 1:
        raise ValueError("train_frac must be between 0 and 1")

    columns = ['name1', 'num1', 'name2', 'num2']

    df_train = pd.DataFrame(trainrows)
    df_test = pd.DataFrame(testrows)
    print(df_train.shape)
    trainrows = return_fixed_dataset(df_train)
    testrows = return_fixed_dataset(df_test)
    print(len(trainrows))
    df_train = pd.DataFrame(trainrows, columns=columns)
    df_test = pd.DataFrame(testrows, columns=columns)
    print(df_train.shape)

    # Shuffle once, then cut, so train and validation never share a row.
    shuffled = df_train.sample(frac=1, random_state=random_state)
    cut_first = int(shuffled.shape[0] * train_frac)
    df_val = shuffled[cut_first:]
    df_train = shuffled[:cut_first]
    print(df_val.shape)
    print(df_train.shape)

    return df_train, df_val, df_test

def load_image(name, num, shape):
    """
    Load one grayscale image of a person, scaled to the range [0, 1].

    The files matching ``<DATA_DIR>/<name>/*`` are sorted by the integer that
    follows the last underscore in the file name, and the ``num``-th file
    (1-based) of that ordering is read with OpenCV in grayscale mode and
    divided by 255.

    Loading is best-effort: any failure is reported with ``print`` and the
    function returns ``None`` instead of raising.

    :param name: sub-directory of ``DATA_DIR`` holding the person's images
    :param num: 1-based position of the image; anything ``int()`` accepts
    :param shape: target size passed to ``cv2.resize``, or ``None`` to return
        the image at its original size
    :return: 2-D array when ``shape`` is ``None``; otherwise the resized image
        with a trailing channel axis of length 1; ``None`` on failure
    """
    try:
        num = int(num)
        if num < 1:
            # num - 1 would be negative and silently index from the end.
            raise IndexError(f"image number must be >= 1, got {num}")
        file_lists = glob(f'{DATA_DIR}/{name}/*')
        if not file_lists:
            raise FileNotFoundError(f"no files found under {DATA_DIR}/{name}")
        # Order by the numeric suffix so that num selects images in numeric order.
        file_lists.sort(key=lambda path: int(path.rsplit('_', 1)[-1].rsplit('.', 1)[0]))
        raw = cv2.imread(file_lists[num - 1], cv2.IMREAD_GRAYSCALE)
        if raw is None:
            # cv2.imread reports an unreadable file by returning None.
            raise ValueError(f"could not read image {file_lists[num - 1]}")
        img = raw / 255
        if shape is None:
            return img
        # Append the channel axis without assuming a particular target size.
        return np.expand_dims(cv2.resize(img, shape), axis=-1)
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C / kernel interrupts still propagate.
        print(f"Error in image loading ({name!r}, {num!r}): {exc!r}")
        return None

def load_pairs(name_1, num_1, name_2, num_2, shape=None):
    """
    Load both images of a pair through ``load_image``.

    :param name_1: name used to locate the first image
    :param num_1: image number of the first image
    :param name_2: name used to locate the second image
    :param num_2: image number of the second image
    :param shape: forwarded unchanged to ``load_image`` for both images
    :return: tuple ``(first_image, second_image)``
    """
    first_image = load_image(name_1, num_1, shape)
    second_image = load_image(name_2, num_2, shape)
    return first_image, second_image

In [4]:
def euclidean_dist(vect):
    """
    Sum of squared differences between two tensors, floored at epsilon.

    No square root is taken, so the value is the squared distance along
    axis 1 (never smaller than ``K.epsilon()``), not the distance itself.

    :param vect: pair ``(x, y)`` of tensors to compare
    :return: ``max(sum((x - y) ** 2, axis=1), K.epsilon())`` with the reduced
        axis kept
    """
    left, right = vect
    squared_diff = K.square(left - right)
    return K.maximum(K.sum(squared_diff, axis=1, keepdims=True), K.epsilon())


def euclidean_distance(vecs):
    """
    Euclidean distance between two tensors along axis 1.

    The sum of squares is floored at ``K.epsilon()`` before the square root:
    the derivative of ``sqrt`` is unbounded at zero, so identical inputs would
    otherwise produce non-finite gradients during training.

    :param vecs: pair ``(x, y)`` of tensors to compare
    :return: ``sqrt(max(sum((x - y) ** 2, axis=1), K.epsilon()))`` with the
        reduced axis kept
    """
    x, y = vecs
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    return K.sqrt(K.maximum(sum_square, K.epsilon()))


def subs_square(vecs):
    """
    Element-wise squared difference of two tensors.

    :param vecs: pair ``(x, y)`` of tensors
    :return: ``(x - y) ** 2`` computed element by element (no reduction)
    """
    left, right = vecs
    difference = left - right
    return K.square(difference)


def mae(vecs):
    """
    Element-wise absolute difference of two tensors.

    Despite the name, no mean is taken; the result has one value per element.

    :param vecs: indexable holding the two tensors at positions 0 and 1
    :return: ``|vecs[0] - vecs[1]|`` computed element by element
    """
    first = vecs[0]
    second = vecs[1]
    return K.abs(first - second)

In [5]:
def get_siamese_model(input_shape, kernel_initializer, bias_initializer, kernel_regularizer=0.01, kernel_regularizer_dense = 0.0001):
    """
    Build the convolutional encoder applied to each image of a pair.

    The Keras session is cleared first. The encoder is a Sequential stack of
    four ReLU convolution stages (64 filters 10x10, 128 filters 7x7,
    128 filters 4x4, 256 filters 4x4), each followed by batch normalisation;
    the first three stages are also followed by max-pooling. The result is
    flattened and fed to a 4096-unit sigmoid dense layer.

    :param input_shape: input shape given to the first convolution
    :param kernel_initializer: kernel initializer used by every layer
    :param bias_initializer: bias initializer used by every layer
    :param kernel_regularizer: L2 factor for the convolution kernels
    :param kernel_regularizer_dense: L2 factor for the dense kernel
    :return: the Sequential encoder model
    """
    K.clear_session()

    # (filters, kernel size, followed by max-pooling?)
    conv_stages = [
        (64, (10, 10), True),
        (128, (7, 7), True),
        (128, (4, 4), True),
        (256, (4, 4), False),
    ]

    encoder = models.Sequential()
    for position, (filters, kernel_size, pooled) in enumerate(conv_stages):
        conv_kwargs = {
            'activation': 'relu',
            'kernel_initializer': kernel_initializer,
            'bias_initializer': bias_initializer,
            'kernel_regularizer': l2(kernel_regularizer),
        }
        if position == 0:
            # Only the first layer declares the input shape.
            conv_kwargs['input_shape'] = input_shape
        encoder.add(layers.Conv2D(filters, kernel_size, **conv_kwargs))
        encoder.add(BatchNormalization())
        if pooled:
            encoder.add(layers.MaxPooling2D())

    encoder.add(layers.Flatten())
    encoder.add(layers.Dense(4096, activation='sigmoid',
                             kernel_initializer=kernel_initializer,
                             bias_initializer=bias_initializer,
                             kernel_regularizer=l2(kernel_regularizer_dense)))
    return encoder


def build_network(img_shape, kernel_initializer, bias_initializer):
    """
    Assemble the full siamese network on top of the shared encoder.

    Both inputs go through the same encoder from ``get_siamese_model``. The
    element-wise absolute difference of the two encodings (``mae``) is fed to
    a single sigmoid unit that produces the network output.

    :param img_shape: shape of one input image
    :param kernel_initializer: kernel initializer for the encoder and the output layer
    :param bias_initializer: bias initializer for the encoder and the output layer
    :return: Keras Model taking ``[image_1, image_2]`` and returning the sigmoid output
    """
    # Build the shared encoder first: get_siamese_model calls K.clear_session(),
    # so the Input tensors must be created after that reset, not before it.
    model = get_siamese_model(img_shape, kernel_initializer, bias_initializer)

    input_img1 = layers.Input(img_shape)
    input_img2 = layers.Input(img_shape)

    # The same model instance is applied to both inputs, so the weights are shared.
    siamese_model_img1 = model(input_img1)
    siamese_model_img2 = model(input_img2)

    x = layers.Lambda(mae)([siamese_model_img1, siamese_model_img2])

    x = layers.Dense(1, activation='sigmoid', kernel_initializer=kernel_initializer,
                     bias_initializer=bias_initializer)(x)

    siamese_net = models.Model(inputs=[input_img1, input_img2], outputs=x)

    return siamese_net

In [6]:

def train_generator(df, batch_size):
    """
    Endless generator of image-pair batches for the network.

    The rows of ``df`` are walked in order in chunks of ``batch_size`` (the
    last chunk of a pass may be smaller), and the walk restarts from the top
    once the end is reached. For every row both images are loaded at
    105x105 via ``load_pairs``; the label is 1 when the first and third
    fields of the row are equal and 0 otherwise.

    :param df: DataFrame whose rows are ``name1, num1, name2, num2``
    :param batch_size: number of pairs per batch; must be positive
    :return: generator yielding ``([left_images, right_images], labels)`` as
        numpy arrays
    :raises ValueError: on the first ``next()`` if ``df`` is empty or
        ``batch_size`` is not positive; without this check the endless loop
        would spin forever without ever yielding
    """
    total = len(df)
    if total == 0:
        raise ValueError("train_generator received an empty dataframe")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    while True:
        for start in range(0, total, batch_size):
            x_batch_left = []
            x_batch_right = []
            y_batch = []
            # Slicing past the end is safe, so the last batch is simply shorter.
            for row in df[start:start + batch_size].values:
                img1, img2 = load_pairs(*row, (105, 105))
                x_batch_left.append(img1)
                x_batch_right.append(img2)
                y_batch.append(1 if row[0] == row[2] else 0)
            yield [np.asarray(x_batch_left), np.asarray(x_batch_right)], np.asarray(y_batch)

In [13]:

def train(lr, decay_rate, optimizer, batch_size, generator, train_set, val_set, siamese_net, epochs, patience):
    """
    Compile and fit the siamese network with early stopping.

    :param lr: learning rate; used directly by SGD and as the initial rate of
        the exponential-decay schedule for Adam
    :param decay_rate: decay rate of the Adam learning-rate schedule
    :param optimizer: ``"SGD"`` selects SGD with momentum 0.5; any other value
        selects Adam
    :param batch_size: batch size handed to ``generator`` and used to derive
        the number of steps per epoch
    :param generator: callable ``generator(dataframe, batch_size)`` returning
        an endless batch generator
    :param train_set: training DataFrame
    :param val_set: validation DataFrame
    :param siamese_net: model to compile and fit (modified in place)
    :param epochs: maximum number of epochs
    :param patience: early-stopping patience on ``val_loss``
    :return: the History object returned by ``fit``
    """
    if optimizer == "SGD":
        # Honour the requested learning rate instead of a fixed 0.001.
        optimizer = SGD(learning_rate=lr, momentum=0.5)
    else:
        optimizer = Adam(learning_rate=ExponentialDecay(lr, 100000, decay_rate))

    def _steps(n_rows):
        # Ceiling division: exactly one pass over the data per epoch, with no
        # extra step when n_rows is a multiple of batch_size.
        return -(-n_rows // batch_size)

    train_batches = generator(train_set, batch_size)
    val_batches = generator(val_set, batch_size)
    early_stop = EarlyStopping(monitor='val_loss', verbose=1, patience=patience, restore_best_weights=True)

    siamese_net.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=['accuracy'])

    # `shuffle` is not passed: Keras ignores it when the input is a generator.
    history = siamese_net.fit(train_batches,
                              callbacks=[early_stop],
                              epochs=epochs,
                              validation_data=val_batches,
                              validation_steps=_steps(len(val_set)),
                              verbose=0,
                              steps_per_epoch=_steps(len(train_set)))
    return history

In [14]:
# Read the raw pair rows from the two pair-list files, then turn them into the
# train / validation / test DataFrames used by the cells below.
print("Loading data...")
trainrows, testrows = get_rows(TRAIN_PATH, TEST_PATH)
df_train, df_val, df_test = split_train_test_val(trainrows, testrows)
# Shape of the training split after the validation rows were cut off.
print(df_train.shape)
print("Data loaded.")

Loading data...
(2200, 4)
2200
(2200, 4)
(330, 4)
(1870, 4)
(1870, 4)
Data loaded.


In [17]:

# Grid search over the training hyper-parameters. Every configuration builds a
# fresh network, trains it, evaluates it on the test pairs and records the
# test loss/accuracy; the results table is written to 'results.csv'.
print("Runing hyperparameter tuning...")
lr = [0.001, 0.0001]
# NOTE(review): the recorded run of this cell stopped with a GPU out-of-memory
# error on the first configuration (batch size 128) - consider a smaller value.
batch_size = [128]
epochs = [10, 30, 50]
patience = [ 10, 20]
optimizer = ['Adam', 'SGD']
results = {'lr': [], 'batch_size': [], 'epochs': [], 'patience': [], 'optimizer': []
    , 'loss': [], 'accuracy': []}

network_params = {
    'img_shape': (105, 105, 1),
    'kernel_initializer': RandomNormal(mean=0, stddev=0.01),
    'bias_initializer': RandomNormal(mean=0.5, stddev=0.01),
}

# Defaults; the searched keys are overwritten for every configuration below.
model_params = {
    'lr': 0.0001,
    'decay_rate': 0.98,
    'optimizer': 'Adam',
    'batch_size': 64,
    'generator': train_generator,
    'train_set': df_train,
    'val_set': df_val,
    'siamese_net': None,
    'epochs': 30,
    'patience': 5
}

# Batch size used when evaluating on the test pairs.
test_batch_size = 64
# Ceiling division: one full pass over the test pairs, no extra step.
test_steps = -(-len(df_test) // test_batch_size)

# Flatten the grid so the progress bar counts configurations, in the same
# order the nested loops would visit them.
search_grid = [
    (rate, bs, ep, pat, opt)
    for rate in lr
    for bs in batch_size
    for ep in epochs
    for pat in patience
    for opt in optimizer
]

for rate, bs, ep, pat, opt in tqdm(search_grid):
    model_params.update(lr=rate, batch_size=bs, epochs=ep, patience=pat, optimizer=opt)

    siamese_net = build_network(**network_params)
    model_params['siamese_net'] = siamese_net
    print('*' * 50)
    print(
        f'''Training with learning_rate: {rate}, batch-size: {bs}, epochs: {ep}, patience: {pat}, optimizer: {opt}''')
    start = time.time()
    history = train(**model_params)
    # Stop the clock before evaluation so the reported time covers training only.
    end = time.time()
    test_gen = train_generator(df_test, test_batch_size)
    # batch_size is not passed: the generator already yields whole batches.
    loss, accuracy = siamese_net.evaluate(test_gen, steps=test_steps)
    print(f"Training finished after {end - start} time")
    print(f"Accuracy: {accuracy}")
    print(f"loss: {loss}")
    print('*' * 50)
    results['lr'].append(rate)
    results['batch_size'].append(bs)
    results['epochs'].append(ep)
    results['patience'].append(pat)
    results['optimizer'].append(opt)
    results['loss'].append(loss)
    results['accuracy'].append(accuracy)
    # Persist after every configuration so a crash later in the search does
    # not throw away the runs that already finished.
    pd.DataFrame.from_dict(results).to_csv('results.csv')

df_results = pd.DataFrame.from_dict(results)
df_results.to_csv('results.csv')

  0%|                                                                                                                                                                                                      | 0/2 [00:00<?, ?it/s]

Runing hyperparameter tuning...
**************************************************
Training with learning_rate: 0.001, batch-size: 128, epochs: 10, patience: 10, optimizer: Adam


  0%|                                                                                                                                                                                                      | 0/2 [00:22<?, ?it/s]


ResourceExhaustedError:  OOM when allocating tensor with shape[128,128,42,42] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node functional_1/sequential/batch_normalization_1/FusedBatchNormV3 (defined at <ipython-input-13-461f6d5d744f>:27) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_168201]

Function call stack:
train_function
