In [None]:
# Mount Google Drive at /content/drive (Colab-only API) so files stored on
# Drive can be read through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import zipfile

# Unpack the project archive stored on the mounted Drive into the current
# working directory.
archive_path = '/content/drive/MyDrive/Project/Face Comparison/project.zip'
with zipfile.ZipFile(archive_path, mode='r') as zip_ref:
    zip_ref.extractall(path='./')

In [None]:
'READ ONE TEXT FILE'
def get_feature(file_path):
  """Load one embedding vector from a text file.

  The file is expected to contain a single bracketed list of
  whitespace-separated numbers, e.g. ``[[ 0.12 -0.5\\n  0.33 ]]``.

  Args:
    file_path: Path of the text file to read.

  Returns:
    list[float]: The numbers found in the file, in file order.

  Raises:
    ValueError: If a token between the outer brackets is not a valid float.
  """
  with open(file_path, 'r') as f:
    raw_text = f.read()
  # Strip surrounding whitespace first so a trailing newline after the closing
  # brackets is not mistaken for data, then drop the outer brackets. This
  # replaces a fixed [2:-2] slice that only worked for an exact "[[...]]"
  # layout with no trailing newline.
  inner_text = raw_text.strip().strip('[]')
  # split() with no argument splits on any run of whitespace, so numbers
  # separated by newlines, tabs or several spaces are all handled and no
  # empty tokens are produced.
  return [float(token) for token in inner_text.split()]

# Smoke-check the loader on one embedding file: show its length and values.
sample_features = get_feature('/content/lfw/AJ_Cook/AJ_Cook_0001.txt')
report_template = 'LEN FEATURE IS "{}" AND ONE SAMPLE IS --> {}.'
print(report_template.format(len(sample_features), sample_features))

In [None]:
import keras
from keras.layers import Dense, Input, Lambda
from keras.models import Model
import pandas as pd
import numpy as np
import random
from keras import backend as K
import statistics
from sklearn.metrics import accuracy_score

In [None]:
# Root folder of the per-person embedding files. generate_image_pairs joins it
# to folder names by plain string concatenation, so the trailing '/' matters.
images_path = './lfw/'
# Pair-list files handed to generate_image_pairs to build the train and test sets.
train_path = '/content/pairsDevTrain.txt'
test_path = '/content/pairsDevTest.txt'


def _embedding_path(images_path, folder, image_number):
  """Return the embedding file path for image `image_number` of `folder`."""
  return images_path + folder + '/' + folder + '_' + f"{int(image_number):04}" + '.txt'


def generate_image_pairs(images_path, file_path, seed=None, feature_loader=None):
  """Build shuffled, labelled embedding pairs from a pairs-list file.

  The first non-blank line of the file holds an integer N. The next N lines
  are tab-separated ``folder, image1, image2`` rows (both files come from the
  same folder, label 1). Every remaining line is a tab-separated
  ``folder1, image1, folder2, image2`` row (label 0). Each embedding is read
  from ``<images_path><folder>/<folder>_<image number padded to 4 digits>.txt``.

  Args:
    images_path: Prefix prepended to every embedding path by string
      concatenation, so it should end with a path separator.
    file_path: Path of the pairs-list file.
    seed: Optional seed. When given, the shuffle uses a private
      ``random.Random(seed)`` and is reproducible. When None, the
      module-level ``random.shuffle`` is used.
    feature_loader: Optional callable mapping a file path to a feature
      vector. Defaults to ``get_feature``.

  Returns:
    Tuple ``(images_pair1, images_pair2, labels_pair)`` of numpy arrays that
    are aligned by index.

  Raises:
    ValueError: If the file has no non-blank lines, or a row does not have
      the expected number of tab-separated fields.
  """
  if feature_loader is None:
    feature_loader = get_feature

  with open(file_path, 'r') as f:
    # Blank lines (typically a trailing newline at the end of the file) carry
    # no pair; dropping them avoids building a bogus row out of them.
    rows = [line.rstrip('\r\n').split('\t') for line in f if line.strip()]
  if not rows:
    raise ValueError('Pairs file "{}" contains no data.'.format(file_path))

  num_positive = int(rows[0][0])
  positive_rows = rows[1:num_positive + 1]
  negative_rows = rows[num_positive + 1:]

  all_pairs = []

  # Same-folder pairs, label 1.
  for row in positive_rows:
    if len(row) != 3:
      raise ValueError('Expected 3 fields in same-folder row, got: {!r}'.format(row))
    folder, image1, image2 = row
    all_pairs.append([feature_loader(_embedding_path(images_path, folder, image1)),
                      feature_loader(_embedding_path(images_path, folder, image2)),
                      1])

  # Different-folder pairs, label 0.
  for row in negative_rows:
    if len(row) != 4:
      raise ValueError('Expected 4 fields in different-folder row, got: {!r}'.format(row))
    folder1, image1, folder2, image2 = row
    all_pairs.append([feature_loader(_embedding_path(images_path, folder1, image1)),
                      feature_loader(_embedding_path(images_path, folder2, image2)),
                      0])

  # Shuffle whole [first, second, label] triples so the three output arrays
  # stay aligned.
  if seed is None:
    random.shuffle(all_pairs)
  else:
    random.Random(seed).shuffle(all_pairs)

  images_pair1 = np.array([pair[0] for pair in all_pairs])
  images_pair2 = np.array([pair[1] for pair in all_pairs])
  labels_pair = np.array([pair[2] for pair in all_pairs])
  return images_pair1, images_pair2, labels_pair
  

# Build the shuffled (first embedding, second embedding, label) arrays for the
# train and test pair lists.
images_pair1_TRAIN, images_pair2_TRAIN, labels_pair_TRAIN = generate_image_pairs(images_path, train_path)
images_pair1_TEST, images_pair2_TEST, labels_pair_TEST = generate_image_pairs(images_path, test_path)

In [None]:
def train_model(hidden, images_pair1, images_pair2, labels_pair,
                epochs=20, batch_size=512, validation_split=0.1):
  """Train a shared-weight pair classifier and save it to './Model<hidden>'.

  Both inputs pass through the same Dense(hidden, relu) layer. The two
  encodings are merged element-wise as ``1 - |a - b|`` and fed to a single
  sigmoid unit. The model is compiled with binary cross-entropy and Adam.

  Args:
    hidden: Number of units in the shared Dense layer; also used in the
      saved model's name.
    images_pair1: 2-D array of first-image embeddings, one row per pair.
    images_pair2: 2-D array of second-image embeddings, same shape.
    labels_pair: Array of 0/1 labels aligned with the two inputs.
    epochs: Number of training epochs.
    batch_size: Mini-batch size passed to ``fit``.
    validation_split: Fraction of the data ``fit`` holds out for validation.

  Returns:
    None. The trained model is written to disk.
  """
  # Take the input width from the data instead of hard-coding 512, so
  # embeddings of another size work without editing this function.
  feature_dim = int(np.asarray(images_pair1).shape[1])
  first_image_in = Input(shape=(feature_dim,))
  second_image_in = Input(shape=(feature_dim,))

  # One Dense instance applied to both inputs, so both branches share weights.
  shared_encoder = Dense(hidden, activation='relu')
  first_image_encoded = shared_encoder(first_image_in)
  second_image_encoded = shared_encoder(second_image_in)

  # NOTE(review): the merge is deliberately left as lambdas, exactly as before.
  # The saved models are reloaded later with keras.models.load_model, so
  # confirm loading still works before changing how this layer is defined.
  l1_similarity = lambda x: 1 - K.abs(x[0] - x[1])
  merged = Lambda(function=l1_similarity, output_shape=lambda x: x[0])([first_image_encoded,
                                                                        second_image_encoded])
  predictions = Dense(1, activation='sigmoid')(merged)

  model = Model([first_image_in, second_image_in], predictions)

  model.compile(loss='binary_crossentropy', optimizer="adam", metrics=["accuracy"])
  # summary() prints the table itself and returns None; wrapping it in print()
  # added a stray "None" line to the output.
  model.summary()

  model.fit([images_pair1, images_pair2], labels_pair, validation_split=validation_split,
            epochs=epochs, shuffle=True, batch_size=batch_size)

  model.save('./' + 'Model' + str(hidden))


In [None]:
# Train one model per hidden-layer width; train_model saves each one as
# './Model<width>'.
for hidden_units in (32, 64, 128):
  train_model(hidden_units, images_pair1_TRAIN, images_pair2_TRAIN, labels_pair_TRAIN)

In [None]:
# Evaluation section. The bare strings in this cell are no-op section labels.
'TEST'

# Disabled manual spot check: reloads the three saved models so a single test
# pair can be scored by hand (see the commented lines below).
# hidden_model128 = keras.models.load_model('/content/Model128')
# hidden_model64 = keras.models.load_model('/content/Model64')
# hidden_model32 = keras.models.load_model('/content/Model32')

'ONE SAMPLE TEST'
# Picks one test pair and adds a leading batch axis so predict() receives a
# batch of size one.
# index = 100
# test_image1 = np.expand_dims(images_pair1_TEST[index], axis=0)
# test_image2 = np.expand_dims(images_pair2_TEST[index], axis=0)


# Prints each model's raw output next to the true label of that pair.
# print(hidden_model128.predict([test_image1, test_image2]), labels_pair_TEST[index])
# print(hidden_model64.predict([test_image1, test_image2]), labels_pair_TEST[index])
# print(hidden_model32.predict([test_image1, test_image2]), labels_pair_TEST[index])

'ALL DATA TEST TOGETHER'
def get_predicts(model_path, images_pair1_TEST, images_pair2_TEST):
  """Predict a rounded label for every pair with a model loaded from disk.

  Args:
    model_path: Path of the saved Keras model to load.
    images_pair1_TEST: Array of first-image embeddings, one row per pair.
    images_pair2_TEST: Array of second-image embeddings, aligned with the first.

  Returns:
    list: One entry per pair, the first output column of the model passed
    through ``round``.
  """
  loaded_model = keras.models.load_model(model_path)
  raw_outputs = loaded_model.predict([images_pair1_TEST, images_pair2_TEST])
  predict_labels = []
  for output_row in raw_outputs:
    # Each row holds the model's single output value; round() turns it into a label.
    predict_labels.append(round(output_row[0]))
  return predict_labels


# Rounded test-set predictions of each saved model, kept under separate names
# because the accuracy report reads them individually.
predict_labels_model128 = get_predicts('/content/Model128', images_pair1_TEST, images_pair2_TEST)
predict_labels_model64 = get_predicts('/content/Model64', images_pair1_TEST, images_pair2_TEST)
predict_labels_model32 = get_predicts('/content/Model32', images_pair1_TEST, images_pair2_TEST)


In [None]:
def get_predicts_ensemble(model_paths, images_pair1_TEST, images_pair2_TEST):
  """Combine the rounded predictions of three saved models by majority vote.

  Args:
    model_paths: Sequence of exactly three saved-model paths.
    images_pair1_TEST: Array of first-image embeddings, one row per pair.
    images_pair2_TEST: Array of second-image embeddings, aligned with the first.

  Returns:
    list: One voted label per pair, or None (after printing a message) when
    `model_paths` does not contain exactly three paths.
  """
  if len(model_paths) != 3:
    print('THREE MODELS NEEDED!')
    return None

  # Delegate to get_predicts so model loading and rounding live in one place.
  # Each model is loaded and used in turn rather than all three being loaded
  # up front.
  per_model_labels = [get_predicts(model_path, images_pair1_TEST, images_pair2_TEST)
                      for model_path in model_paths]

  # zip(*...) regroups the three label lists into one (vote1, vote2, vote3)
  # tuple per pair; the mode of each tuple is that pair's ensemble label.
  return [statistics.mode(votes) for votes in zip(*per_model_labels)]


# Majority-vote predictions of the three saved models on the test pairs.
predict_labels_ensemble = get_predicts_ensemble(['/content/Model128', '/content/Model64', '/content/Model32'],
                                                images_pair1_TEST, images_pair2_TEST)

In [None]:

# Score every single model against the test labels, then the ensemble.
accuracy_model128 = accuracy_score(labels_pair_TEST, predict_labels_model128)
accuracy_model64 = accuracy_score(labels_pair_TEST, predict_labels_model64)
accuracy_model32 = accuracy_score(labels_pair_TEST, predict_labels_model32)
single_models_template = 'ACCURACY OF MODEL128 is "{}", ACCURACY OF MODEL64 is "{}" AND ACCURACY OF MODEL32 is "{}"'
print(single_models_template.format(accuracy_model128, accuracy_model64, accuracy_model32))

accuracy_ensemble = accuracy_score(labels_pair_TEST, predict_labels_ensemble)
ensemble_template = 'ACCURACY OF ENSEMBLE is "{}".'
print(ensemble_template.format(accuracy_ensemble))




ACCURACY OF MODEL128 is "0.739", ACCURACY OF MODEL64 is "0.716" AND ACCURACY OF MODEL32 is "0.657"
ACCURACY OF ENSEMBLE is "0.77".
