In [26]:
import os
import cv2
import time
import random
import numpy as np

import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras import layers, metrics
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model, Sequential

from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import pickle

In [27]:
# Planting
random.seed(5)
np.random.seed(5)
tf.random.set_seed(5)

# # Global variables
LFW = "LFW"
LFW_DATASET_CHANGED = os.path.join(LFW, "lfw_changed")
DATA_PATH = LFW_DATASET_CHANGED
DATASETS = "Datasets"
TRAIN_DATASETS_PATH = os.path.join(DATASETS, "trainDataset")
TEST_DATASETS_PATH = os.path.join(DATASETS, "testDataset")
TRAIN_TRIPLETS_PATH = os.path.join(DATASETS, "trainTriplets")
TEST_TRIPLETS_PATH = os.path.join(DATASETS, "testTriplets")

CHECKPOINT_PATH = 'Checkpoints'
ENCODER_SAVE_PATH = 'Encoder'

EMBEDDING_SAVE_PATH = os.path.join(CHECKPOINT_PATH, "embedding")
EMBEDDING_BEST_SAVE_PATH = os.path.join(CHECKPOINT_PATH, "embedding_best")
SIAMESE_MODEL_SAVE_PATH = os.path.join(CHECKPOINT_PATH, "siamese_model")
SIAMESE_MODEL_BEST_SAVE_PATH = os.path.join(CHECKPOINT_PATH, "siamese_model_best")
TEST_METRICS_PATH = os.path.join(CHECKPOINT_PATH, "testMetrics")
TRAIN_LOSS_PATH = os.path.join(CHECKPOINT_PATH, "trainLoss")

IMAGE_SIZE = 250

In [28]:
def getTriplets(directory, foldersList, max=10):
  triplets = []
  folders = list(foldersList.keys())
  
  for folder in folders:
    path = os.path.join(directory, folder)
    files = list(os.listdir(path))[:max]
    filesAmount = len(files)
      
    for i in range(filesAmount-1):
      for j in range(i+1, filesAmount):
        anchor = (folder, f"{i}.jpg")
        positive = (folder, f"{j}.jpg")

        # As long as negative folder is the same as anchor's folder other choose random folder
        negativeFolder = folder
        while negativeFolder == folder:
          negativeFolder = random.choice(folders)
        negativeFile = random.randint(0, foldersList[negativeFolder]-1)
        negative = (negativeFolder, f"{negativeFile}.jpg")

        triplets.append((anchor, positive, negative))
          
  random.shuffle(triplets)
  return triplets

In [43]:
def readImageForGetTripletsBatchMethod(path):
  path = os.path.join(DATA_PATH, path[0], path[1])
  print(path)
  image = cv2.imread(path)
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  return image

# Keep in mind size
def getTripletsBatch(tripletsList, size=256):
  triplets = (len(tripletsList) // size) + 1

  for i in range(triplets):
    anchor   = []
    positive = []
    negative = []
    
    j = i * size
    while ( j < (i + 1) * size ) and ( j < len(tripletsList) ):
      anch, pos, neg = tripletsList[j]
      anchor.append(readImageForGetTripletsBatchMethod(anch))
      positive.append(readImageForGetTripletsBatchMethod(pos))
      negative.append(readImageForGetTripletsBatchMethod(neg))
      j += 1

    anchor = preprocess_input(np.array(anchor))
    positive = preprocess_input(np.array(positive))
    negative = preprocess_input(np.array(negative))

    yield ([anchor, positive, negative])

In [30]:
def getEmbedding(inputShape):
  XceptionModel = Xception(
    input_shape=inputShape,
    weights='imagenet',
    include_top=False,
    pooling='avg'
  )
  
  for i in range(len(XceptionModel.layers) - 27):
    XceptionModel.layers[i].trainable = False

  model = Sequential([
    XceptionModel,
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(256, activation="relu"),
    layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1))
  ], name="Embedding")

  return model

In [31]:
ambe = getEmbedding((IMAGE_SIZE, IMAGE_SIZE, 3))
ambe.summary()

In [32]:
def extractEmbedding(model):
  embedding = getEmbedding((IMAGE_SIZE, IMAGE_SIZE, 3))
  i=0
  for embbeddingLayer in model.layers[0].layers[3].layers:
    layer_weight = embbeddingLayer.get_weights()
    embedding.layers[i].set_weights(layer_weight)
    i += 1

  return embedding

In [35]:
def ModelMetrics(positives, negatives):
  true = np.array([0] * len(positives) + [1] * len(negatives))
  predicted = np.append(positives, negatives)
  
  print(f"\nAccuracy of model: {accuracy_score(true, predicted)}\n")  
  confusionMatrix = confusion_matrix(true, predicted)
  categories  = ['Similar', 'Different']
  names = ['Exected Similar', 'Unexpected Similar', 'Unexpected Different', 'Exected Different']
  percentages = ['{0:.2%}'.format(value) for value in confusionMatrix.flatten() / np.sum(confusionMatrix)]

  labels = [(str(v1) + "\n" + str(v2)) for v1, v2 in zip(names, percentages)]
  labels = np.asarray(labels).reshape(2, 2)

  sns.heatmap(confusionMatrix, annot=labels, cmap='Blues', fmt='', xticklabels=categories, yticklabels=categories)
  plt.xlabel("Predicted", fontdict={'size':14}, labelpad=10)
  plt.ylabel("Actual", fontdict={'size':14}, labelpad=10)
  plt.title ("Confusion Matrix", fontdict={'size':18}, pad=20)

In [36]:
def classifyImages(anchorList, evaluationList, embedding, threshold=1.3):
  anchorTensor = embedding.predict(anchorList)
  evaluationTensor = embedding.predict(evaluationList)
  
  distance = np.sum(np.square(anchorTensor - evaluationTensor), axis=-1)
  prediction = np.where(distance <= threshold, 0, 1)

  return prediction

In [38]:
def trainSiameseModel(trainDataset, testTriplets, epochs=10, batchSize=256):
  max_acc=0
  trainLoss = []
  testMetrics = []

  for epoch in range(1, epochs+1):
    t = time.time()
    epochLoss = []
    for data in getTripletsBatch(trainTriplets, size=batchSize):
      loss = siameseModel.train_on_batch(data)
      epochLoss.append(loss)
    epochLoss = sum(epochLoss)/len(epochLoss)
    trainLoss.append(epochLoss)

    print(f"\nEPOCH: {epoch} \t (Done in: {int(time.time()-t)} sec)")
    print(f"Train Loss = {epochLoss:.5f}")
    
    # Test on test data
    metric = testOnTriplets(testTriplets, size=batchSize)
    testMetrics.append(metric)
    accuracy = metric[0]
    
    # Save new best weights and embedding model
    if accuracy>=max_acc:
      siameseModel.save_weights(SIAMESE_MODEL_BEST_SAVE_PATH)
      max_acc = accuracy
      embedding = extractEmbedding(siameseModel)
      embedding.save_weights(EMBEDDING_BEST_SAVE_PATH)

  # Save weoghts and embedding model
  siameseModel.save_weights(SIAMESE_MODEL_SAVE_PATH)
  embedding = extractEmbedding(siameseModel)
  embedding.save_weights(EMBEDDING_SAVE_PATH)

  loadedTestMetrics = []
  loadedTrainLoss = []
  #  load train loss and test metrics to add and save new ones (if possible)
  try:
    with open(TEST_METRICS_PATH, 'rb') as input:
      loadedTestMetrics = pickle.load(input)
    with open(TRAIN_LOSS_PATH, 'rb') as input:
      loadedTrainLoss = pickle.load(input)
  except:
    print("Could not load test metrics or train loss for file. It probabely doesn't exist")
  
  # Add new data to existing one 
  loadedTestMetrics += (testMetrics)
  loadedTrainLoss += (trainLoss)
  
  # Save old (if exists) + new data or just new data
  with open(TEST_METRICS_PATH, 'wb') as output:
    pickle.dump(loadedTestMetrics, output)
  with open(TRAIN_LOSS_PATH, 'wb') as output:
    pickle.dump(loadedTrainLoss, output)

  # Notice it is only new data
  testMetrics = np.array(testMetrics)
  plotMetrics(trainLoss, testMetrics)

In [39]:
def plotMetrics(loss, metrics):
  accuracy = metrics[:, 0]
  anchorPositiveMean  = metrics[:, 1]
  anchorNegativeMean  = metrics[:, 2]
  anchorPositiveStds  = metrics[:, 3]
  anchorNegativeStds  = metrics[:, 4]
  
  plt.figure(figsize=(15,5))
  
  # Plott loss over epochs
  plt.subplot(121)
  plt.plot(loss, 'b', label='Loss')
  plt.title('Training loss')
  plt.legend()
  
  # Plott accuracy over epochs
  plt.subplot(122)
  plt.plot(accuracy, 'r', label='Accuracy')
  plt.title('Testing Accuracy')
  plt.legend()
  
  plt.figure(figsize=(15,5))
  
  # Compare means over epochs
  plt.subplot(121)
  plt.plot(anchorPositiveMean, 'b', label='AP Mean')
  plt.plot(anchorNegativeMean, 'g', label='AN Mean')
  plt.title('Means Comparision')
  plt.legend()
  
  # Plott accuracy
  ap_75quartile = (anchorPositiveMean + anchorPositiveStds)
  an_75quartile = (anchorNegativeMean - anchorNegativeStds)
  plt.subplot(122)
  plt.plot(ap_75quartile, 'b', label='AP (Mean+SD)')
  plt.plot(an_75quartile, 'g', label='AN (Mean-SD)')
  plt.title('75th Quartile Comparision')
  plt.legend()

In [40]:
# Load train and test triplets
with open(TRAIN_TRIPLETS_PATH, 'rb') as input:
  trainTriplets = pickle.load(input)
with open(TEST_TRIPLETS_PATH, 'rb') as input:
  testTriplets = pickle.load(input)

In [44]:
trainSiameseModel(trainDataset, testTriplets, epochs=2)

LFW\lfw_changed\Michael_Jordan\2.jpg
LFW\lfw_changed\Michael_Jordan\1.jpg
LFW\lfw_changed\Steven_Spielberg\3.jpg
LFW\lfw_changed\Paula_Radcliffe\2.jpg
LFW\lfw_changed\Paula_Radcliffe\3.jpg
LFW\lfw_changed\Kelly_Clarkson\1.jpg
LFW\lfw_changed\Richard_Gere\6.jpg
LFW\lfw_changed\Richard_Gere\0.jpg
LFW\lfw_changed\Samuel_Waksal\1.jpg
LFW\lfw_changed\Lucio_Gutierrez\0.jpg
LFW\lfw_changed\Lucio_Gutierrez\5.jpg
LFW\lfw_changed\Pedro_Almodovar\1.jpg
LFW\lfw_changed\Frank_Solich\3.jpg
LFW\lfw_changed\Frank_Solich\0.jpg
LFW\lfw_changed\Robert_Evans\2.jpg
LFW\lfw_changed\Robert_Kocharian\2.jpg
LFW\lfw_changed\Robert_Kocharian\3.jpg
LFW\lfw_changed\Enrique_Bolanos\3.jpg
LFW\lfw_changed\Igor_Ivanov\6.jpg
LFW\lfw_changed\Igor_Ivanov\7.jpg
LFW\lfw_changed\Ken_Macha\0.jpg
LFW\lfw_changed\John_Abizaid\4.jpg
LFW\lfw_changed\John_Abizaid\0.jpg
LFW\lfw_changed\Xanana_Gusmao\1.jpg
LFW\lfw_changed\Gerhard_Schroeder\1.jpg
LFW\lfw_changed\Gerhard_Schroeder\8.jpg
LFW\lfw_changed\Ana_Guevara\6.jpg
LFW\lfw_chang

ValueError: In a nested call() argument, you cannot mix tensors and non-tensors. Received invalid mixed argument: inputs=([<tf.Tensor 'data:0' shape=(256, 250, 250, 3) dtype=float32>, <tf.Tensor 'data_1:0' shape=(256, 250, 250, 3) dtype=float32>, <tf.Tensor 'data_2:0' shape=(256, 250, 250, 3) dtype=float32>], None, None)