<a href="https://colab.research.google.com/github/dude123studios/AdvancedDeepLearning/blob/main/Face_Recognition_with_Siamese_networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import *
import matplotlib.pyplot as plt
import pandas as pd
import random

Data is from https://www.kaggle.com/vasukipatel/face-recognition-dataset and contains celebrity images from roughly 40 different classes (people). We will train our network to check whether two different images belong to the same class. In this way, we will be able to generalize to any set of people. We will use the Kaggle API to download the dataset. To try this yourself, you need a file called kaggle.json on your computer, which you can get from the Kaggle homepage.

In [2]:
from google.colab import files
files.upload() # Browse for the kaggle.json file that you downloaded

# Make directory named kaggle, copy kaggle.json file there, and change the permissions of the file.
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

# You can check if everything's okay by running this command.
! kaggle datasets list

# Download and unzip sign-language-mnist dataset into '/usr/local'
! kaggle datasets download -d vasukipatel/face-recognition-dataset --path '/usr/local' --unzip

Saving kaggle.json to kaggle.json
ref                                                       title                                         size  lastUpdated          downloadCount  
--------------------------------------------------------  -------------------------------------------  -----  -------------------  -------------  
gpreda/reddit-wallstreetsbets-posts                       Reddit WallStreetBets Posts                    7MB  2021-02-18 07:47:29           1282  
michau96/restaurant-business-rankings-2020                Restaurant Business Rankings 2020             16KB  2021-01-30 14:20:45           2092  
yorkehead/stock-market-subreddits                         Stock Market Subreddits                        1MB  2021-01-29 13:53:50            225  
aagghh/crypto-telegram-groups                             Crypto telegram groups                       297MB  2021-02-02 09:58:25            149  
timoboz/superbowl-history-1967-2020                       Superbowl History 1967 - 2

In [3]:
def preprocces_img(image_path, base_dir='/usr/local/Faces/Faces/', size=(64,64)):
  """Load one face image from disk and turn it into a network-ready tensor.

  Args:
    image_path: File name of the image; it is appended to `base_dir`.
    base_dir: Directory prefix holding the images. Defaults to the location
      the dataset is unzipped to in this notebook.
    size: (height, width) the image is resized to. Defaults to 64x64, the
      input size the model in this notebook is built for.

  Returns:
    The image decoded as 3-channel JPEG, resized to `size` and passed through
    MobileNetV2's `preprocess_input`.
  """
  raw_bytes = tf.io.read_file(base_dir + image_path)
  img = tf.image.decode_jpeg(raw_bytes, channels=3)
  img = tf.image.resize(img, size)
  #use mobilenet preprocessing for convenience, even if we use a smaller model
  img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
  return img

In [4]:
# Read the dataset index: 'id' is the image file name handed to
# preprocces_img, 'label' is the class the image belongs to.
df = pd.read_csv('/usr/local/Dataset.csv')
face_urls = df['id'].tolist()
image_labels = df['label'].tolist()

# Seed the sampler so the generated pairs are the same on every run.
random.seed(42)

# Group the preprocessed images by class in one pass over the index.
# Dicts keep insertion order, so the class order is deterministic
# (iterating a set, as before, is not).
faces_by_class = {}
for face_url, image_label in zip(face_urls, image_labels):
  faces_by_class.setdefault(image_label, []).append(preprocces_img(face_url))
classes = list(faces_by_class)
faces = [faces_by_class[class_] for class_ in classes]
del faces_by_class

if len(faces) < 2:
  raise ValueError('need at least two classes to build non congruent pairs')

pairs1 = []
pairs2 = []
labels = []

#all congruent pairs would be about 100,000 image pairs,
#so we only take 40,000 to keep training time reasonable
total = 40000
congruent_label = np.asarray([1])
for class_ in faces:
  #a class with a single image cannot form a same-person pair
  if len(class_) < 2:
    continue
  for _ in range(total // len(classes)):
    #two distinct images of the same class, every pair equally likely
    j, k = random.sample(range(len(class_)), 2)
    pairs1.append(class_[j])
    pairs2.append(class_[k])
    labels.append(congruent_label)
#all non congruent pairs form 3m pairs which is far too much,
#so we randomly choose `total` (40,000) of them as well,
#giving us a more reasonable ~80,000 total pairs, since a lot of epochs will be required
non_congruent_label = np.asarray([0])
for _ in range(total):
  #two distinct classes, then one random image from each
  j, k = random.sample(range(len(faces)), 2)
  pairs1.append(random.choice(faces[j]))
  pairs2.append(random.choice(faces[k]))
  labels.append(non_congruent_label)
del faces
del face_urls
del image_labels
del classes

In [5]:
# Turn the three parallel Python lists into numpy arrays, keeping their names.
pairs1, pairs2, labels = map(np.asarray, (pairs1, pairs2, labels))

In [6]:
# Disabled test/validation/train split. The code sits inside a bare string
# literal, so none of it executes; the notebook only echoes the string back
# as the cell output.
# NOTE(review): `dataset` is not defined in any visible cell (presumably a
# tf.data.Dataset, given take/skip/batch) — confirm before re-enabling.
'''
train_size = 50000
test_size = 5000
val_size = 5000
test_dataset = dataset.take(test_size).batch(128, drop_remainder=True)
val_dataset = dataset.skip(test_size).take(val_size).batch(128, drop_remainder=True)
train_dataset = dataset.skip(test_size+val_size).batch(128, drop_remainder=True)
del dataset
'''
#we will only train on this notebook

'\ntrain_size = 50000\ntest_size = 5000\nval_size = 5000\ntest_dataset = dataset.take(test_size).batch(128, drop_remainder=True)\nval_dataset = dataset.skip(test_size).take(val_size).batch(128, drop_remainder=True)\ntrain_dataset = dataset.skip(test_size+val_size).batch(128, drop_remainder=True)\ndel dataset\n'

Now the fun part! Let's define the model. One model learns facial features with conv layers, and another learns facial differentiation: it reduces the two encodings to their Euclidean distance and feeds that into a single sigmoid neuron, which will be 1 if the two faces are of the same person and 0 if they are not.

In [7]:
class CNN_Block(tf.keras.Model):
  """Convolution -> batch norm -> ReLU -> dropout -> max-pool building block.

  Args:
    num_filters: Number of output filters of the convolution.
    conv_size: Kernel size of the convolution.
  """
  def __init__(self, num_filters, conv_size):
    super(CNN_Block, self).__init__()
    # 'same' padding: only the pooling layer changes the spatial size.
    self.conv = Conv2D(num_filters, conv_size, padding='same')
    self.bn = BatchNormalization()
    self.dropout = Dropout(0.3)
    self.pool = MaxPool2D()

  def call(self, x, training=None):
    """Run the block on a batch of feature maps.

    Args:
      x: Input tensor fed to the convolution.
      training: Forwarded to the BatchNormalization and Dropout layers, which
        behave differently in training and inference. Left as None, Keras
        fills it in from the surrounding call.

    Returns:
      The pooled output tensor.
    """
    x = self.conv(x)
    x = self.bn(x, training=training)
    x = tf.nn.relu(x)
    x = self.dropout(x, training=training)
    x = self.pool(x)
    return x

In [8]:
class Encoder(tf.keras.Model):
  """Shared feature extractor: four CNN_Blocks (32, 64, 128, 256 filters,
  3x3 kernels) followed by a Flatten layer."""

  def __init__(self):
    super().__init__()
    self.block1 = CNN_Block(32, 3)
    self.block2 = CNN_Block(64, 3)
    self.block3 = CNN_Block(128, 3)
    self.block4 = CNN_Block(256, 3)
    self.flatten = Flatten()

  def call(self, x):
    """Pass `x` through every stage in order and return the flat feature vector."""
    stages = (self.block1, self.block2, self.block3, self.block4, self.flatten)
    for stage in stages:
      x = stage(x)
    return x

In [9]:
class Euclidean_Distance(tf.keras.Model):
  """Comparison head: Euclidean distance between two feature vectors, mapped
  to a single value by a one-unit sigmoid Dense layer."""

  def __init__(self):
    super().__init__()
    self.dense = Dense(1, activation='sigmoid')

  def call(self, xA, xB):
    """Return the sigmoid output for the distance between `xA` and `xB`."""
    squared_diff = tf.square(xA - xB)
    sum_squared = tf.reduce_sum(squared_diff, axis=-1, keepdims=True)
    # Clamp to epsilon before the root so the value under sqrt is never 0.
    clamped = tf.maximum(sum_squared, tf.keras.backend.epsilon())
    return self.dense(tf.sqrt(clamped))

In [10]:
def build_model(input_shape=(64,64,3)):
  """Build and compile the siamese face-comparison model.

  Both inputs go through the same Encoder instance, so the two branches share
  weights; the Euclidean_Distance head turns the two encodings into a single
  sigmoid output.

  Args:
    input_shape: Shape of one input image (without the batch dimension).
      Defaults to (64, 64, 3).

  Returns:
    A compiled tf.keras.Model with inputs named 'inputA' and 'inputB'. The
    model summary is printed as a side effect.
  """
  imgA = Input(input_shape, dtype=tf.float32, name='inputA')
  imgB = Input(input_shape, dtype=tf.float32, name='inputB')
  # One encoder applied twice, not two separate encoders.
  encoder = Encoder()
  featA = encoder(imgA)
  featB = encoder(imgB)
  dist = Euclidean_Distance()
  outputs = dist(featA, featB)
  model = tf.keras.Model(inputs=(imgA, imgB),outputs=outputs)
  # Report accuracy alongside the loss so training progress is interpretable.
  model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
  model.summary()
  return model

In [11]:
# No explicit tf.device pin: TensorFlow already prefers a visible GPU, and
# without the hard-coded 'gpu:0' the cell does not depend on a GPU being present.
# The dict keys must match the Input layer names used in build_model().
inputs = {'inputA':pairs1, 'inputB':pairs2}
model = build_model()
# Keep the History object so the loss curve can be inspected after training.
history = model.fit(inputs, labels,shuffle=True, epochs=50)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputA (InputLayer)             [(None, 64, 64, 3)]  0                                            
__________________________________________________________________________________________________
inputB (InputLayer)             [(None, 64, 64, 3)]  0                                            
__________________________________________________________________________________________________
encoder (Encoder)               (None, 4096)         390336      inputA[0][0]                     
                                                                 inputB[0][0]                     
__________________________________________________________________________________________________
euclidean__distance (Euclidean_ (None, 1)            2           encoder[0][0]                

In [12]:
# Write the trained model's weights to a file in the current working directory.
# NOTE(review): the file name is spelled 'recogniton'; it is left untouched
# because anything that loads these weights must use the same path.
model.save_weights('face_recogniton.h5')