## Dependencies

In [3]:
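# One-time environment setup: uncomment to install. %pip targets the running kernel's environment.
# %pip install tensorflow==2.10 opencv-python matplotlib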

In [1]:
import cv2
import os
import random
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Functional API
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Conv2D, Dense, MaxPooling2D, Input, Flatten
import tensorflow as tf



In [12]:
# Report how many GPUs TensorFlow can see. This is informational only: asserting
# that the count is zero would abort the notebook on any GPU-equipped machine.
gpus = tf.config.experimental.list_physical_devices('GPU')
print('GPUs visible to TensorFlow: {}'.format(len(gpus)))

## Folder Structures

In [3]:
# Dataset folders, all rooted at ./data
ANC_PATH, POS_PATH, NEG_PATH = (
    os.path.join('data', subdir) for subdir in ('anchor', 'positive', 'negative')
)

In [14]:
# Create the dataset folders. exist_ok=True keeps this cell re-runnable:
# without it os.makedirs raises FileExistsError once the folders exist.
for folder in (POS_PATH, NEG_PATH, ANC_PATH):
    os.makedirs(folder, exist_ok=True)

## Data Collection

### Negative samples

In [1]:
# Extract lfw face dataset
# Unpacks the archive lfw.tgz, given as a relative path, so it must sit in the
# current working directory. The following cell lists the `lfw` directory.
!tar -xf lfw.tgz

In [11]:
# Moving LFW Images to ./data/negative
# Flattens lfw/<directory>/<file> into NEG_PATH/<file>.
for directory in os.listdir('lfw'):
    directory_path = os.path.join('lfw', directory)
    # Skip anything that is not a directory (stray files at the top level):
    # calling os.listdir on a regular file raises NotADirectoryError.
    if not os.path.isdir(directory_path):
        continue
    for file in os.listdir(directory_path):
        EX_PATH = os.path.join(directory_path, file)
        NEW_PATH = os.path.join(NEG_PATH, file)
        # os.replace moves the file and silently overwrites an existing
        # destination with the same name.
        os.replace(EX_PATH, NEW_PATH)

### Anchor and positive samples

In [46]:
import uuid

In [50]:
# Top-left corner (row, column) of the square region cropped from each webcam frame
y = 140
x = 250
# Side length of the cropped square, in pixels
size = 250 # DON'T MODIFY

In [49]:
# Collect anchor / positive images from the webcam.
#   'a' -> save the current crop into ANC_PATH
#   'p' -> save the current crop into POS_PATH
#   'q' -> stop capturing
cap = cv2.VideoCapture(1)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        # A failed grab returns ret == False with no frame; slicing it would
        # raise a TypeError, so stop capturing instead.
        if not ret:
            break
        frame = frame[y:y+size, x:x+size, :]

        cv2.imshow('Image Collection', frame)

        # Poll the keyboard exactly once per frame. Calling cv2.waitKey several
        # times lets one call consume a key press meant for another check, so
        # presses of 'a', 'p' or 'q' would be dropped intermittently.
        key = cv2.waitKey(1) & 0xFF

        if key == ord('a'):
            # Collect anchors
            imgname = os.path.join(ANC_PATH, '{}.jpg'.format(uuid.uuid1()))
            cv2.imwrite(imgname, frame)
        elif key == ord('p'):
            # Collect positives
            imgname = os.path.join(POS_PATH, '{}.jpg'.format(uuid.uuid1()))
            cv2.imwrite(imgname, frame)
        elif key == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

## Data Loading

In [6]:
# Upper bound on the number of image files taken from each of the anchor,
# positive and negative folders when the file datasets are built.
SAMPLE_SIZE = 300

In [13]:
# Build the glob patterns with os.path.join so the separator is right on every OS.
# A hard-coded '\*.jpg' only forms a valid path on Windows, and '\*' is an
# invalid escape sequence in a normal string literal.
anchor = tf.data.Dataset.list_files(os.path.join(ANC_PATH, '*.jpg')).take(SAMPLE_SIZE)
positive = tf.data.Dataset.list_files(os.path.join(POS_PATH, '*.jpg')).take(SAMPLE_SIZE)
negative = tf.data.Dataset.list_files(os.path.join(NEG_PATH, '*.jpg')).take(SAMPLE_SIZE)

## Data preprocessing

In [14]:
def preprocess(file_path):
    """Read a JPEG file and return it resized to 100x100 and divided by 255.

    Args:
        file_path: path of the JPEG file to load.

    Returns:
        The decoded image, resized to (100, 100) and divided by 255.0.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.io.decode_jpeg(raw_bytes)
    resized = tf.image.resize(decoded, (100, 100))
    return resized / 255.0

In [15]:
# Pair every anchor with a positive (label 1.0) and with a negative (label 0.0),
# then stack the two sets of labelled pairs into one dataset.
# The negative pairs are bound to `negatives` so that `negative` (the file list)
# is not overwritten: rebinding it would make a second run of this cell zip an
# already-zipped dataset.
positives = tf.data.Dataset.zip((anchor, positive, tf.data.Dataset.from_tensor_slices(tf.ones(len(anchor)))))
negatives = tf.data.Dataset.zip((anchor, negative, tf.data.Dataset.from_tensor_slices(tf.zeros(len(anchor)))))
data = positives.concatenate(negatives)

In [621]:
def preprocess_twin(input_img, validation_img, label):
    """Run preprocess on both image paths of a pair; the label is returned unchanged.

    Args:
        input_img: path of the first image of the pair.
        validation_img: path of the second image of the pair.
        label: label attached to the pair.

    Returns:
        A tuple (preprocessed first image, preprocessed second image, label).
    """
    first_img = preprocess(input_img)
    second_img = preprocess(validation_img)
    return (first_img, second_img, label)

In [623]:
# DataLoader pipeline
data = data.map(preprocess_twin)
data = data.cache()
# Freeze the shuffled order. By default shuffle() draws a new order on every
# iteration, so the take()/skip() train/test split made from `data` would pick
# different, overlapping examples each time and leak test pairs into training.
# If per-epoch reshuffling of the training set is wanted, shuffle train_data
# separately after the split.
data = data.shuffle(buffer_size=1024, reshuffle_each_iteration=False)

In [625]:
# Training partition: the first 70% of the examples, in batches of 16,
# with up to 8 batches prefetched.
train_data = (
    data
    .take(round(len(data) * .7))
    .batch(16)
    .prefetch(8)
)

In [627]:
test_data = data.skip(round(len(data)*0.7))
test_data = test_data.take(round(len(data)*.3))