# 1. Setup

## 1.1 Install Dependencies

In [None]:
# Install the third-party packages imported in the cells below.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install tensorflow opencv-python matplotlib

## 1.2 Import Dependencies

In [4]:
# Import standard libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import random

In [7]:
# Import TensorFlow dependencies - functional API
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Layer
import tensorflow as tf

## 1.3 Create Folder Structures

In [9]:
# Image folders for the positive, negative and anchor samples, all under data/
POS_PATH, NEG_PATH, ANC_PATH = (
    os.path.join('data', subfolder)
    for subfolder in ('positive', 'negative', 'anchor')
)

In [11]:
# Create each image folder; exist_ok=True makes this cell safe to re-run
for folder in (POS_PATH, NEG_PATH, ANC_PATH):
    os.makedirs(folder, exist_ok=True)

# 2. Collect Positives and Anchors

## 2.1 Retrieve Labelled Faces in the Wild Dataset

Install `kagglehub`, which is used below to download the required dataset from Kaggle.

In [14]:
# Install kagglehub, which the download cell below imports to fetch the dataset
%pip install kagglehub

Collecting kagglehub
  Using cached kagglehub-0.3.12-py3-none-any.whl.metadata (38 kB)
Collecting pyyaml (from kagglehub)
  Using cached PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl.metadata (2.1 kB)
Collecting tqdm (from kagglehub)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Using cached kagglehub-0.3.12-py3-none-any.whl (67 kB)
Using cached PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl (172 kB)
Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm, pyyaml, kagglehub
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [kagglehub]
[1A[2KSuccessfully installed kagglehub-0.3.12 pyyaml-6.0.2 tqdm-4.67.1
Note: you may need to restart the kernel to use updated packages.


Use Kaggle's API to download the dataset to your computer.

You may need to create a Kaggle account and an API key.

In [15]:
import kagglehub

# Kaggle dataset handle passed to dataset_download
LFW_HANDLE = "jessicali9530/lfw-dataset"

# Download the latest version; the returned value is the local location of the files
path = kagglehub.dataset_download(LFW_HANDLE)
print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/jessicali9530/lfw-dataset?dataset_version_number=4...


100%|██████████| 112M/112M [00:04<00:00, 24.7MB/s] 

Extracting files...





Path to dataset files: /Users/ethanvillalovoz/.cache/kagglehub/datasets/jessicali9530/lfw-dataset/versions/4


KaggleHub stores the downloaded dataset in a `.cache` directory under the home directory.

Move it into the `data` folder of the current working directory (the project directory).

In [16]:
import shutil
import os

# Source path: the location KaggleHub reported for the download in the previous cell.
# Using `path` instead of a hard-coded absolute path keeps this cell working for
# any user account, machine, or dataset version.
src_path = path

# Destination path inside the project: <current working directory>/data/lfw-dataset
dst_path = os.path.join(os.getcwd(), "data", "lfw-dataset")

# Move the directory; skipped when the destination is already there,
# so re-running the cell does not clobber an earlier move.
if not os.path.exists(dst_path):
    # Make sure the parent data/ folder is present before moving into it
    os.makedirs(os.path.dirname(dst_path), exist_ok=True)
    shutil.move(src_path, dst_path)
    print(f"Dataset moved to: {dst_path}")
else:
    print("Destination already exists. Remove it first if you want to overwrite.")

Dataset moved to: /Users/ethanvillalovoz/Desktop/deepface-oneshot-paper-reimplementation/data/lfw-dataset


Move all of the images from the dataset into the negative image folder (`data/negative`).

In [None]:
# Flatten the downloaded dataset into the negative-sample folder: every .jpg found
# anywhere under <dst_path>/lfw-deepfunneled*/ is moved directly into NEG_PATH.
moved_count = 0
for directory in os.listdir(dst_path):
    src_dir = os.path.join(dst_path, directory)
    # Only descend into the image tree. The isdir check skips plain files whose
    # name happens to match, which os.listdir/os.walk could not be pointed at.
    if not (directory.startswith("lfw-deepfunneled") and os.path.isdir(src_dir)):
        continue
    # os.walk visits every nesting level and yields only real files, so stray
    # non-directory entries (e.g. hidden OS files) no longer raise NotADirectoryError.
    for current_dir, _subdirs, filenames in os.walk(src_dir):
        for image in filenames:
            if image.endswith(".jpg"):
                image_path = os.path.join(current_dir, image)
                final_dst_path = os.path.join(NEG_PATH, image)
                shutil.move(image_path, final_dst_path)
                moved_count += 1

# One summary line instead of three log lines per image
print(f"Moved {moved_count} images to {NEG_PATH}")

## 2.2 Collect Positive and Anchor Classes

In [39]:
# Import uuid library for unique file names
import uuid

In [40]:
# Establish connection to the webcam
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()

    if not ret:
        break

    frame = frame[120:120+250, 120:120+250, :]  # Crop the frame to a square

    # Collect anchor images
    if cv2.waitKey(1) & 0xFF == ord('a'):
        # Create a unique filename using uuid
        imgname = os.path.join(ANC_PATH, f"{uuid.uuid1()}.jpg")
        # Wrute the image to the anchor path
        cv2.imwrite(imgname, frame)

    # Collect positive images
    if cv2.waitKey(1) & 0xFF == ord('p'):
        # Create a unique filename using uuid
        imgname = os.path.join(POS_PATH, f"{uuid.uuid1()}.jpg")
        # Wrute the image to the anchor path
        cv2.imwrite(imgname, frame)

    # Display the frame
    cv2.imshow('Image Collection', frame)

    # Breaking gracefully
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()

OpenCV: AVFoundation didn't find any attached Video Input Devices!
OpenCV: camera failed to properly initialize!


# 3. Load and Preprocess Images

## 3.1 Get Image Directories

In [None]:
# One file-path dataset per image folder, each limited to its first 300 entries.
# The generator is consumed in order, so the datasets are created anchor, positive, negative.
anchor, positive, negative = (
    tf.data.Dataset.list_files(os.path.join(folder, '*.jpg')).take(300)
    for folder in (ANC_PATH, POS_PATH, NEG_PATH)
)