**TESTING FaceNet METHODS**

In [1]:
import os
from PIL import Image
import numpy as np
from torchvision import transforms, datasets
import torch
from model import FaceNet
import random
from torch.utils.data import TensorDataset, Dataset

<h1>Reading from the Dataset</h1>
<p>
    Dataset from: https://www.kaggle.com/datasets/dansbecker/5-celebrity-faces-dataset
    This is a small dataset for experimenting with computer vision techniques. It has a training directory containing 14-20 photos each of the celebrities

    Ben Afflek
    Elton John
    Jerry Seinfeld
    Madonna
    Mindy Kaling

    The validation directory has 5 photos of each celebrity.


    In the below code, images of ben affleck are read.
</p>

In [2]:
directory = "C:\\Users\\adity\\Downloads\\faces_for_embedding\\train\\ben_afflek"
image_height, image_width = 128, 128
transform = transforms.Compose([
    transforms.Resize((image_height, image_width)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
images = []
for filename in os.listdir(directory):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        image_path = os.path.join(directory, filename)
        image = Image.open(image_path).convert('RGB')
        image = transform(image)
        images.append((image))

<p>
    Printing the number of images, the type of each image, and the shape of the tensor
</p>

In [3]:
images = torch.stack(images)
print(len(images))
print(type(images[0]))
print(images.shape)

14
<class 'torch.Tensor'>
torch.Size([14, 3, 128, 128])


<p>
As expected, there are 14 RGB 128x128 images.
</p>

In [4]:
facenet = FaceNet(embedding_size=256)
print(facenet.model.last_linear)
embeddings = facenet.embed(images)
print(embeddings.shape)
print(type(embeddings))

#saving and loading model states
facenet.save_model('test_save')
FaceNet.load_model('test_save')

Linear(in_features=1792, out_features=256, bias=True)
(14, 256)
<class 'numpy.ndarray'>


<model.FaceNet at 0x1bf91cb9bd0>

<p>As expected, the model returns 14 256 dimensional tensors, upon setting the embedding_size to 256.</p>

<H1>Extracting images from folder</H1>

In [10]:
root_folder = "C:\\Users\\adity\\Downloads\\faces_for_embedding\\train"

# Create the ImageFolder dataset
dataset = datasets.ImageFolder(root_folder, transform=transform)

# Get the class names (person names)
class_names = dataset.classes

# Define the number of samples for each triplet
num_triplets = 250

# Initialize empty lists to store the triplet samples
anchors = []
positives = []
negatives = []

# Iterate over the class folders
for class_idx in range(len(class_names)):
    class_folder = os.path.join(root_folder, class_names[class_idx])

    # Get the list of images for the current class
    image_list = dataset.imgs[dataset.class_to_idx[class_names[class_idx]]:]

    # Select random samples for the triplets
    for _ in range(num_triplets):
        # Randomly select a person (anchor and positive samples will be from this person)
        person_idx = class_idx

        # Randomly select an anchor image from the person's subfolder
        anchor_image_path = random.choice(image_list)[0]
        anchor_image = dataset.loader(anchor_image_path)

        # Randomly select a positive image from the same person's subfolder
        positive_image_path = random.choice(image_list)[0]
        positive_image = dataset.loader(positive_image_path)

        # Randomly select a negative person (different from the anchor and positive person)
        negative_person_idx = (person_idx + random.randint(1, len(class_names) - 1)) % len(class_names)

        # Get the list of images for the negative person
        negative_person_folder = os.path.join(root_folder, class_names[negative_person_idx])
        negative_person_image_list = dataset.imgs[dataset.class_to_idx[class_names[negative_person_idx]]:]

        # Randomly select a negative image from the negative person's subfolder
        negative_image_path = random.choice(negative_person_image_list)[0]
        negative_image = dataset.loader(negative_image_path)

        # Append the triplet samples to the respective lists
        anchors.append(anchor_image)
        positives.append(positive_image)
        negatives.append(negative_image)

In [6]:
'''for i in range(len(anchors)):
    anchors[i] = transform(anchors[i])
    positives[i] = transform(positives[i])
    negatives[i] = transform(negatives[i])
'''

In [12]:
print(type(anchors[0]))


<class 'PIL.Image.Image'>


<H1>Defining dataset class, creating triplet (anchor, positive, negative) dataset</H1>

In [13]:
class TripletDataset(Dataset):
    def __init__(self, anchors, positives, negatives, transform=None):
        self.anchors = anchors
        self.positives = positives
        self.negatives = negatives
        self.transform = transform

    def __len__(self):
        return len(self.anchors)

    def __getitem__(self, index):
        anchor = self.anchors[index]
        positive = self.positives[index]
        negative = self.negatives[index]

        if self.transform is not None:
            anchor = self.transform(anchor)
            positive = self.transform(positive)
            negative = self.transform(negative)

        return anchor, positive, negative

train_dataset = TripletDataset(anchors, positives, negatives, transform=transform)

<H1>Training the model</H1>

In [14]:
facenet.train(train_dataset, 64, 10, 0.002)

Epoch [1/10], Loss: 1.000537946820259
Epoch [2/10], Loss: 1.0204570174217225
Epoch [3/10], Loss: 0.9928133010864257
Epoch [4/10], Loss: 0.9548749297857284
Epoch [5/10], Loss: 0.9712955892086029
Epoch [6/10], Loss: 0.9703152447938919
Epoch [7/10], Loss: 0.9606680750846863
Epoch [8/10], Loss: 0.9523997902870178
Epoch [9/10], Loss: 0.9306520104408265
Epoch [10/10], Loss: 0.9520917236804962
