# Suppose the dataloader contains only images. We can encode all of them with
# our model, and when a new image comes in we encode it as well and compare it
# against every stored encoding using a distance function (similar to the loss
# function: the L2 norm of the difference between the two encodings). If the
# distance is less than a threshold, we can claim that the two images belong
# to the same person.

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torchvision import transforms as T
from torch.utils.data import Dataset, DataLoader
from PIL import Image

In [4]:
class TripletLoss(nn.Module):
    """Module wrapper around ``nn.TripletMarginLoss``.

    Args:
        margin: Margin forwarded to ``nn.TripletMarginLoss``.
    """

    def __init__(self, margin: float = 1.0) -> None:
        super().__init__()
        self.loss_fn = nn.TripletMarginLoss(margin=margin)

    def forward(self, anchor, positive, negative):
        """Return the triplet margin loss for (anchor, positive, negative)."""
        triplet_loss = self.loss_fn(anchor, positive, negative)
        return triplet_loss

In [5]:
# ResNet-18 from torchvision, initialised with ResNet18_Weights.DEFAULT.
# verify() below wraps this instance as the feature-extraction backbone.
model_resnet18 = models.resnet18(weights= models.ResNet18_Weights.DEFAULT)

In [6]:
# Print the module tree of the loaded network to inspect its layers.
print(model_resnet18)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
class FaceRecognitionModel(nn.Module):
    """Wrap a backbone network and return its output as the face encoding.

    Args:
        model: Network stored as ``self.backbone``; its output is returned
            unchanged by ``forward``.
        embedding_dim: Currently unused. It is only referenced by the
            disabled head-replacement code kept in ``__init__``.
    """

    def __init__(self, model, embedding_dim=128):
        super().__init__()
        # NOTE: replacing the last fully connected layer with an embedding
        # layer is disabled, so the backbone is used exactly as passed in:
        #   in_features = self.backbone.fc.in_features
        #   self.backbone.fc = nn.Linear(in_features, embedding_dim)
        self.backbone = model

    def forward(self, x):
        """Run ``x`` through the backbone and return the result."""
        features = self.backbone(x)
        return features

In [8]:
#model = FaceRecognitionModel(
#    model_resnet18, embedding_dim=128).eval()
#loss_fn = TripletLoss(margin=1.0)
#optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [36]:
def verify(person_1, person_2, threshold=15):
    """Print whether two image tensors appear to show the same person.

    Both inputs are encoded with the module-level ``model_resnet18`` wrapped
    in ``FaceRecognitionModel`` (in eval mode). The L2 norm of the difference
    between the two encodings, taken over dim 1, is compared with
    ``threshold``.

    Args:
        person_1: Input tensor for the first image. Assumed to carry a leading
            batch dimension, as produced by ``transform(...).unsqueeze(0)``.
        person_2: Input tensor for the second image, same layout as
            ``person_1``.
        threshold: Distances strictly below this value are reported as
            'same person'. The default of 15 keeps the previously hard-coded
            cut-off.

    Returns:
        None. One verdict line is printed per pair in the batch.
    """
    model = FaceRecognitionModel(
        model_resnet18, embedding_dim=128).eval()

    # Inference only: no_grad avoids building an autograd graph for the
    # forward passes and the distance computation.
    with torch.no_grad():
        person_1_feature = model(person_1)
        person_2_feature = model(person_2)
        distances = torch.norm(person_1_feature - person_2_feature, dim=1)

    # Iterate instead of using the tensor directly in `if`, which raises for
    # batches with more than one pair. A single pair prints exactly one line.
    for distance in distances.flatten().tolist():
        if distance < threshold:
            print('same person')
        else:
            print('they are different')

In [37]:
# Preprocessing applied to every image before it is passed to the model:
# resize to 256x256, convert to a tensor, then normalise each of the three
# channels with the given mean and std.
transform = T.Compose([
    T.Resize((256, 256)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def load_image(path):
    """Load the image at ``path`` as a preprocessed tensor with a batch dim.

    Args:
        path: Location of the image file on disk.

    Returns:
        The output of ``transform`` with an extra leading dimension added by
        ``unsqueeze(0)``.
    """
    # The context manager closes the underlying file once the pixels have been
    # read. convert('RGB') guarantees three channels so that grayscale, RGBA
    # or CMYK files do not break the 3-channel Normalize step.
    with Image.open(path) as img:
        return transform(img.convert('RGB')).unsqueeze(0)


image_1 = load_image('./images/camera_2.jpg')
image_2 = load_image('./images/benoit.jpg')
image_3 = load_image('./images/camera_4.jpg')
image_4 = load_image('./images/dan.jpg')

In [38]:
# Compare camera_2.jpg (image_1) with benoit.jpg (image_2).
verify(image_1, image_2)

same person


In [39]:
# Compare camera_4.jpg (image_3) with dan.jpg (image_4).
verify(image_3, image_4)

same person


In [40]:
# Compare camera_2.jpg (image_1) with dan.jpg (image_4).
verify(image_1, image_4)

they are different
