In [75]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy.stats as stats
from PIL import Image
from torchvision import transforms
import torch
from torch.utils.data import Dataset
from torchvision import models
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import os
from PIL import Image
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from PIL import Image, UnidentifiedImageError 


In [66]:
# Read the face bounding-box annotations and print a schema/null-count summary.
# The double space in the file names is part of the actual names on disk.
df_bbox = pd.read_csv('Periocular  - bbox.csv', sep=',')
df_bbox.info()

# Same for the facial-landmark annotations.
df_landmarks = pd.read_csv('Periocular  - landmarks.csv', sep=',')
df_landmarks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   image_id  1000 non-null   object
 1   x_1       1000 non-null   int64 
 2   y_1       1000 non-null   int64 
 3   width     1000 non-null   int64 
 4   height    1000 non-null   int64 
dtypes: int64(4), object(1)
memory usage: 39.2+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   image_id      1000 non-null   object
 1   lefteye_x     1000 non-null   int64 
 2   lefteye_y     1000 non-null   int64 
 3   righteye_x    1000 non-null   int64 
 4   righteye_y    1000 non-null   int64 
 5   nose_x        1000 non-null   int64 
 6   nose_y        1000 non-null   int64 
 7   leftmouth_x   1000 non-null   int64 
 8   leftmouth_y   1000 non-null   int64 
 9   right

In [67]:
# Preview the first ten landmark rows (rendered as the cell output).
df_landmarks.head(n=10)

Unnamed: 0,image_id,lefteye_x,lefteye_y,righteye_x,righteye_y,nose_x,nose_y,leftmouth_x,leftmouth_y,rightmouth_x,rightmouth_y
0,000001.jpg,69,109,106,113,77,142,73,152,108,154
1,000002.jpg,69,110,107,112,81,135,70,151,108,153
2,000003.jpg,76,112,104,106,108,128,74,156,98,158
3,000004.jpg,72,113,108,108,101,138,71,155,101,151
4,000005.jpg,66,114,112,112,86,119,71,147,104,150
5,000006.jpg,71,111,106,110,94,131,74,154,102,153
6,000007.jpg,70,112,108,111,85,135,72,152,104,152
7,000008.jpg,71,110,106,111,84,137,73,155,104,153
8,000009.jpg,68,113,110,111,97,139,66,152,109,150
9,000010.jpg,68,111,108,112,89,136,70,151,107,151


In [68]:
# Preview the first ten bounding-box rows (rendered as the cell output).
df_bbox.head(n=10)

Unnamed: 0,image_id,x_1,y_1,width,height
0,000001.jpg,95,71,226,313
1,000002.jpg,72,94,221,306
2,000003.jpg,216,59,91,126
3,000004.jpg,622,257,564,781
4,000005.jpg,236,109,120,166
5,000006.jpg,146,67,182,252
6,000007.jpg,64,93,211,292
7,000008.jpg,212,89,218,302
8,000009.jpg,600,274,343,475
9,000010.jpg,113,110,211,292


In [69]:
# Build a common join key in both tables: the image id with any literal
# '-with-mask' marker removed (plain substring replacement, not a regex).
df_bbox['base_id'] = df_bbox['image_id'].str.replace(pat='-with-mask', repl='', regex=False)
# Landmark ids are used unchanged as the key.
df_landmarks['base_id'] = df_landmarks['image_id']

In [70]:
# Merge datasets on base_id
paired_data = pd.merge(df_bbox, df_landmarks, on='base_id', suffixes=('_masked', '_unmasked'))
paired_data.head()
paired_data['image_id_masked'] = paired_data['image_id_masked'].apply(lambda x: f"img{x.split('.')[0]}-with-mask.jpg")
paired_data['image_id_unmasked'] = paired_data['image_id_unmasked'].apply(lambda x: f"img{x.split('.')[0]}.jpg")

paired_data.head()


Unnamed: 0,image_id_masked,x_1,y_1,width,height,base_id,image_id_unmasked,lefteye_x,lefteye_y,righteye_x,righteye_y,nose_x,nose_y,leftmouth_x,leftmouth_y,rightmouth_x,rightmouth_y
0,img000001-with-mask.jpg,95,71,226,313,000001.jpg,img000001.jpg,69,109,106,113,77,142,73,152,108,154
1,img000002-with-mask.jpg,72,94,221,306,000002.jpg,img000002.jpg,69,110,107,112,81,135,70,151,108,153
2,img000003-with-mask.jpg,216,59,91,126,000003.jpg,img000003.jpg,76,112,104,106,108,128,74,156,98,158
3,img000004-with-mask.jpg,622,257,564,781,000004.jpg,img000004.jpg,72,113,108,108,101,138,71,155,101,151
4,img000005-with-mask.jpg,236,109,120,166,000005.jpg,img000005.jpg,66,114,112,112,86,119,71,147,104,150


In [71]:


class MaskedFaceDataset(Dataset):
    """Pairs of (cropped masked image, unmasked image) described by a DataFrame.

    Args:
        paired_data: DataFrame whose rows provide ``image_id_masked``,
            ``image_id_unmasked``, ``x_1``, ``y_1``, ``width``, ``height``
            and ``base_id``.
        images_path: Directory that both image file names are joined onto.
        transform: Optional callable applied to both images of a pair.
    """

    def __init__(self, paired_data, images_path, transform=None):
        self.paired_data = paired_data
        self.images_path = images_path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in ``paired_data``."""
        return len(self.paired_data)

    def _load_pair(self, row):
        """Load, crop and transform the image pair described by one row.

        Raises:
            FileNotFoundError / UnidentifiedImageError: if either image file
            is missing or cannot be decoded.
        """
        # Load masked image
        masked_path = os.path.join(self.images_path, row['image_id_masked'])
        masked_img = Image.open(masked_path).convert("RGB")

        # Crop masked image using bounding box data (left, upper, right, lower)
        x1, y1, width, height = row['x_1'], row['y_1'], row['width'], row['height']
        masked_img = masked_img.crop((x1, y1, x1 + width, y1 + height))

        # Load unmasked image (left uncropped)
        unmasked_path = os.path.join(self.images_path, row['image_id_unmasked'])
        unmasked_img = Image.open(unmasked_path).convert("RGB")

        # Apply transformations
        if self.transform:
            masked_img = self.transform(masked_img)
            unmasked_img = self.transform(unmasked_img)

        return masked_img, unmasked_img, row['base_id']

    def __getitem__(self, idx):
        """Return ``(masked_img, unmasked_img, base_id)`` for ``idx``.

        If the pair at ``idx`` cannot be loaded, the following rows are tried
        in order (wrapping around), so the returned sample may belong to a
        different row than the one requested.

        Raises:
            IndexError: if ``idx`` is outside the DataFrame (raised by ``iloc``).
            RuntimeError: if no row in the whole dataset yields a loadable pair.
        """
        total = len(self)
        candidate = idx

        # Bounded scan instead of unbounded recursion: each row is attempted at
        # most once, so a dataset without any readable pair fails fast with a
        # clear error rather than recursing until RecursionError.
        for _ in range(max(total, 1)):
            row = self.paired_data.iloc[candidate]
            try:
                return self._load_pair(row)
            except (FileNotFoundError, UnidentifiedImageError) as e:
                print(f"Skipping missing or invalid image: {e}")
                # If an image is missing, skip this sample
                candidate = (candidate + 1) % total

        raise RuntimeError(
            f"No loadable image pair found under {self.images_path!r} "
            f"after trying all {total} rows"
        )



In [72]:
class FaceEmbeddingModel(nn.Module):
    """ResNet-50 feature extractor with a two-layer head producing unit-norm embeddings.

    Args:
        embedding_dim: Size of the output embedding vector (default 128).
    """

    def __init__(self, embedding_dim=128):
        super().__init__()

        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=` — confirm the installed version before switching.
        resnet = models.resnet50(pretrained=True)
        in_features = resnet.fc.in_features
        # Drop the classification layer so the backbone returns pooled features.
        resnet.fc = nn.Identity()
        self.backbone = resnet

        head_layers = [
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Linear(512, embedding_dim),
        ]
        self.embedding = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return L2-normalised embeddings (normalised along dim 1) for batch ``x``."""
        projected = self.embedding(self.backbone(x))
        return nn.functional.normalize(projected, p=2, dim=1)

In [73]:
class ContrastiveLoss(nn.Module):
    """Pairwise contrastive loss with a distance margin.

    Label convention used by the formula below:
      * ``label == 0``: the pair is penalised by its squared distance
        (embeddings are pulled together);
      * ``label == 1``: the pair is penalised by ``max(margin - distance, 0) ** 2``
        (embeddings are pushed apart until they are ``margin`` away).

    Args:
        margin: Distance beyond which a ``label == 1`` pair contributes no loss.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, embedding1, embedding2, label):
        """Return the mean loss over the batch of embedding pairs."""
        dist = torch.nn.functional.pairwise_distance(embedding1, embedding2)

        pull_term = (1 - label) * dist.pow(2)
        hinge = (self.margin - dist).clamp(min=0)
        push_term = label * hinge.pow(2)

        return (pull_term + push_term).mean()


In [77]:
# Hyperparameters for the embedding training run.
batch_size = 8
epochs = 10
learning_rate = 0.001

# NOTE(review): `images_path` and `transform` are not defined in the cells visible
# here (execution counts jump from 73 to 77) — make sure an earlier cell defines
# them so the notebook survives Restart Kernel -> Run All.
dataset = MaskedFaceDataset(paired_data, images_path, transform)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FaceEmbeddingModel(embedding_dim=128).to(device)
criterion = ContrastiveLoss(margin=1.0)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0

    for masked_img, unmasked_img, _ in dataloader:
        masked_img = masked_img.to(device)
        unmasked_img = unmasked_img.to(device)

        embedding_masked = model(masked_img)
        embedding_unmasked = model(unmasked_img)

        # Every batch element is a masked/unmasked pair built from the same row,
        # i.e. a matching pair. ContrastiveLoss applies the "pull together"
        # term (1 - label) * distance**2 when label == 0, so matching pairs must
        # be labelled 0. Labelling them 1 selects the margin term and trains the
        # model to push matching faces apart.
        # NOTE(review): with matching pairs only, nothing keeps different
        # identities apart (a constant embedding minimises the loss); consider
        # adding non-matching pairs with label 1.
        labels = torch.zeros(masked_img.size(0), device=device)

        loss = criterion(embedding_masked, embedding_unmasked, labels)
        epoch_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss / len(dataloader):.4f}")



Skipping missing or invalid image: [Errno 2] No such file or directory: '/Users/andrew/Desktop/DA/weak122/Dataset/Dataset/img000331-with-mask.jpg'
Skipping missing or invalid image: [Errno 2] No such file or directory: '/Users/andrew/Desktop/DA/weak122/Dataset/Dataset/img000807-with-mask.jpg'
Skipping missing or invalid image: [Errno 2] No such file or directory: '/Users/andrew/Desktop/DA/weak122/Dataset/Dataset/img000166-with-mask.jpg'
Skipping missing or invalid image: [Errno 2] No such file or directory: '/Users/andrew/Desktop/DA/weak122/Dataset/Dataset/img000167-with-mask.jpg'
Skipping missing or invalid image: [Errno 2] No such file or directory: '/Users/andrew/Desktop/DA/weak122/Dataset/Dataset/img000366-with-mask.jpg'
Skipping missing or invalid image: [Errno 2] No such file or directory: '/Users/andrew/Desktop/DA/weak122/Dataset/Dataset/img000220-with-mask.jpg'
Skipping missing or invalid image: [Errno 2] No such file or directory: '/Users/andrew/Desktop/DA/weak122/Dataset/Data