1. Скачать датасет   https://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html
2. Написать "сиамскую" нейросеть
3. *Используя BCELoss обучить нейронку кодировать изображения одного человека (одного класса) похожим образом
4. С помощью t-SNE визуализировать результаты работы (использовать тестовый датасет)
5. Визуализировать результаты работы (inference) в виде - пара изображений + distance
6. Вместо BCELoss использовать "Contrastive loss function", сравнить результаты

In [1]:
import os
import random


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

In [2]:
class PairDataset(Dataset):
    def __init__(self, image_folders, transform=None, train=True):
        self.image_folders = image_folders
        self.transform = transform
        self.train = train

        self.pairs = self.create_pairs()

    def create_pairs(self):
        pairs = []
        for folder in self.image_folders:
            images = os.listdir(folder)
            if self.train:

                train_images = images[:7]

                for img1 in range(len(train_images)):
                    for img2 in range(img1+1, len(train_images)):
                        pairs.append((os.path.join(folder, train_images[img1]), 
                                      os.path.join(folder, train_images[img2]), 1))

                for img1 in range(len(train_images)):
                    other_folder = random.choice(self.image_folders)
                    if other_folder != folder:
                        other_image = random.choice(os.listdir(other_folder))
                        pairs.append((os.path.join(folder, train_images[img1]), 
                                      os.path.join(other_folder, other_image), 0))
            else:

                test_images = images[7:]

                for img1 in range(len(test_images)):
                    for img2 in range(img1+1, len(test_images)):
                        pairs.append((os.path.join(folder, test_images[img1]), 
                                      os.path.join(folder, test_images[img2]), 1))

                    other_folder = random.choice(self.image_folders)
                    if other_folder != folder:
                        other_image = random.choice(os.listdir(other_folder))
                        pairs.append((os.path.join(folder, test_images[img1]), 
                                      os.path.join(other_folder, other_image), 0))
        return pairs

    def __getitem__(self, idx):
        img1_path, img2_path, label = self.pairs[idx]
        img1 = Image.open(img1_path).convert("RGB")
        img2 = Image.open(img2_path).convert("RGB")

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return img1, img2, torch.tensor(label, dtype=torch.float)

    def __len__(self):
        return len(self.pairs)


In [3]:
# normalize
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1), 
    transforms.Resize((224, 224)),                 
    transforms.ToTensor(),                        
    transforms.Normalize(mean=[0.5], std=[0.5])    
])


image_folders = [f"dataset/s{folder}" for folder in range (1,41)]

train_dataset = PairDataset(image_folders=image_folders, transform=transform, train=True)
test_dataset = PairDataset(image_folders=image_folders, transform=transform, train=False)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

img1, img2, labels = next(iter(train_loader))
print (img1.shape, img2.shape, labels.shape)

torch.Size([32, 1, 224, 224]) torch.Size([32, 1, 224, 224]) torch.Size([32])


In [4]:
class SiameseNet(nn.Module):
    def __init__(self):
        super(SiameseNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * 56 * 56, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.drop = nn.Dropout(0.3)

    def forward(self, x):
        x = self.pool(nn.ReLU()(self.conv1(x)))
        x = self.pool(nn.ReLU()(self.conv2(x)))
        x = x.view(x.size(0), -1)  
        x = nn.ReLU()(self.fc1(x))
        x = self.fc2(x)
        x = self.drop(x)
        return x

In [5]:
class half2(nn.Module):
    def __init__(self):
        super(half2, self).__init__()
        self.conv1 = nn.Conv2d(3,20,(11,11))
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d((2,2))
        self.conv2 = nn.Conv2d(20,40,(5,5))
        self.conv3 = nn.Conv2d(40,80,(3,3))
        self.fc1 = nn.Linear(80,160)
        self.fc2 = nn.Linear(160,80)
    def forward(self,img):
        res = self.relu(self.conv1(img))
        res = self.maxpool(res)
        res = self.relu(self.conv2(res))
        res = self.relu(self.conv3(res))
        res = res.view((res.shape[1],res.shape[2],res.shape[0]))
        res = self.relu(self.fc1(res))
        res = torch.relu(self.fc2(res))
        return torch.flatten(res)

In [6]:
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        euclidean_distance = F.pairwise_distance(output1, output2, keepdim=True)
        
        loss = torch.mean((1 - label) * torch.pow(euclidean_distance, 2) +
                          (label) * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))
        return loss


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SiameseNet()
model = model.to(device)

print (torch.cuda.is_available())

criterion = ContrastiveLoss(margin=1.0)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.4)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for img1, img2, labels in train_loader:
        img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)

        optimizer.zero_grad()

        output1, output2 = (model(img1), model(img2))

        loss = criterion(output1, output2, labels)
        
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

torch.save(model, 'siamese_model_full_contrastive.pth')

True
Epoch 1/10, Loss: 0.1958
Epoch 2/10, Loss: 0.1911
Epoch 3/10, Loss: 0.1899
Epoch 4/10, Loss: 0.1887
Epoch 5/10, Loss: 0.1878
Epoch 6/10, Loss: 0.1887
Epoch 7/10, Loss: 0.1872
Epoch 8/10, Loss: 0.1874
Epoch 9/10, Loss: 0.1875
Epoch 10/10, Loss: 0.1879


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SiameseNet()
model = model.to(device)

print (torch.cuda.is_available())

criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.4)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for img1, img2, labels in train_loader:
        img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)

        optimizer.zero_grad()

        output1, output2 = (model(img1), model(img2))
        cosine_similarity = nn.functional.cosine_similarity(output1, output2)

        probability = torch.sigmoid(cosine_similarity)

        loss = criterion(probability, labels.float())
        
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

torch.save(model, 'siamese_model_full_BCE.pth')

True
Epoch 1/10, Loss: 0.5717
Epoch 2/10, Loss: 0.5688
Epoch 3/10, Loss: 0.5673
Epoch 4/10, Loss: 0.5621
Epoch 5/10, Loss: 0.5545
Epoch 6/10, Loss: 0.5441
Epoch 7/10, Loss: 0.5337
Epoch 8/10, Loss: 0.5269
Epoch 9/10, Loss: 0.5207
Epoch 10/10, Loss: 0.5166
