In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

import torchvision.models as models
import torchvision.transforms as transforms

from tqdm import auto
from torch.utils.data import DataLoader, Dataset

  from .autonotebook import tqdm as notebook_tqdm


# Overview

## Siamese Network Structure

In a Siamese Network, two input images $ x_1 $ and $ x_2 $ are processed through the same network function $ f $, which extracts feature vectors from each image. This process can be mathematically represented as follows:

$v_i = f(x_i)$

where $ v_i $ is the feature vector extracted from the image $ x_i $.

## Contrastive Loss

The Contrastive Loss function is designed to train the model such that feature vectors of similar image pairs are brought closer together, while those of dissimilar pairs are pushed apart. This is mathematically formulated as:

$ L(v_1, v_2, y) = (1 - y) \cdot \|v_1 - v_2\|^2 + y \cdot \max(0, m - \|v_1 - v_2\|)^2 $

where:
- $ v_1 $ and $ v_2 $ are the feature vectors extracted from images $ x_1 $ and $ x_2 $, respectively.
- $ y $ is the label, which is 0 if the images belong to the same class and 1 if they belong to different classes.
- $ m $ is the margin, a hyperparameter that defines how far apart the feature vectors of dissimilar pairs should be.
- $ \|v_1 - v_2\| $ represents the Euclidean distance between $ v_1 $ and $ v_2 $.

This loss function encourages the network to minimize the distance between feature vectors of similar pairs and ensure that the distance between dissimilar pairs is at least the margin $ m $.

## Implementation Notes

In the implementation, a pretrained network such as ResNet18 can be utilized as the feature extractor, with modifications made to the final layer to adjust the dimensionality of the feature space. The network outputs $ v_i $, which are used to compute the Contrastive Loss. The model is trained through backpropagation to update the weights of the network based on this loss.

---

This Markdown text includes sections on the network structure, the Contrastive Loss, and implementation notes, utilizing LaTeX for the mathematical expressions. You can use this text in Markdown editors that support LaTeX for math rendering.

In [2]:
class SiameseNetworkDataset(Dataset):
    def __init__(self):
        # ここでは仮のデータを生成しています
        self.data = torch.randn(1000, 3, 64, 64)  # 1000個の3x64x64の画像

    def __getitem__(self, index):
        x1 = self.data[index]
        # 同じクラスの画像を取得（ここではランダムに選んでいます）
        x2 = self.data[np.random.randint(0, len(self.data))]
        # ラベル（ここでは0か1でランダムに）
        label = np.random.randint(0, 2)
        return x1, x2, label

    def __len__(self):
        return len(self.data)

In [3]:
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        self.convnet = nn.Sequential(
            nn.Conv2d(3, 64, 5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

        self.fc = nn.Sequential(
            nn.Linear(128 * 13 * 13, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 2)
        )

    def forward_one(self, x):
        x = self.convnet(x)
        x = x.view(x.size()[0], -1)
        x = self.fc(x)
        return x

    def forward(self, x1, x2):
        out1 = self.forward_one(x1)
        out2 = self.forward_one(x2)
        return out1, out2

In [4]:
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        euclidean_distance = nn.functional.pairwise_distance(output1, output2)
        
        loss_contrastive = torch.mean((1-label) * torch.pow(euclidean_distance, 2) +
                                      label * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))
        return loss_contrastive

In [5]:
model = SiameseNetwork().cuda()
criterion = ContrastiveLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# データローダーの設定
dataset = SiameseNetworkDataset()
train_dataloader = DataLoader(dataset, shuffle=True, batch_size=10)

In [6]:
for epoch in range(10):
    for data in train_dataloader:
        img1, img2, label = data
        img1, img2, label = img1.cuda(), img2.cuda(), label.cuda()
        optimizer.zero_grad()
        output1, output2 = model(img1, img2)
        loss = criterion(output1, output2, label.float())
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/10], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 1.1434
Epoch [2/10], Loss: 1.9985
Epoch [3/10], Loss: 1.8040
Epoch [4/10], Loss: 1.5590
Epoch [5/10], Loss: 0.6502
Epoch [6/10], Loss: 1.1401
Epoch [7/10], Loss: 0.7549
Epoch [8/10], Loss: 1.0407
Epoch [9/10], Loss: 0.6354
Epoch [10/10], Loss: 1.6687
