# [Siamese Neural Networks for One-shot Image Recognition](https://www.cs.cmu.edu/~rsalakhu/papers/oneshot1.pdf)

A PyTorch implementation

In [None]:
! pip install torchscan

Collecting torchscan
  Downloading https://files.pythonhosted.org/packages/30/8f/0f255453c3c44990cb879574ac023f7484acfb225d7f17468cbebc65bf01/torchscan-0.1.1-py3-none-any.whl
Installing collected packages: torchscan
Successfully installed torchscan-0.1.1


In [None]:
# https://www.cs.cmu.edu/~rsalakhu/papers/oneshot1.pdf

import torch
import torch.nn as nn
import warnings
warnings.filterwarnings('ignore')

In [None]:
# To be used as the feature extractor, in order to compare the features

class FeatExt(nn.Module):
    """Convolutional feature extractor shared by both Siamese branches.

    Maps a batch of single-channel 105x105 images to 4096-dimensional
    feature vectors squashed into (0, 1) by a sigmoid.

    Shape trace for one sample::

        input                  (1, 105, 105)
        cnn1  (10x10) + ReLU   (64, 96, 96)
        max_pool1 (2x2)        (64, 48, 48)
        cnn2  (7x7)  + ReLU    (128, 42, 42)
        max_pool2 (2x2)        (128, 21, 21)
        cnn3  (4x4)  + ReLU    (128, 18, 18)
        max_pool3 (2x2)        (128, 9, 9)
        cnn4  (4x4)  + ReLU    (256, 6, 6)
        flatten                (9216,)
        fc1 + sigmoid          (4096,)
    """

    def __init__(self):
        super().__init__()

        # Stage 1: (1, 105, 105) -> (64, 96, 96) -> (64, 48, 48)
        self.cnn1 = nn.Conv2d(1, 64, kernel_size=10)
        self.relu1 = nn.ReLU()
        self.max_pool1 = nn.MaxPool2d(kernel_size=2)

        # Stage 2: (64, 48, 48) -> (128, 42, 42) -> (128, 21, 21)
        self.cnn2 = nn.Conv2d(64, 128, kernel_size=7)
        self.relu2 = nn.ReLU()
        self.max_pool2 = nn.MaxPool2d(kernel_size=2)

        # Stage 3: (128, 21, 21) -> (128, 18, 18) -> (128, 9, 9)
        self.cnn3 = nn.Conv2d(128, 128, kernel_size=4)
        self.relu3 = nn.ReLU()
        self.max_pool3 = nn.MaxPool2d(kernel_size=2)

        # Stage 4 has no pooling: (128, 9, 9) -> (256, 6, 6)
        self.cnn4 = nn.Conv2d(128, 256, kernel_size=4)
        self.relu4 = nn.ReLU()

        # Dense head on the flattened 256 * 6 * 6 = 9216 activations.
        self.fc1 = nn.Linear(9216, 4096)
        self.sigmoid1 = nn.Sigmoid()

    def forward(self, x):
        """Return the (batch, 4096) feature matrix for a batch of images."""
        pooled_stages = (
            (self.cnn1, self.relu1, self.max_pool1),
            (self.cnn2, self.relu2, self.max_pool2),
            (self.cnn3, self.relu3, self.max_pool3),
        )
        for conv, activation, pool in pooled_stages:
            x = pool(activation(conv(x)))

        x = self.relu4(self.cnn4(x))

        # Keep the batch dimension, collapse channels and spatial dims.
        flat = x.flatten(start_dim=1)

        return self.sigmoid1(self.fc1(flat))


# if __name__ == '__main__':
#     model = SiameseNet()
#     batch_size = 5
#     input = torch.rand(size=(batch_size, 1, 105, 105))
#     out = model(input)
#     print(input.shape, out.shape)
#     assert tuple(out.shape) == (batch_size, 1)

In [None]:
class SiameseNet(nn.Module):
    """Siamese similarity network built on a shared ``FeatExt`` encoder.

    Both inputs are encoded by the *same* ``FeatExt`` instance (shared
    weights). The element-wise absolute difference of the two 4096-d
    feature vectors is mapped by a linear layer and a sigmoid to a single
    similarity score in (0, 1).
    """

    def __init__(self):
        super().__init__()
        self.feat = FeatExt()

        # Head operating on the 4096-d |f1 - f2| distance vector.
        self.fc = nn.Linear(4096, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, x2=None):
        """Score the similarity between ``x`` and ``x2``.

        Args:
            x: First batch of images, as accepted by ``FeatExt``.
            x2: Second batch of images with the same shape as ``x``.
                When omitted, ``x`` is compared with itself, which keeps
                single-input callers (shape checks, model summaries)
                working; the distance vector is then all zeros and the
                score depends only on the bias of ``fc``.

        Returns:
            Tensor of shape (batch, 1) with values in (0, 1).
        """
        if x2 is None:
            x2 = x

        # The encoder is applied twice with shared weights, once per branch.
        f1 = self.feat(x)
        f2 = self.feat(x2)

        return self.sigmoid(self.fc((f1 - f2).abs()))

#### Weight Initialization

* The authors initialized all weights in the convolutional layers from a normal
distribution with zero mean and a standard deviation of $10^{-2}$.
* Biases were also initialized from a normal distribution, but with mean `0.5`
and standard deviation $10^{-2}$.
* In the fully-connected layers, the biases were initialized in the same way as
in the convolutional layers, but the weights were drawn from a much wider
normal distribution with zero mean and standard deviation $2 \times 10^{-1}$.

In [None]:
def init_weights(m):
    """Initialise one module in place; intended for ``model.apply``.

    Scheme (all draws are from normal distributions):

    * ``nn.Conv2d`` weights: mean 0, std 1e-2
    * ``nn.Linear`` weights: mean 0, std 2e-1
    * biases of both layer types: mean 0.5, std 1e-2

    Modules of any other type are left untouched.

    Args:
        m: The module to initialise.
    """
    if isinstance(m, nn.Linear):
        weight_std = 2e-1
    elif isinstance(m, nn.Conv2d):
        weight_std = 1e-2
    else:
        return

    nn.init.normal_(m.weight, mean=0.0, std=weight_std)

    # Fully-connected biases follow the same distribution as conv biases.
    # Layers built with bias=False have nothing to initialise.
    if m.bias is not None:
        nn.init.normal_(m.bias, mean=0.5, std=1e-2)


In [None]:
# Smoke test: push one random batch through the network and confirm that it
# produces a single score per sample.

batch_size = 5
model = SiameseNet()
input = torch.rand(batch_size, 1, 105, 105)
out = model(input)
print(input.shape, out.shape)
assert out.shape == (batch_size, 1)

torch.Size([5, 1, 105, 105]) torch.Size([5, 1])


## Getting & preparing the dataset

In [None]:
import os

# Run everything from the project folder so that relative paths such as
# './data' resolve inside it.
# NOTE(review): the path looks like a mounted Google Drive in Colab; it must
# already exist or os.chdir raises.
working_dir = '/content/drive/MyDrive/siamese_net'
os.chdir(working_dir)

In [None]:
from torchvision.datasets import Omniglot
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
import logging
from tqdm import tqdm

In [None]:
# Omniglot stored under ./data (downloaded when missing); the transform turns
# every image into a NumPy array.
ds = Omniglot(root='./data', download=True, transform=np.array)

Files already downloaded and verified


In [None]:
# https://github.com/Rhcsky/siamese-one-shot-pytorch/blob/main/data_loader.py

class OmniglotDS(Dataset):
    """Random image-pair dataset for Siamese training.

    Wraps a base dataset whose items are ``(image, class_label)`` tuples and
    serves dictionaries holding two images plus a same/different label.
    The requested index only decides the pair type through its parity (odd:
    same class, even: different classes); the images themselves are drawn at
    random on every access.

    Args:
        ds: Indexable base dataset supporting ``len(ds)`` and ``ds[i]`` and
            returning ``(image, class_label)``. Images must be NumPy arrays
            (they go through ``torch.from_numpy``).
            Assumes class labels are hashable and compare by value (e.g.
            plain ints) — TODO confirm for datasets with a target transform.
    """

    def __init__(self, ds):
        super().__init__()
        self.ds = ds
        # NOTE(review): stored but not applied anywhere in this class;
        # presumably meant for normalising the images — confirm.
        self.mean = 0.8444
        self.std = 0.5329
        # Built lazily on first access, see _build_class_index.
        self._labels = None
        self._by_class = None

    def __len__(self):
        return len(self.ds)

    def _build_class_index(self):
        """Scan the base dataset once and record which indices hold which class."""
        labels = [self.ds[i][1] for i in range(len(self.ds))]
        by_class = {}
        for position, cls in enumerate(labels):
            by_class.setdefault(cls, []).append(position)
        self._labels = labels
        self._by_class = by_class

    def __getitem__(self, idx):
        """Return a random image pair.

        Args:
            idx: Only its parity is used: odd -> same-class pair (label 1.0),
                even -> different-class pair (label 0.0).

        Returns:
            dict with keys ``'img1'`` and ``'img2'`` (tensors created from
            the base images as-is) and ``'label'`` (0-dim float32 tensor).
            NOTE(review): no channel dimension or float conversion is added
            here; confirm where that is meant to happen before the model.

        Raises:
            IndexError: If the base dataset is empty.
            ValueError: If a different-class pair is requested but the base
                dataset contains fewer than two classes.
        """
        if self._by_class is None:
            # One pass over the labels replaces drawing (and loading) random
            # samples until a matching class happens to come up.
            self._build_class_index()

        total = len(self._labels)
        if total == 0:
            raise IndexError('cannot sample a pair from an empty dataset')

        # randrange excludes the upper bound, so the index is always valid.
        first = random.randrange(total)
        cls1 = self._labels[first]

        if idx % 2 == 1:
            # Same class: pick uniformly among that class's samples (may be
            # the very same image).
            label = 1.0
            second = random.choice(self._by_class[cls1])
        else:
            # Different class: redraw on labels only until the class differs.
            label = 0.0
            if len(self._by_class) < 2:
                raise ValueError(
                    'need at least two classes to build a different-class pair'
                )
            second = random.randrange(total)
            while self._labels[second] == cls1:
                second = random.randrange(total)

        img1, _ = self.ds[first]
        img2, _ = self.ds[second]

        return {
            'img1': torch.from_numpy(img1),
            'img2': torch.from_numpy(img2),
            'label': torch.from_numpy(np.array(label, dtype=np.float32)),
        }



In [None]:
# Batches of 128 pairs; the pairs themselves are drawn at random inside
# OmniglotDS.__getitem__.
dl = DataLoader(OmniglotDS(ds), batch_size=128)

In [None]:
# Number of samples in the base dataset.
len(ds)

In [None]:
# for x in ds:
  # print(x[1])
  
# print([y[1] for y in tqdm(list(ds)[:100])])
# Peek at the class labels of 200 randomly chosen samples.
# randrange excludes its upper bound, so the index is always valid;
# randint(0, len(ds)) could return len(ds) itself and raise IndexError.
l = []
for _ in tqdm(range(200)):
  idx = random.randrange(len(ds))
  l.append(ds[idx][1])

print(l)

In [None]:
# Inspect the first batch. The loader collates the per-sample dicts into one
# dict of batched tensors, so walk over its items: iterating the dict itself
# yields only the string keys, which have no .shape.

for x in dl:
  for key, y in x.items():
    print(key, type(y), y.shape)
  break

## Loss & optimizers

- Binary cross-entropy loss with L2 regularization (applied as weight decay in the optimizer)

In [None]:
from torch.optim import Adam
from torchsummary import summary


In [None]:
# Apply init_weights to the model and, recursively, to all of its submodules.
model.apply(init_weights)

SiameseNet(
  (feat): FeatExt(
    (cnn1): Conv2d(1, 64, kernel_size=(10, 10), stride=(1, 1))
    (relu1): ReLU()
    (max_pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (cnn2): Conv2d(64, 128, kernel_size=(7, 7), stride=(1, 1))
    (relu2): ReLU()
    (max_pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (cnn3): Conv2d(128, 128, kernel_size=(4, 4), stride=(1, 1))
    (relu3): ReLU()
    (max_pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (cnn4): Conv2d(128, 256, kernel_size=(4, 4), stride=(1, 1))
    (relu4): ReLU()
    (fc1): Linear(in_features=9216, out_features=4096, bias=True)
    (sigmoid1): Sigmoid()
    (fc2): Linear(in_features=4096, out_features=1, bias=True)
    (sigmoid2): Sigmoid()
  )
  (fc): Linear(in_features=4096, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [None]:
# Binary cross-entropy on the network's sigmoid output.
criterian = nn.BCELoss()
# Adam with its default learning rate; weight_decay adds the L2 penalty.
# NOTE(review): 0.1 is a strong penalty — confirm it is intended.
optimizer = Adam(model.parameters(), weight_decay=0.1)

In [None]:
# Per-layer output shapes and parameter counts for one 1x105x105 input.
summary(model, (1, 105, 105))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 96, 96]           6,464
              ReLU-2           [-1, 64, 96, 96]               0
         MaxPool2d-3           [-1, 64, 48, 48]               0
            Conv2d-4          [-1, 128, 42, 42]         401,536
              ReLU-5          [-1, 128, 42, 42]               0
         MaxPool2d-6          [-1, 128, 21, 21]               0
            Conv2d-7          [-1, 128, 18, 18]         262,272
              ReLU-8          [-1, 128, 18, 18]               0
         MaxPool2d-9            [-1, 128, 9, 9]               0
           Conv2d-10            [-1, 256, 6, 6]         524,544
             ReLU-11            [-1, 256, 6, 6]               0
           Linear-12                 [-1, 4096]      37,752,832
          Sigmoid-13                 [-1, 4096]               0
          FeatExt-14                 [-

## Training Loop 

- LR Scheduler

In [None]:
# Presumably the number of training epochs; the training loop that would use
# it is not written yet.
NUM_EPOCH = 10

In [None]:
for 

## Rough

In [None]:
# Scratch check of BCELoss on a single confident-but-wrong prediction:
# predicting 0.01 for a target of 1 should cost -ln(0.01), about 4.6052.
l = nn.BCELoss()

x = torch.tensor([[0.01]])
y = torch.tensor([[1.0]])
print(x, y, l(x, y))

tensor([[0.0100]]) tensor([[1.]]) tensor(4.6052)
