# Siamese neural networks training

This notebook presents an implementation of the paper ["Siamese Neural Networks for One-shot Image Recognition"](https://www.cs.cmu.edu/~rsalakhu/papers/oneshot1.pdf) using the PyTorch framework.

In this part we train a Siamese network on the Omniglot dataset for a verification task: classifying whether two images belong to the same class or to different classes.


References:
- [paper](https://www.cs.cmu.edu/~rsalakhu/papers/oneshot1.pdf)
- [omniglot](https://github.com/brendenlake/omniglot)
- [keras-oneshot](https://github.com/sorenbouma/keras-oneshot)


In [1]:
# https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
# Reload imported modules automatically before running each cell, so edits to the
# local .py modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys
import numpy as np
import cv2

In [3]:
# Add the parent directory (relative to the working directory) to the import search path.
sys.path.append("..")

In [4]:
# Set to False to force CPU execution. CUDA is only used when this flag is True
# and torch.cuda.is_available() is also True (see the loader, model and loss cells).
HAS_GPU = True

## Setup dataflow

In [6]:
from dataflow import OmniglotDataset, SameOrDifferentPairsDataset, PairTransformedDataset
from common_utils.imgaug import RandomAffine, RandomApply
from common_utils.dataflow import TransformedDataset, OnGPUDataLoader, ResizedDataset
from torchvision.transforms import Compose, ToTensor, Normalize
from torch.utils.data import DataLoader
import torch

In [93]:
np.random.seed(12345)

OMNIGLOT_REPO_PATH='omniglot'

TRAIN_DATA_PATH = os.path.join(OMNIGLOT_REPO_PATH, 'python', 'images_background')
TEST_DATA_PATH = os.path.join(OMNIGLOT_REPO_PATH, 'python', 'images_evaluation')


def _list_visible(path):
    """Return the sorted names of the non-hidden entries of directory `path`.

    Pure-Python replacement for capturing `!ls path`: it does not depend on a
    shell, is safe for names containing spaces or parentheses, and raises
    FileNotFoundError right away when `path` does not exist.
    """
    return sorted(name for name in os.listdir(path) if not name.startswith('.'))


def _index_drawings(data_path, alphabets):
    """Build the nested mapping {alphabet: {char_id: [drawing_id, ...]}}.

    `data_path` is the folder holding one sub-folder per alphabet, each of which
    holds one sub-folder per character. A drawing id is the file name with its
    extension removed.
    """
    index = {}
    for alphabet in alphabets:
        alphabet_path = os.path.join(data_path, alphabet)
        index[alphabet] = {
            char_id: [os.path.splitext(file_name)[0]
                      for file_name in _list_visible(os.path.join(alphabet_path, char_id))]
            for char_id in _list_visible(alphabet_path)
        }
    return index


train_alphabets = _list_visible(TRAIN_DATA_PATH)
test_alphabets = _list_visible(TEST_DATA_PATH)

# Both folders must contain several alphabets; the message does not index into the
# lists, so it stays valid even when one of them is empty.
assert len(train_alphabets) > 1 and len(test_alphabets) > 1, \
    "Expected several alphabets, found %i in %s and %i in %s" % (
        len(train_alphabets), TRAIN_DATA_PATH, len(test_alphabets), TEST_DATA_PATH)

train_alphabet_char_id_drawer_ids = _index_drawings(TRAIN_DATA_PATH, train_alphabets)
test_alphabet_char_id_drawer_ids = _index_drawings(TEST_DATA_PATH, test_alphabets)


# Split the 20 drawer ids into three disjoint groups: 12 train / 4 validation / 4 test.
# NOTE(review): Omniglot file names appear to use drawer suffixes _01.._20, while
# np.arange(20) yields 0..19 — confirm whether this should be np.arange(1, 21).
all_drawers_ids = np.arange(20)
# Sample 12 drawers out of 20
# (training on all 20 left nothing to sample from below, so np.random.choice raised ValueError)
train_drawers_ids = np.random.choice(all_drawers_ids, size=12, replace=False)
# Sample 4 drawers out of remaining 8 (sorted so the draw does not depend on set ordering)
remaining_drawers_ids = sorted(set(all_drawers_ids) - set(train_drawers_ids))
val_drawers_ids = np.random.choice(remaining_drawers_ids, size=4, replace=False)
# The 4 drawers used by neither training nor validation form the test group
test_drawers_ids = np.array(sorted(set(remaining_drawers_ids) - set(val_drawers_ids)))

def create_str_drawers_ids(drawers_ids):
    """Return one string per drawer id: "_" followed by the id left-padded with zeros to width 2.

    Example: [3, 12] -> ["_03", "_12"].
    """
    formatted_ids = []
    for drawer_id in drawers_ids:
        formatted_ids.append("_" + format(drawer_id, "0>2"))
    return formatted_ids

# Convert the numeric drawer ids to their "_NN" string form (rebinds the same names).
train_drawers_ids = create_str_drawers_ids(train_drawers_ids)
val_drawers_ids = create_str_drawers_ids(val_drawers_ids)
test_drawers_ids = create_str_drawers_ids(test_drawers_ids)

# Training data comes from the background image folder, restricted to the training drawers.
train_ds = OmniglotDataset("Train", data_path=TRAIN_DATA_PATH, 
                           alphabet_char_id_drawers_ids=train_alphabet_char_id_drawer_ids, 
                           drawers_ids=train_drawers_ids)

# Validation and test data both come from the evaluation image folder and differ only
# in the drawers they keep.
# NOTE(review): the validation dataset is also constructed with the name "Test" — confirm intended.
val_ds = OmniglotDataset("Test", data_path=TEST_DATA_PATH, 
                         alphabet_char_id_drawers_ids=test_alphabet_char_id_drawer_ids, 
                         drawers_ids=val_drawers_ids)

test_ds = OmniglotDataset("Test", data_path=TEST_DATA_PATH, 
                          alphabet_char_id_drawers_ids=test_alphabet_char_id_drawer_ids, 
                          drawers_ids=test_drawers_ids)

# Wrap every dataset so that it is requested with output_size=(28, 28).
train_ds = ResizedDataset(train_ds, output_size=(28, 28))
val_ds = ResizedDataset(val_ds, output_size=(28, 28))
test_ds = ResizedDataset(test_ds, output_size=(28, 28))

In [94]:
# Wrap each dataset into a pair dataset of fixed size: 30k pairs for training and
# 10k each for validation and test.
# NOTE(review): presumably each item is a pair of images with a same/different
# label — confirm in dataflow.SameOrDifferentPairsDataset.
train_pairs = SameOrDifferentPairsDataset(train_ds, nb_pairs=int(30e3))
val_pairs = SameOrDifferentPairsDataset(val_ds, nb_pairs=int(10e3))
test_pairs = SameOrDifferentPairsDataset(test_ds, nb_pairs=int(10e3))

# Displayed as the cell output: the number of pairs in each split.
len(train_pairs), len(val_pairs), len(test_pairs)

(30000, 10000, 10000)

In [95]:
# Training-time input pipeline: a random affine transform wrapped in RandomApply
# with proba=0.5, followed by conversion to a tensor.
# NOTE(review): rotation is presumably in degrees and translate a fraction of the
# image size — confirm in common_utils.imgaug.
train_data_aug = Compose([
    RandomApply(
        RandomAffine(rotation=(-10, 10), scale=(0.8, 1.2), translate=(-0.05, 0.05)),
        proba=0.5
    ),
    ToTensor()
])

# Evaluation-time input pipeline: tensor conversion only, no augmentation.
test_data_aug = Compose([
    ToTensor()
])

# Wrap the scalar label into a 1-element FloatTensor.
y_transform = lambda y: torch.FloatTensor([y])

# Training pairs get the augmenting pipeline; validation and test pairs get the plain one.
train_aug_pairs = PairTransformedDataset(train_pairs, x_transforms=train_data_aug, y_transforms=y_transform)
val_aug_pairs = PairTransformedDataset(val_pairs, x_transforms=test_data_aug, y_transforms=y_transform)
test_aug_pairs = PairTransformedDataset(test_pairs, x_transforms=test_data_aug, y_transforms=y_transform)

In [96]:
batch_size = 64

# Use the project's OnGPUDataLoader only when the GPU is both requested and available;
# otherwise fall back to the standard torch DataLoader.
_DataLoader = OnGPUDataLoader if HAS_GPU and torch.cuda.is_available() else DataLoader

# NOTE(review): unlike the two loaders below, the training loader is created
# without pin_memory — confirm whether that is intentional.
train_batches = _DataLoader(train_aug_pairs, batch_size=batch_size, 
                            shuffle=True, num_workers=12, 
                            drop_last=True)

# NOTE(review): the validation loader shuffles and drops the last incomplete batch,
# so with 10000 pairs and batch_size=64 a varying 16 pairs are skipped on each
# pass — confirm this is wanted for evaluation.
val_batches = _DataLoader(val_aug_pairs, batch_size=batch_size, 
                          shuffle=True, num_workers=12,
                          pin_memory=True, drop_last=True)

# The test loader keeps the order and every sample (the last batch may be smaller).
test_batches = _DataLoader(test_aug_pairs, batch_size=batch_size, 
                           shuffle=False, num_workers=12,                   
                           pin_memory=True, drop_last=False)


# Displayed as the cell output: the number of batches per loader.
len(train_batches), len(val_batches), len(test_batches)

(468, 156, 157)

In [97]:
# Peek at a single training batch to check the tensor shapes and types of both
# images of the pair and of the label, then stop.
for (x1, x2), y in train_batches:
    print(x1.size(), x2.size(), y.size())
    # Report the type of each of the three tensors (x1 was previously printed twice).
    print(type(x1), type(x2), type(y))
    break

torch.Size([64, 1, 28, 28]) torch.Size([64, 1, 28, 28]) torch.Size([64, 1])
<class 'torch.Tensor'> <class 'torch.Tensor'> <class 'torch.Tensor'>


## Setup model, loss function and optimisation algorithm

#### Weight regularization

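L2 weight regularization: a penalty proportional to the squared norm of the weights, $\lambda \lVert \mathbf{w} \rVert_2^2$, is added to the loss.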

#### Loss function

Binary cross-entropy

In [98]:
from torch.autograd import Variable
from torch.nn import BCEWithLogitsLoss
from torch.nn.functional import sigmoid
from torch.optim import Adam, RMSprop, SGD
from torch.optim.lr_scheduler import ExponentialLR, ReduceLROnPlateau

In [99]:
from datetime import datetime
from common_utils.training_utils import train_one_epoch, validate, write_csv_log, write_conf_log, verbose_optimizer, save_checkpoint
from common_utils.training_utils import accuracy

In [181]:
from model import SiameseNetworks, ATwoLayer, OmniV, Classifier2

In [182]:
# Instantiate the model to train. The commented-out lines are the other architectures
# imported from model.py.
# NOTE(review): image_size=28*2 presumably means the two 28x28 images of a pair are
# combined into one input — confirm against Classifier2 and against how the pair is
# concatenated before the forward pass.
siamese_net = Classifier2(image_size=28*2, tasks = 1)
# siamese_net = OmniV(image_size=28, tasks = 1)
# siamese_net = ATwoLayer(input_size=105*105*2, output = 1, tasks = 1)
# siamese_net = SiameseNetworks(input_shape=(105, 105, 1))
# Move the model to the GPU only when it is both requested and available.
if HAS_GPU and torch.cuda.is_available():
    siamese_net = siamese_net.cuda()

In [183]:
def accuracy_logits(y_logits, y_true):
    """Accuracy metric for raw (pre-sigmoid) model outputs.

    Applies a sigmoid to `y_logits`, detaches the result from the autograd graph
    and returns whatever `accuracy` computes from it and `y_true`.
    """
    # torch.sigmoid replaces the deprecated torch.nn.functional.sigmoid, and
    # .detach() replaces the discouraged .data attribute.
    y_pred = torch.sigmoid(y_logits).detach()
    return accuracy(y_pred, y_true)

In [184]:
# Binary cross-entropy computed directly on logits: BCEWithLogitsLoss applies the
# sigmoid itself, so the model output is passed to it without a sigmoid.
criterion = BCEWithLogitsLoss()
# Move the loss module to the GPU only when it is both requested and available.
if HAS_GPU and torch.cuda.is_available():
    criterion = criterion.cuda()

In [185]:
# Test single forward pass and loss function computation
# Runs the model in eval mode on the first training batch only (note the `break`).
siamese_net.eval()
for i, ((batch_x1, batch_x2), batch_y) in enumerate(train_batches):
    
    # NOTE(review): requires_grad=True on the inputs looks unnecessary for a
    # forward-only check — confirm and drop.
    batch_x1 = Variable(batch_x1, requires_grad=True)
    batch_x2 = Variable(batch_x2, requires_grad=True)    
    batch_y = Variable(batch_y)
    
#     batch_y_logits = siamese_net(batch_x1, batch_x2)
#     batch_y_logits = siamese_net.forward(batch_x1, batch_x2,task=0)
    # Join the two images of each pair along dim=1; with the [64, 1, 28, 28] batches
    # printed earlier this gives [64, 2, 28, 28].
    # NOTE(review): the recorded output of this cell is a size-mismatch error. The
    # model is built with image_size=28*2, which may expect the pair stacked along a
    # spatial axis (dim=2 or dim=3) instead — confirm against Classifier2.
    batch_x = torch.cat([batch_x1,batch_x2],dim=1)
    print(batch_x.shape)
    batch_y_logits = siamese_net.forward(batch_x, task=0)


    print(type(batch_y.data), type(batch_y_logits.data), batch_y.size(), batch_y_logits.size())    
    loss = criterion(batch_y_logits, batch_y)
    print("Loss : ", loss.data)
    
    print("Accuracy : ", accuracy_logits(batch_y_logits.data, batch_y.data))
    break

torch.Size([64, 64, 1, 1])
torch.Size([64, 64])
DatasetError: size mismatch, m1: [64 x 64], m2: [576 x 1] at ../aten/src/TH/generic/THTensorMath.cpp:961


TypeError: sigmoid(): argument 'input' (position 1) must be Tensor, not NoneType

In [159]:
# Scratch check: joining two (64, 28, 28) tensors along dim=1 doubles the second axis.
x1, x2 = torch.randn(64, 28, 28), torch.randn(64, 28, 28)
print(torch.cat((x1, x2), dim=1).shape)

torch.Size([64, 56, 28])


In [152]:
# Training hyper-parameters. `conf` is read by the logging and training cells but
# was not defined anywhere in the notebook, so those cells failed on a fresh kernel.
# n_epochs matches the recorded progress bar ("Epoch: 1/50"), lr is the value that
# was hard-coded below, and weight_decay=0.0 is Adam's default (no L2 penalty).
conf = {
    'n_epochs': 50,
    'lr': 0.001,
    'weight_decay': 0.0,
}

# Earlier variant with separate learning rates for the feature extractor and the
# classifier (needs 'lr_features' and 'lr_classifier' entries in conf):
# optimizer = Adam([{
#     'params': siamese_net.net.features.parameters(),
#     'lr': conf['lr_features'],    
# }, {
#     'params': siamese_net.classifier.parameters(),
#     'lr': conf['lr_classifier']
# }],
#     weight_decay=conf['weight_decay']
# )

# A single parameter group; L2 regularization is controlled by conf['weight_decay'].
optimizer = Adam(siamese_net.parameters(), lr=conf['lr'], weight_decay=conf['weight_decay'])

Note that the strength of the L2 weight regularization is set through the optimizer API via the `weight_decay` parameter, [ref](http://pytorch.org/docs/master/optim.html?highlight=adam#torch.optim.Adam)

### Start training

In [153]:
# Create a time-stamped folder under logs/ for this run's configuration, CSV log
# and checkpoints.
now = datetime.now()
logs_path = os.path.join('logs', 'seamese_networks_verification_task_%s' % (now.strftime("%Y%m%d_%H%M")))
# exist_ok=True makes the cell safe to re-run within the same minute and avoids the
# check-then-create race of testing os.path.exists first.
os.makedirs(logs_path, exist_ok=True)

In [154]:
from common_utils.training_utils import train_one_epoch, validate, write_csv_log, write_conf_log, verbose_optimizer, save_checkpoint
# Record the run configuration and the optimizer settings next to the logs.
write_conf_log(logs_path, "{}".format(conf))
write_conf_log(logs_path, verbose_optimizer(optimizer))

# Header of the per-epoch CSV log.
write_csv_log(logs_path, "epoch,train_loss,train_acc,val_loss,val_acc")

# Best validation accuracy seen so far; a checkpoint is written only when it improves.
best_acc = 0.0
for epoch in range(conf['n_epochs']):
#     scheduler.step()
    # Verbose learning rates:
    print(verbose_optimizer(optimizer))

    # train for one epoch
    ret = train_one_epoch(siamese_net, train_batches, 
                          criterion, optimizer,                                               
                          epoch, conf['n_epochs'], avg_metrics=[accuracy_logits,])
    # A None result stops the training loop.
    if ret is None:
        break
    train_loss, train_acc = ret

    # evaluate on validation set
    ret = validate(siamese_net, val_batches, criterion, avg_metrics=[accuracy_logits, ])
    if ret is None:
        break
    val_loss, val_acc = ret
    
#     onplateau_scheduler.step(val_loss)

    # Write a csv log file
    write_csv_log(logs_path, "%i,%f,%f,%f,%f" % (epoch, train_loss, train_acc, val_loss, val_acc))

    # remember best accuracy and save checkpoint
    if val_acc > best_acc:
        # Update the running best. It was previously stored in an unused variable,
        # so best_acc stayed at 0.0 and every epoch wrote a new checkpoint.
        best_acc = val_acc
        save_checkpoint(logs_path, 'val_acc', 
                        {'epoch': epoch + 1,
                         'state_dict': siamese_net.state_dict(),
                         'val_acc': val_acc,           
                         'optimizer': optimizer.state_dict()})        

  0%|          | 0/468 [00:00<?, ?it/s]


Optimizer: Adam
- Param group: 
	lr: 1e-05
	betas: (0.9, 0.999)
	eps: 1e-08
	weight_decay: 0
	amsgrad: False



Epoch: 1/50:  95%|#########5| 446/468 [04:23<00:11,  1.83it/s, Loss 0.6931 | accuracy_logits 0.500]Traceback (most recent call last):
  File "/anaconda3/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/anaconda3/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/anaconda3/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/anaconda3/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Traceback (most recent call last):
Traceback (most recent call last):
  File "/anaconda3/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/anaconda3/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/anaconda3/lib/python3.6/multiprocessing/c

### Inference on testing dataset

In [27]:
from common_utils.training_utils import load_checkpoint
from glob import glob

In [28]:
# Locate the checkpoint written by the training loop for the best validation accuracy.
best_model_filenames = glob(os.path.join(logs_path, "model_val_acc=*"))
# Exactly one match is expected; the message reports what was actually found
# instead of a bare AssertionError.
assert len(best_model_filenames) == 1, \
    "Expected exactly one 'model_val_acc=*' checkpoint in %s, found %i: %s" % (
        logs_path, len(best_model_filenames), best_model_filenames)
# Load the checkpoint into the model.
load_checkpoint(best_model_filenames[0], siamese_net)

AssertionError: 

In [29]:
# evaluate on validation set
test_loss, test_acc = validate(siamese_net, test_batches, criterion, avg_metrics=[accuracy_logits, ])
test_loss, test_acc

  batch_x = [Variable(batch_, volatile=True) for batch_ in batch_x]
  batch_y = Variable(batch_y, volatile=True)
100%|##########| 157/157 [1:39:29<00:00,  3.76s/it, Loss 0.6924 | accuracy_logits 0.500]     


(0.692438431930542, tensor(0.5000))

### Run training script

In [None]:
# Run the stand-alone training script in a shell; train_model.py is looked up
# relative to the notebook's working directory.
!python3 train_model.py