In [None]:
# Load IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell executes, so changes to local project code are picked up live.
%load_ext autoreload
%autoreload 2

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')
!ln -s /content/gdrive/MyDrive /mydrive
%cd /mydrive/


# Experiments
We'll go through learning feature embeddings using different loss functions on the leopard dataset. We are using 512-dimensional embeddings.

For every experiment, Resnet18() is used; currently, no hyperparameter search is implemented.

# Prepare dataset
We'll be working on the leopard dataset.

In [1]:
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
import torch.utils.data as data
import torch
# Per-channel statistics handed to Normalize
# (presumably the usual ImageNet values — TODO confirm).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: convert to a tensor, then normalise
# each channel. A Resize to (128, 128) ahead of ToTensor is left disabled.
transform_img = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)

In [2]:
#%cd /mydrive/W210/Animal_Identification/siamese-triplet/

from siamese_triplet.datasets import LeopardDataset

# Chooses between the project's LeopardDataset wrapper and a plain ImageFolder.
MULTI_EMBEDDING = True
# True when a CUDA device is available; later cells read this flag.
cuda = torch.cuda.is_available()

if not MULTI_EMBEDDING:
    # ImageFolder branch: train and test both point at the same folder.
    TRAIN_DATA_PATH = '../../datasets/temp'
    TEST_DATA_PATH = '../../datasets/temp'
    train_dataset = torchvision.datasets.ImageFolder(
        root=TRAIN_DATA_PATH, transform=transform_img)
    test_dataset = torchvision.datasets.ImageFolder(
        root=TEST_DATA_PATH, transform=transform_img)
else:
    # LeopardDataset branch: separate train / test directories.
    TRAIN_DATA_PATH = '../datasets/leopard/classes_64/resize_256/train'
    TEST_DATA_PATH = '../datasets/leopard/classes_64/resize_256/test'
    train_dataset = LeopardDataset(
        image_dir=TRAIN_DATA_PATH, transform=transform_img)
    test_dataset = LeopardDataset(
        image_dir=TEST_DATA_PATH, transform=transform_img)

## Common setup

# Baseline: Classification with softmax
We'll train the model for classification and use the outputs of the penultimate layer as embeddings.

# Online pair/triplet selection - negative mining
There are a couple of problems with siamese and triplet networks.
1. The number of possible pairs/triplets grows **quadratically/cubically** with the number of examples. It's infeasible to process them all
2. We generate pairs/triplets randomly. As the training continues, more and more pairs/triplets are easy to deal with (their loss value is very small or even 0), preventing the network from training. We need to provide the network with **hard examples**.
3. Each image that is fed to the network is used only for computation of contrastive/triplet loss for only one pair/triplet. The computation is somewhat wasted; once the embedding is computed, it could be reused for many pairs/triplets.

To deal with that efficiently, we'll feed a network with standard mini-batches as we did for classification. The loss function will be responsible for selection of hard pairs and triplets within a mini-batch. In this case, if we feed the network with 16 images for each of 10 classes, we can process up to $159*160/2 = 12720$ pairs and $10*16*15/2*(9*16) = 172800$ triplets, compared to 80 pairs and 53 triplets in the previous implementation.

We can find some strategies on how to select triplets in [2] and [3] *Alexander Hermans, Lucas Beyer, Bastian Leibe, [In Defense of the Triplet Loss for Person Re-Identification](https://arxiv.org/pdf/1703.07737), 2017*

## Online triplet selection
## Steps
1. Create **BalancedBatchSampler** - samples $N$ classes and $M$ examples from each class (*datasets.py*)
2. Create data loaders with the batch sampler
3. Define **embedding** *(mapping)* network $f(x)$ - **EmbeddingNet** from *networks.py*
4. Define a **TripletSelector** that takes embeddings and original labels and returns valid triplets within a minibatch
5. Define **OnlineTripletLoss** that will use a *TripletSelector* and compute *TripletLoss* on such triplets
6. Train the network!

In [3]:
import numpy
from torch.optim import lr_scheduler
import torch.optim as optim
from torch.autograd import Variable

from siamese_triplet.trainer import fit
import numpy as np
from siamese_triplet.datasets import BalancedBatchSampler
import torch.nn as nn

    
# Labels of every sample, as tensors, handed to the balanced samplers.
train_labels = torch.tensor(train_dataset.targets)
test_labels = torch.tensor(test_dataset.targets)

# Mini-batches are built by sampling which labels appear in the batch and
# how many examples are drawn from each of those classes.
train_batch_sampler = BalancedBatchSampler(
    train_labels,
    n_classes=64,
    n_samples=8,
)
test_batch_sampler = BalancedBatchSampler(
    test_labels,
    n_classes=64,
    n_samples=2,
)

# Worker / pinned-memory options are only passed when CUDA is available.
if cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

online_train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_sampler=train_batch_sampler,
    **kwargs,
)
online_test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_sampler=test_batch_sampler,
    **kwargs,
)

# Set up the network and training parameters
from siamese_triplet.networks import EmbeddingNet

from siamese_triplet.losses import OnlineTripletLoss
from siamese_triplet.losses import OnlineSymTripletLoss
from siamese_triplet.losses import OnlineModTripletLoss

from siamese_triplet.utils_triplet import AllTripletSelector
from siamese_triplet.utils_triplet import HardestNegativeTripletSelector
from siamese_triplet.utils_triplet import RandomNegativeTripletSelector
from siamese_triplet.utils_triplet import SemihardNegativeTripletSelector
from siamese_triplet.utils_triplet import SemihardNegativeCentroidTripletSelector # Strategies for selecting triplets within a minibatch

from siamese_triplet.metrics import AverageNonzeroTripletsMetric
from sklearn.metrics import f1_score, classification_report 

# Scalar training settings, gathered in one place.
margin = 0.25
lr = 1e-3
n_epochs = 20
log_interval = 50
softmax = False

# Remove the `model` name left behind by an earlier run of this cell.
# NOTE(review): `embedding_net` refers to the same object until it is
# reassigned below, so this alone may not release the old network.
if 'model' in globals():
    del model
else:
    print('Model does not exist')

# Fresh embedding network; `model` is an alias for it.
embedding_net = EmbeddingNet()
model = embedding_net
if cuda:
    model.cuda()

# The loss and its triplet selector are given the same margin.
loss_fn = OnlineTripletLoss(
    margin,
    SemihardNegativeTripletSelector(margin),
)

optimizer = optim.Adam(
    params=model.parameters(),
    lr=lr,
    weight_decay=1e-4,
)
# StepLR scales the learning rate by `gamma` every `step_size` scheduler steps.
scheduler = lr_scheduler.StepLR(
    optimizer,
    step_size=8,
    gamma=0.1,
    last_epoch=-1,
)

# Passed to fit() alongside loss_fn.
softmax_loss_fn = nn.CrossEntropyLoss()

Model does not exist


In [4]:
# Launch training with the objects configured in the previous cell.
# NOTE(review): multi_class is pinned to False although MULTI_EMBEDDING is
# set to True earlier in the notebook — confirm this is intended.
fit(
    online_train_loader, online_test_loader,
    model, loss_fn, softmax_loss_fn,
    optimizer, scheduler,
    n_epochs, cuda, log_interval,
    metrics=[AverageNonzeroTripletsMetric()],
    multi_class=False,  # MULTI_EMBEDDING,
    softmax=softmax,
)

Model Saved
Epoch: 1/20. Train set: Average loss: 0.1458	Average nonzero triplets: 1691.0
Epoch: 1/20. Validation set: Average loss: 0.1427	Average nonzero triplets: 53.0
Epoch: 2/20. Train set: Average loss: 0.1193	Average nonzero triplets: 1689.0
Epoch: 2/20. Validation set: Average loss: 0.1465	Average nonzero triplets: 53.0
Epoch: 3/20. Train set: Average loss: 0.1162	Average nonzero triplets: 1691.0
Epoch: 3/20. Validation set: Average loss: 0.1244	Average nonzero triplets: 53.0
Epoch: 4/20. Train set: Average loss: 0.1146	Average nonzero triplets: 1666.0
Epoch: 4/20. Validation set: Average loss: 0.1316	Average nonzero triplets: 53.0
Epoch: 5/20. Train set: Average loss: 0.1115	Average nonzero triplets: 1672.0
Epoch: 5/20. Validation set: Average loss: 0.1237	Average nonzero triplets: 53.0
Model Saved
Epoch: 6/20. Train set: Average loss: 0.1128	Average nonzero triplets: 1649.0
Epoch: 6/20. Validation set: Average loss: 0.1273	Average nonzero triplets: 52.0
Epoch: 7/20. Train set

In [5]:
def _make_eval_loader(dataset):
    """Return an unshuffled DataLoader over `dataset` for embedding extraction.

    NOTE(review): drop_last=True makes the loader skip a final batch smaller
    than 16, so those samples never reach extract_embeddings — confirm that
    this is intended for evaluation.
    """
    return data.DataLoader(
        dataset,
        batch_size=16,
        shuffle=False,
        num_workers=2,
        drop_last=True,
        pin_memory=cuda,
    )


# NOTE(review): extract_embeddings is not imported in any visible cell —
# confirm where it is defined before running on a fresh kernel.

# Training split.
train_eval_loader = _make_eval_loader(train_dataset)
(train_emb,
 train_softmax,
 train_ref_labels,
 train_pred_labels) = extract_embeddings(
    train_eval_loader, model,
    multi_class=False, softmax=softmax, cuda=cuda,
)

# Test split, processed the same way.
test_eval_loader = _make_eval_loader(test_dataset)
(test_emb,
 test_softmax,
 test_ref_labels,
 test_pred_labels) = extract_embeddings(
    test_eval_loader, model,
    multi_class=False, softmax=softmax, cuda=cuda,
)
                                                                              


In [7]:
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Standardise the embeddings, then fit an SVM classifier on top of them.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))

# Convert both embedding tensors to NumPy arrays on the CPU for scikit-learn.
train_emb_cpu = train_emb.data.cpu().numpy()
test_emb_cpu = test_emb.data.cpu().numpy()

clf.fit(train_emb_cpu, train_ref_labels)

# These assignments replace the *_pred_labels values produced earlier
# by extract_embeddings.
train_pred_labels = clf.predict(train_emb_cpu)
test_pred_labels = clf.predict(test_emb_cpu)

train_acc = accuracy_score(train_ref_labels, train_pred_labels)
test_acc = accuracy_score(test_ref_labels, test_pred_labels)

print(f"Training Accuracy: {train_acc!s}")
print(f"Testing Accuracy: {test_acc!s}")

Training Accuracy: 0.9481132075471698
Testing Accuracy: 0.40625


In [10]:
# Per-class precision / recall / F1 for the training-set predictions.
train_report = classification_report(train_ref_labels, train_pred_labels)
print(train_report)

              precision    recall  f1-score   support

           0       1.00      0.86      0.92         7
           1       1.00      0.83      0.91         6
           2       1.00      1.00      1.00        11
           3       1.00      1.00      1.00         8
           4       1.00      1.00      1.00        10
           5       1.00      1.00      1.00         7
           6       1.00      1.00      1.00        11
           7       1.00      1.00      1.00         7
           8       1.00      1.00      1.00         8
           9       1.00      1.00      1.00        21
          10       1.00      1.00      1.00        12
          11       1.00      0.89      0.94         9
          12       1.00      1.00      1.00         9
          13       1.00      0.92      0.96        12
          14       1.00      1.00      1.00         9
          15       0.93      1.00      0.96        13
          16       0.88      0.95      0.91        38
          17       1.00    

In [11]:
# Per-class precision / recall / F1 for the test-set predictions.
# Some classes receive no predictions, which leaves their precision undefined.
# zero_division=0 reports those entries as 0 explicitly, instead of falling
# back to 0 with an UndefinedMetricWarning for each affected average.
print(classification_report(test_ref_labels, test_pred_labels, zero_division=0))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.00      0.00      0.00         1
           2       0.00      0.00      0.00         3
           3       0.00      0.00      0.00         3
           4       1.00      1.00      1.00         2
           5       0.00      0.00      0.00         1
           6       0.50      1.00      0.67         2
           7       0.00      0.00      0.00         1
           8       1.00      0.50      0.67         2
           9       0.42      0.71      0.53         7
          10       0.67      0.40      0.50         5
          11       0.00      0.00      0.00         2
          12       0.00      0.00      0.00         2
          13       0.50      0.33      0.40         3
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         2
          16       0.47      0.73      0.57        11
          17       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
