In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports all
# modules before each cell executes, so edits to the local .py files
# (datasets, networks, losses, ...) take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')
!ln -s /content/gdrive/MyDrive /mydrive
%cd /mydrive/


# Experiments
We'll go through learning feature embeddings with different loss functions on the leopard dataset. We use 512-dimensional embeddings.

`Resnet18()` is used for every experiment; currently no hyperparameter search is implemented.

# Prepare dataset
We'll be working on the leopard dataset.

In [1]:
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
import torch.utils.data as data
import torch
# Preprocessing pipeline handed to both the train and the test dataset:
#   1. ToTensor  - turn the loaded image into a tensor
#   2. Normalize - per-channel standardisation with the mean/std given below
#      (these look like the usual ImageNet statistics - TODO confirm)
# The Resize step is deliberately left disabled.
transform_img = transforms.Compose(
    [
        # transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [2]:
#%cd /mydrive/Animal_Identification/siamese-triplet/

from datasets import LeopardDataset

# True  -> load the images through the project's LeopardDataset
# False -> fall back to a plain torchvision ImageFolder layout
MULTI_EMBEDDING = True
# GPU availability flag; later cells read it to place the model and batches.
cuda = torch.cuda.is_available()

if not MULTI_EMBEDDING:
    # ImageFolder variant: train and test currently point at the same folder.
    TRAIN_DATA_PATH = '../../datasets/temp'
    TEST_DATA_PATH = '../../datasets/temp'
    train_dataset = torchvision.datasets.ImageFolder(
        root=TRAIN_DATA_PATH, transform=transform_img
    )
    test_dataset = torchvision.datasets.ImageFolder(
        root=TEST_DATA_PATH, transform=transform_img
    )
else:
    # LeopardDataset variant: separate train / test directories.
    TRAIN_DATA_PATH = '../../datasets/leopard/classes_64/resize_256/train'
    TEST_DATA_PATH = '../../datasets/leopard/classes_64/resize_256/test'
    train_dataset = LeopardDataset(image_dir=TRAIN_DATA_PATH, transform=transform_img)
    test_dataset = LeopardDataset(image_dir=TEST_DATA_PATH, transform=transform_img)

## Common setup

# Baseline: Classification with softmax
We'll train the model for classification and use the outputs of the penultimate layer as embeddings.

# Online pair/triplet selection - negative mining
There are a couple of problems with siamese and triplet networks.
1. The number of possible pairs/triplets grows **quadratically/cubically** with the number of examples. It's infeasible to process them all
2. We generate pairs/triplets randomly. As the training continues, more and more pairs/triplets are easy to deal with (their loss value is very small or even 0), preventing the network from training. We need to provide the network with **hard examples**.
3. Each image that is fed to the network is used only for computation of contrastive/triplet loss for only one pair/triplet. The computation is somewhat wasted; once the embedding is computed, it could be reused for many pairs/triplets.

To deal with that efficiently, we'll feed the network with standard mini-batches, as we did for classification. The loss function will be responsible for selecting hard pairs and triplets within the mini-batch. In this case, if we feed the network with 16 images for each of 10 classes (160 images in total), we can process up to $160*159/2 = 12720$ pairs and $10*(16*15/2)*(9*16) = 172800$ triplets, compared to 80 pairs and 53 triplets in the previous implementation.

We can find some strategies on how to select triplets in [2] and [3] *Alexander Hermans, Lucas Beyer, Bastian Leibe, [In Defense of the Triplet Loss for Person Re-Identification](https://arxiv.org/pdf/1703.07737), 2017*

## Online triplet selection
## Steps
1. Create **BalancedBatchSampler** - samples $N$ classes and $M$ samples per class (from *datasets.py*)
2. Create data loaders with the batch sampler
3. Define **embedding** *(mapping)* network $f(x)$ - **EmbeddingNet** from *networks.py*
4. Define a **TripletSelector** that takes embeddings and original labels and returns valid triplets within a minibatch
5. Define **OnlineTripletLoss** that will use a *TripletSelector* and compute *TripletLoss* on such triplets
6. Train the network!

In [3]:
# Remove a `model` left over from an earlier run of the cells below, if any
# (presumably so the old network is released before a new one is built).
if 'model' in globals():
    del model
else:
    print('Model does not exist')

Model does not exist


In [4]:
import numpy
from torch.optim import lr_scheduler
import torch.optim as optim
from torch.autograd import Variable

from trainer import fit
import numpy as np
from datasets import BalancedBatchSampler
import torch.nn as nn

    
# Per-sample class labels as tensors; the balanced samplers are built from them.
train_labels = torch.tensor(train_dataset.targets)
test_labels = torch.tensor(test_dataset.targets)

# Mini-batches are assembled by choosing which labels appear in the batch and
# how many examples are drawn for each of them.
train_batch_sampler = BalancedBatchSampler(train_labels, n_classes=64, n_samples=8)
test_batch_sampler = BalancedBatchSampler(test_labels, n_classes=64, n_samples=2)

# Extra DataLoader options are only supplied when CUDA is available.
if cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

online_train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_sampler=train_batch_sampler, **kwargs
)
online_test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_sampler=test_batch_sampler, **kwargs
)
#train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=8, drop_last=True)
# Set up the network and training parameters
from networks import EmbeddingNet
from networks import EmbeddingWithSoftmaxNet
from networks import MultiPartEmbeddingNet
from networks import MultiPartEmbeddingWithSoftmaxNet

from losses import OnlineTripletLoss
from losses import OnlineSymTripletLoss
from losses import OnlineModTripletLoss
from utils_triplet import AllTripletSelector
from utils_triplet import HardestNegativeTripletSelector
from utils_triplet import RandomNegativeTripletSelector
from utils_triplet import SemihardNegativeTripletSelector # Strategies for selecting triplets within a minibatch
from metrics import AverageNonzeroTripletsMetric
from sklearn.metrics import f1_score, classification_report 

# Margin passed to both the triplet loss and the triplet selector.
margin = 0.2

# Whether the network variant with a softmax (classification) output is used.
softmax = True

# Choose the network class from the two switches MULTI_EMBEDDING / softmax.
if MULTI_EMBEDDING and softmax:
    embedding_net = MultiPartEmbeddingWithSoftmaxNet(num_classes=64)
elif MULTI_EMBEDDING:
    embedding_net = MultiPartEmbeddingNet()
elif softmax:
    embedding_net = EmbeddingWithSoftmaxNet(num_classes=64)
else:
    embedding_net = EmbeddingNet()

model = embedding_net
if cuda:
    model.cuda()

# Online triplet loss; triplets are picked inside each mini-batch by the selector.
loss_fn = OnlineTripletLoss(margin, SemihardNegativeTripletSelector(margin))
#loss_fn = OnlineSymTripletLoss(margin, RandomNegativeTripletSelector(margin))
# Cross-entropy loss handed to `fit` alongside the triplet loss.
softmax_loss_fn = nn.CrossEntropyLoss()

# Optimisation: Adam, with the learning rate multiplied by 0.1 every 8 scheduler steps.
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1, last_epoch=-1)

n_epochs = 20
log_interval = 50

In [None]:
# Train with the balanced loaders; the log below reports the average loss and
# the average number of non-zero triplets for the train and validation sets.
fit(
    online_train_loader,
    online_test_loader,
    model,
    loss_fn,
    softmax_loss_fn,
    optimizer,
    scheduler,
    n_epochs,
    cuda,
    log_interval,
    metrics=[AverageNonzeroTripletsMetric()],
    multi_class=MULTI_EMBEDDING,
    softmax=softmax,
)

Epoch: 1/20. Train set: Average loss: 0.1132	Average nonzero triplets: 1688.0
Epoch: 1/20. Validation set: Average loss: 0.1275	Average nonzero triplets: 51.0
Epoch: 2/20. Train set: Average loss: 0.0991	Average nonzero triplets: 1671.0
Epoch: 2/20. Validation set: Average loss: 0.1410	Average nonzero triplets: 51.0


In [None]:
# Persist the trained weights (state dict only, not the whole module object)
# to the siamese-triplet folder on the mounted Google Drive.
model_file_name = 'leopard_model_tr.pt'
path = f"/content/gdrive/MyDrive/siamese-triplet/{model_file_name}"
torch.save(
    model.state_dict(),
    path,
)

In [None]:
def extract_embeddings(dataloader, model, multi_class=False, softmax=False):
    """Run ``model.get_embedding`` over a loader and collect the results.

    Args:
        dataloader: Iterable of batches. With ``multi_class=False`` each batch
            unpacks as ``(data, target)``; with ``multi_class=True`` it unpacks
            as ``(face, flank, full, target)`` and only ``full`` is fed to the
            model.
        model: Module exposing ``get_embedding``. With ``softmax=True`` it must
            return an ``(embedding, class_scores)`` pair, otherwise just the
            embedding.
        multi_class: Selects the batch layout described above.
        softmax: When true, also collect the arg-max class of the scores.

    Returns:
        ``(embeddings, ref_labels, pred_labels)`` when ``softmax`` is true,
        otherwise ``(embeddings, ref_labels)``. ``embeddings`` is a list with
        one numpy array per sample; the label lists hold plain Python values.

    The model is put in eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards. Inputs are moved to the
    device of the model's parameters (CPU if it has none).
    """
    embeddings = []
    ref_labels = []
    pred_labels = []

    # Follow the model's device instead of relying on a notebook-level flag.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for batch in dataloader:
                if multi_class:
                    # Only the full image is embedded; face/flank are ignored.
                    _face, _flank, inputs, target = batch
                else:
                    inputs, target = batch
                inputs = inputs.to(device)

                if softmax:
                    emb, scores = model.get_embedding(inputs)
                    _, preds = torch.max(scores, 1)
                    pred_labels.extend(preds.cpu().tolist())
                else:
                    emb = model.get_embedding(inputs)

                embeddings.extend(emb.detach().cpu().numpy())
                ref_labels.extend(target.cpu().tolist())
    finally:
        # Do not leak eval mode into code that trains the model afterwards.
        model.train(was_training)

    if softmax:
        return embeddings, ref_labels, pred_labels
    return embeddings, ref_labels

In [None]:
# Sequential evaluation loaders: no shuffling, and drop_last=False so the final
# partial batch is kept and every sample gets an embedding and a prediction.
train_eval_loader = data.DataLoader(train_dataset, batch_size=16, shuffle=False, num_workers=2, drop_last=False, pin_memory=cuda)
# softmax=True is required here because three return values are unpacked.
train_emb, train_ref_labels, train_pred_labels = extract_embeddings(train_eval_loader, model, multi_class=MULTI_EMBEDDING, softmax=True)
# classification_report takes (y_true, y_pred); swapping them would exchange
# the precision and recall columns of the report.
print(classification_report(train_ref_labels, train_pred_labels))

test_eval_loader = data.DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2, drop_last=False, pin_memory=cuda)
test_emb, test_ref_labels, test_pred_labels = extract_embeddings(test_eval_loader, model, multi_class=MULTI_EMBEDDING, softmax=True)
print(classification_report(test_ref_labels, test_pred_labels))

In [None]:
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Fit a standardise-then-SVC pipeline on the training embeddings, using the
# reference labels returned by extract_embeddings (`train_ref_labels` /
# `test_ref_labels`; the singular names used before were never defined).
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(train_emb, train_ref_labels)

# Accuracy on the data the classifier was fitted on.
y_pred = clf.predict(train_emb)
train_acc = accuracy_score(train_ref_labels, y_pred)

# Accuracy on the held-out test embeddings.
y_pred = clf.predict(test_emb)
test_acc = accuracy_score(test_ref_labels, y_pred)

print("Training Accuracy: " + str(train_acc))
print("Testing Accuracy: " + str(test_acc))