In [None]:
%load_ext autoreload
%autoreload 2

In [1]:
from google.colab import drive
drive.mount('/content/drive')
!ln -s /content/drive/MyDrive /mydrive
%cd /mydrive/


Mounted at /content/drive
/content/drive/MyDrive


In [2]:
def start():
  !cp -r /content/drive/MyDrive/cropped_leopards.zip /content/
  !cp -r /content/drive/MyDrive/resized_cropped_leopards.zip /content/
  !unzip /content/resized_cropped_leopards.zip -d  /
  !unzip /content/cropped_leopards.zip -d  /

In [3]:
start()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/Animal_Identification/train/cropped_images/leop_15_000887.jpg  
  inflating: /content/Animal_Identification/train/cropped_images/leop_192_002633.jpg  
  inflating: /content/Animal_Identification/train/cropped_images/leop_280_006724.jpg  
  inflating: /content/Animal_Identification/train/cropped_images/leop_35_001895.jpg  
  inflating: /content/Animal_Identification/train/cropped_images/leop_272_003944.jpg  
  inflating: /content/Animal_Identification/train/cropped_images/leop_13_001772.jpg  
  inflating: /content/Animal_Identification/train/cropped_images/leop_190_002631.jpg  
  inflating: /content/Animal_Identification/train/cropped_images/leop_296_005496.jpg  
  inflating: /content/Animal_Identification/train/cropped_images/leop_248_004716.jpg  
  inflating: /content/Animal_Identification/train/cropped_images/leop_284_004941.jpg  
  inflating: /content/Animal_Identification/train/cropped_images/leo

In [4]:
!pip install split-folders

import splitfolders
splitfolders.ratio('/content/Animal_Identification/train/_resized/ten/', output='/content/Animal_Identification/train/_resized/ten/output', seed=1337, ratio=(0.8, 0.2))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


Copying files: 3492 files [00:00, 7986.73 files/s]


# Experiments
We'll go through learning feature embeddings using different loss functions on leopard  dataset. We are using 512-dimensional embeddings.

For every experiment Resnet18() is used currently no  hyperparameter search is implemented.

# Prepare dataset
We'll be working on leopard dataset

In [5]:
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
import torch.utils.data as data
import torch
transform_img = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225] )
    ])

In [6]:
%cd /mydrive/Animal_Identification/siamese-triplet/

cuda = torch.cuda.is_available()
TRAIN_DATA_PATH = '/content/Animal_Identification/train/_resized/ten/output/train/'
train_dataset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=transform_img)
TEST_DATA_PATH = '/content/Animal_Identification/train/_resized/ten/output/val'
test_dataset = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=transform_img)

/content/drive/MyDrive/Animal_Identification/siamese-triplet


## Common setup

# Baseline: Classification with softmax
We'll train the model for classification and use outputs of penultimate layer as embeddings

# Online pair/triplet selection - negative mining
There are couple of problems with siamese and triplet networks.
1. The number of possible pairs/triplets grows **quadratically/cubically** with the number of examples. It's infeasible to process them all
2. We generate pairs/triplets randomly. As the training continues, more and more pairs/triplets are easy to deal with (their loss value is very small or even 0), preventing the network from training. We need to provide the network with **hard examples**.
3. Each image that is fed to the network is used only for computation of contrastive/triplet loss for only one pair/triplet. The computation is somewhat wasted; once the embedding is computed, it could be reused for many pairs/triplets.

To deal with that efficiently, we'll feed a network with standard mini-batches as we did for classification. The loss function will be responsible for selection of hard pairs and triplets within mini-batch. In these case, if we feed the network with 16 images per 10 classes, we can process up to $159*160/2 = 12720$ pairs and $10*16*15/2*(9*16) = 172800$ triplets, compared to 80 pairs and 53 triplets in previous implementation.

We can find some strategies on how to select triplets in [2] and [3] *Alexander Hermans, Lucas Beyer, Bastian Leibe, [In Defense of the Triplet Loss for Person Re-Identification](https://arxiv.org/pdf/1703.07737), 2017*

## Online pair selection
## Steps
1. Create **BalancedBatchSampler** - samples $N$ classes and $M$ samples *datasets.py*
2. Create data loaders with the batch sampler
3. Define **embedding** *(mapping)* network $f(x)$ - **EmbeddingNet** from *networks.py*
4. Define a **PairSelector** that takes embeddings and original labels and returns valid pairs within a minibatch
5. Define **OnlineContrastiveLoss** that will use a *PairSelector* and compute *ContrastiveLoss* on such pairs
6. Train the network!

In [7]:
#from datasets import BalancedBatchSampler
import numpy
from torch.optim import lr_scheduler
import torch.optim as optim
from torch.autograd import Variable

from trainer import fit
import numpy as np
from datasets import BalancedBatchSampler
train_labels = torch.tensor(train_dataset.targets)
test_labels = torch.tensor(test_dataset.targets)

# We'll create mini batches by sampling labels that will be present in the mini batch and number of examples from each class
# train_batch_sampler = BalancedBatchSampler(train_labels, n_classes=64, n_samples=2)
# test_batch_sampler = BalancedBatchSampler(test_labels, n_classes=64, n_samples=2)
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
online_train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, **kwargs)
online_test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, **kwargs)

# Set up the network and training parameters
from networks import EmbeddingNet
from losses import OnlineTripletLoss
from utils import AllPositivePairSelector, HardNegativePairSelector, HardestNegativeTripletSelector # Strategies for selecting pairs within a minibatch

margin = 1
embedding_net = EmbeddingNet()
model = embedding_net
loss_fn = OnlineTripletLoss(margin,HardestNegativeTripletSelector(margin))
if cuda:
    model.cuda()
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)
n_epochs = 20
log_interval = 50

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [8]:
# DO not run Contrastive loss yet
fit(online_train_loader, online_test_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda, log_interval)



ValueError: ignored

## Online triplet selection
## Steps
1. Create **BalancedBatchSampler** - samples $N$ classes and $M$ samples *datasets.py*
2. Create data loaders with the batch sampler
3. Define **embedding** *(mapping)* network $f(x)$ - **EmbeddingNet** from *networks.py*
4. Define a **TripletSelector** that takes embeddings and original labels and returns valid triplets within a minibatch
5. Define **OnlineTripletLoss** that will use a *TripletSelector* and compute *TripletLoss* on such pairs
6. Train the network!

In [39]:
import numpy
from torch.optim import lr_scheduler
import torch.optim as optim
from torch.autograd import Variable

from trainer import fit
import numpy as np
from datasets import BalancedBatchSampler
train_labels = torch.tensor(train_dataset.targets)
test_labels = torch.tensor(test_dataset.targets)

# We'll create mini batches by sampling labels that will be present in the mini batch and number of examples from each class
train_batch_sampler = BalancedBatchSampler(train_labels, n_classes=64, n_samples=4)
test_batch_sampler = BalancedBatchSampler(test_labels, n_classes=64, n_samples=2)
# kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

online_train_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_batch_sampler)
online_test_loader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_batch_sampler)

# Set up the network and training parameters
from networks import EmbeddingNet, EmbeddingNetL2
from losses import OnlineTripletLoss
from utils import AllTripletSelector,HardestNegativeTripletSelector, RandomNegativeTripletSelector, SemihardNegativeTripletSelector # Strategies for selecting triplets within a minibatch
from metrics import AverageNonzeroTripletsMetric

margin = 0.1
embedding_net = EmbeddingNet()
model = embedding_net
if cuda:
    model.cuda()
loss_fn = OnlineTripletLoss(margin, RandomNegativeTripletSelector(margin))
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)
n_epochs = 20
log_interval = 50

In [40]:
history = fit(online_train_loader, online_test_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda, log_interval, metrics=[AverageNonzeroTripletsMetric()])



Epoch: 1/20. Train set: Average loss: 0.1397	Average nonzero triplets: 376.9
Epoch: 1/20. Validation set: Average loss: 0.1120	Average nonzero triplets: 64.0
Epoch: 2/20. Train set: Average loss: 0.0980	Average nonzero triplets: 380.1
Epoch: 2/20. Validation set: Average loss: 0.0965	Average nonzero triplets: 63.8
Epoch: 3/20. Train set: Average loss: 0.0854	Average nonzero triplets: 359.8
Epoch: 3/20. Validation set: Average loss: 0.1147	Average nonzero triplets: 63.8
Epoch: 4/20. Train set: Average loss: 0.0967	Average nonzero triplets: 327.7
Epoch: 4/20. Validation set: Average loss: 0.1569	Average nonzero triplets: 62.4
Epoch: 5/20. Train set: Average loss: 0.0989	Average nonzero triplets: 299.2
Epoch: 5/20. Validation set: Average loss: 0.1436	Average nonzero triplets: 61.6
Epoch: 6/20. Train set: Average loss: 0.0978	Average nonzero triplets: 255.6
Epoch: 6/20. Validation set: Average loss: 0.1728	Average nonzero triplets: 60.4
Epoch: 7/20. Train set: Average loss: 0.1011	Average

In [33]:
len(online_test_loader)

5

## Extract Embeddings

- to-do: Verify that cosine similarity is high for within class and low for between class



In [16]:
def extract_embeddings(dataloader, model):
    with torch.no_grad():
        model.eval()
        embeddings = []
        labels = []
        for images, target in dataloader:
            if cuda:
                images = images.cuda()
            embeddings.extend(model.get_embedding(images).data.cpu().tolist())
            labels.extend(target.numpy().tolist())
    return embeddings, labels

In [41]:
train_eval_loader = data.DataLoader(train_dataset, batch_size=4, shuffle=False,  num_workers=2, drop_last=True, pin_memory=cuda)
train_emb, train_ref_label = extract_embeddings(train_eval_loader, model)

In [42]:
test_eval_loader = data.DataLoader(test_dataset, batch_size=1, shuffle=False,  num_workers=2, drop_last=True, pin_memory=cuda)
test_emb, test_ref_label = extract_embeddings(test_eval_loader, model)

# Classification

## Nearest Neighbor

In [43]:
from sklearn.neighbors import NearestNeighbors

neigh = NearestNeighbors(n_neighbors=64)
neigh.fit(train_emb, train_ref_label)

NearestNeighbors(n_neighbors=64)

In [44]:
distances_test, neighbors_test = neigh.kneighbors(test_emb)
distances_test, neighbors_test = distances_test.tolist(), neighbors_test.tolist()

## SVM

In [45]:
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(train_emb, train_ref_label)

y_pred = clf.predict(train_emb)
train_acc = accuracy_score(train_ref_label, y_pred)

y_pred = clf.predict(test_emb)
test_acc = accuracy_score(test_ref_label, y_pred)

print("Training Accuracy: " + str(train_acc))
print("Testing Accuracy: " + str(test_acc))



Training Accuracy: 0.9153400868306801
Testing Accuracy: 0.45254470426409904


In [46]:
y_pred

array([ 0,  0,  2,  0,  0,  2,  2,  0,  2,  2,  2,  6,  2,  6,  0,  0,  0,
        0,  2, 17,  1,  0, 17, 17, 32, 17,  2, 52, 34,  2,  2,  1,  1,  1,
        6,  2, 32,  2,  0, 12,  1,  1,  0,  1,  0,  2,  6,  2, 52,  2, 17,
        0,  0,  0,  2,  2, 17,  6,  2, 17, 17,  2,  2, 51,  2,  0,  2,  2,
        0,  2,  2,  2, 17,  0,  2,  6, 52,  2, 17, 17, 17,  6,  2,  2,  6,
        2,  3,  3,  3,  1,  3,  3,  3,  3,  3,  0, 63, 15, 52,  5,  5,  5,
        5,  5, 62,  5, 17,  6,  6,  6,  6, 52,  6,  6,  6,  6,  6,  6,  2,
        2, 52,  6,  6,  6,  6,  6,  6,  6, 52,  6,  6,  6,  6,  6,  6, 17,
        6,  6,  6,  6, 52,  6,  7, 60,  7,  8, 32,  8, 11, 59, 59, 37, 11,
       13, 13, 32, 32, 13, 59, 14, 32, 37,  7, 23, 63, 11, 13, 11, 60, 59,
       15, 32, 36, 32, 17, 32, 32, 33, 15, 32, 39, 15, 36, 32, 15, 15, 15,
       15, 32, 15, 15, 15, 15, 15, 33, 15, 15, 36,  2, 16, 16, 16, 27, 32,
       31, 15, 15, 27, 17, 17, 52, 17,  0, 32, 17, 17, 17, 17, 17, 52, 17,
       32, 32, 59,  6,  6

In [47]:
test_ref_label

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 7,
 7,
 7,
 8,
 8,
 8,
 9,
 9,
 9,
 10,
 10,
 10,
 10,
 11,
 11,
 11,
 11,
 11,
 11,
 12,
 12,
 12,
 13,
 13,
 13,
 14,
 14,
 14,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 15,
 16,
 16,
 16,
 16,
 16,
 16,
 16,
 16,
 16,
 16,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 

### Cross-Validation

In [26]:
clf = SVC(kernel='linear', C=1, random_state=42)
scores = cross_val_score(clf, train_emb, train_ref_label, cv=5)
scores

array([0.86437613, 0.86075949, 0.86799277, 0.86075949, 0.79710145])

## Random Forest Classification

In [31]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(max_depth=15, random_state=0)
clf.fit(train_emb, train_ref_label)

y_pred = clf.predict(train_emb)
train_acc = accuracy_score(train_ref_label, y_pred)

y_pred = clf.predict(test_emb)
test_acc = accuracy_score(test_ref_label, y_pred)

print("Training Accuracy: " + str(train_acc))
print("Testing Accuracy: " + str(test_acc))

Training Accuracy: 0.9996382054992764
Testing Accuracy: 0.4828060522696011


In [None]:
model_file_name = 'leopard_model_tr.pt'
path = f"/content/drive/MyDrive/siamese-triplet/{model_file_name}" 
torch.save(model.state_dict(), path)

FileNotFoundError: ignored