## Start model training:

- Simple classifier that uses cropped images from detectron2
- Even the images that are not cropped 

In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.optim 
from torchvision import datasets
from torch.autograd import Variable
import numpy as np
import pandas as pd
import cv2
import argparse
import os
from PIL import Image, ImageEnhance, ImageOps
from tqdm import tqdm
import random

In [2]:
load_dir = '../embeddings'
data = "../cropped_bird_dataset"
#img_size = 299
batch_size = 32
epochs = 50
lr = 0.01
momentum = 0.9
weight_decay = 3e-4
grad_clip = 5.
seed = 1
use_cuda = False
experiment='../experiment'

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

torch.manual_seed(seed)

<torch._C.Generator at 0x7fa19032ff00>

In [3]:
# 2-layers network of the features

# Features and labels
features_train = torch.load(os.path.join(load_dir, "birds_features_train.pt"), map_location=torch.device(device))
labels_train = torch.load(os.path.join(load_dir, "birds_labels_train.pt"), map_location=torch.device(device))

features_val = torch.load(os.path.join(load_dir, "birds_features_val.pt"), map_location=torch.device(device))
labels_val = torch.load(os.path.join(load_dir, "birds_labels_val.pt"), map_location=torch.device(device))

# X = features.to("cpu").numpy()

# Dataloaders
features_tensor = torch.stack([i for i in features_train])
labels_tensor = torch.stack([i for i in labels_train])
train_data = torch.utils.data.TensorDataset(features_tensor, labels_tensor) 

features_tensor = torch.stack([torch.Tensor(i) for i in features_val])
labels_tensor = torch.stack([i for i in labels_val])
val_data = torch.utils.data.TensorDataset(features_tensor,labels_tensor)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=0)

In [4]:
# Homemade loss

class HardBatchMiningTripletLoss(torch.nn.Module):
    """Triplet loss with hard positive/negative mining of samples in a batch.

    Reference:
        Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
    Args:
        margin (float, optional): margin for triplet. Default is 0.3.
    """

    def __init__(self, margin=0.3):
        super(HardBatchMiningTripletLoss, self).__init__()
        self.margin = margin
        self.ranking_loss = torch.nn.MarginRankingLoss(margin=margin)

    def forward(self, inputs, targets):
        """
        Args:
            inputs (torch.Tensor): feature matrix with shape (batch_size, feat_dim).
            targets (torch.LongTensor): ground truth labels with shape (batch_size).
        """
        n = inputs.size(0)

        # TASK: Compute the pairwise euclidean distance between all n feature vectors.
        # Hint: We recommend computing the actual euclidean distance (not squared).
        # For numerical stability, you can do sth. like:
        # distance_matrix = distance_matrix.clamp(min=1e-12).sqrt()
        input1 = inputs
        input2 = inputs.transpose(0, 1)
        matrix_product = torch.matmul(input1, input2)
        diag = torch.diag(matrix_product)
        distance_matrix = diag.unsqueeze(0) - 2.0 * matrix_product + diag.unsqueeze(1)
        distance_matrix = distance_matrix.clamp(min=1e-12).sqrt()

        # TASK: For each sample (image), find the hardest positive and hardest negative sample.
        # The targets are a vector that encode the class label for each of the n samples.
        # Pairs of samples with the SAME class can form a positive sample.
        # Pairs of samples with a DIFFERENT class can form a negative sample.
        #
        # For this task, you will need to loop over all samples, and for each one
        # find the hardest positive sample and the hardest negative sample.
        # The distances are then added to the following lists.
        # Please think about what hardest means for positive and negative pairs.
        # Reminder: Positive pairs should be as close as possible, while
        # negative pairs should be quite far apart.

        distance_positive_pairs, distance_negative_pairs = [], []
        print(distance_matrix.shape)
        for i in range(n):
            current_label = targets[i].item()
            mask = targets.eq(current_label)
            distance_positive = torch.max(torch.masked_select(distance_matrix[i, :], mask))
            print("pos", distance_positive.shape)
            distance_negative = torch.min(torch.masked_select(distance_matrix[i, :], torch.logical_not(mask)))
            print("neg", distance_negative.shape)
            distance_positive_pairs.append(distance_positive)
            distance_negative_pairs.append(distance_negative)

        # TASK: Convert the created lists into 1D pytorch tensors. Please never
        # convert the tensors to numpy or raw python format, as you want to backpropagate
        # the loss, i.e., the above lists should only contain pytorch tensors.
        # Hint: Checkout the pytorch documentation.
        distance_positive_pairs = torch.tensor(distance_positive_pairs, device=device)
        distance_negative_pairs = torch.tensor(distance_negative_pairs, device=device)

        # The ranking loss will compute the triplet loss with the margin.
        # loss = max(0, -1*(neg_dist - pos_dist) + margin)
        # This is done already, no need to change anything.
        y = torch.ones_like(distance_negative_pairs)
        return self.ranking_loss(distance_negative_pairs, distance_positive_pairs, y)


class CombinedLoss(object):
    def __init__(self, margin=0.3, weight_triplet=1.0, weight_ce=1.5):
        super(CombinedLoss, self).__init__()
        self.triplet_loss = HardBatchMiningTripletLoss()  # <--- Your code is used here!
        self.cross_entropy = torch.nn.CrossEntropyLoss()
        self.weight_triplet = weight_triplet
        self.weight_ce = weight_ce

    def __call__(self, logits, features):
        loss = 0.0
        loss_summary = {}
        if self.weight_triplet > 0.0:
            loss_t = self.triplet_loss(features) * self.weight_triplet
            loss += loss_t
            loss_summary['Triplet Loss'] = loss_t

        if self.weight_ce > 0.0:
            loss_ce = self.cross_entropy(logits) * self.weight_ce
            loss += loss_ce
            loss_summary['CE Loss'] = loss_ce

        loss_summary['Loss'] = loss
        return loss

In [5]:
# Model
class Classifier(nn.Module):
    def __init__(self,embedding_dim):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(embedding_dim, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 20)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = Classifier(features_train[0].shape[0])

if use_cuda:
    print('Using GPU')
    model.cuda()
else:
    print('Using CPU')

# Optimizer, LR, and criterion
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
criterion = torch.nn.CrossEntropyLoss(reduction="mean")

# Training functions
def train_classifier(model, train_loader, optimizer, lr_scheduler, criterion, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if use_cuda:
            data, target = Variable(data.cuda()), Variable(target.cuda().long())
                
        else:
              data, target = Variable(data), Variable(target.long())
        optimizer.zero_grad()
        output = model(data)
        #target = target.squeeze(1)
        #loss = triplet_loss.forward(output, target)
        #loss.requires_grad = True
        loss = criterion(output, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()
        lr_scheduler.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data.item()))

def validation_classifier(model, criterion, val_loader):
    model.eval()
    validation_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            if use_cuda:
                data, target = Variable(data.cuda()), Variable(target.cuda().long())
            else:
                data, target = Variable(data), Variable(target.long())
            output = model(data)
            
            #validation_loss += triplet_loss.forward(output, target).data.item()
        
            #target = target.squeeze(1)
            # sum up batch loss
            validation_loss += criterion(output, target).data.item()
            
            # get the index of the max log-probability
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    validation_loss /= len(val_loader.dataset)
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        validation_loss, correct, len(val_loader.dataset),
        100. * correct / len(val_loader.dataset)))
    return(100. * correct / len(val_loader.dataset))

Using CPU


In [6]:
# Training the classifier 
for epoch in range(1, epochs + 1):
    train_classifier(model, train_loader, optimizer, lr_scheduler, criterion, epoch)
    val_acc=validation_classifier(model, criterion, val_loader)
    if val_acc>=68:
      # Save only when it is good enough
        model_file = experiment + '/model_' + str(epoch) + '.pth'
        torch.save(model.state_dict(), model_file)
        print('Saved model to ' + model_file + '. You can run `python evaluate.py --model ' + model_file + '` to generate the Kaggle formatted csv file\n')


Validation set: Average loss: 0.1166, Accuracy: 17/103 (17%)

Validation set: Average loss: 0.1162, Accuracy: 19/103 (18%)

Validation set: Average loss: 0.1142, Accuracy: 25/103 (24%)

Validation set: Average loss: 0.1122, Accuracy: 19/103 (18%)

Validation set: Average loss: 0.1119, Accuracy: 28/103 (27%)

Validation set: Average loss: 0.1139, Accuracy: 16/103 (16%)

Validation set: Average loss: 0.1039, Accuracy: 40/103 (39%)

Validation set: Average loss: 0.1024, Accuracy: 38/103 (37%)

Validation set: Average loss: 0.1002, Accuracy: 44/103 (43%)

Validation set: Average loss: 0.0917, Accuracy: 46/103 (45%)

Validation set: Average loss: 0.0890, Accuracy: 52/103 (50%)

Validation set: Average loss: 0.0855, Accuracy: 44/103 (43%)

Validation set: Average loss: 0.0813, Accuracy: 58/103 (56%)

Validation set: Average loss: 0.0792, Accuracy: 65/103 (63%)

Validation set: Average loss: 0.0825, Accuracy: 52/103 (50%)

Validation set: Average loss: 0.0632, Accuracy: 69/103 (67%)

Validat

In [7]:
break

SyntaxError: 'break' outside loop (668683560.py, line 4)

In [None]:
break

In [None]:
# Test with Test Time Augmentation

# Test features
features_test = torch.load(os.path.join(load_dir, 'birds_features_test.pt'), map_location=torch.device(device))
features_tensor = torch.stack([i for i in features_test])

with open("../experiment/test_paths.txt", "r") as file:
    test_paths = file.read().split("\n")
    
#paths_tensor = torch.stack(test_paths)
    
best_model_path = "../experiment/model_50.pth"

# Loading trained model
state_dict = torch.load(best_model_path)
model = Classifier(features_test[0].shape[0])
model.load_state_dict(state_dict)
model.eval()

In [None]:
if use_cuda:
    print('Using GPU')
    model.cuda()
else:
    print('Using CPU')

output_file = "../experiment/kaggle.csv"

with open(output_file, "w") as file:
    file.write("Id,Category\n")
    for path, embedding in tqdm(zip(test_paths, features_tensor)):
        if use_cuda:
            embedding = embedding.cuda()
        output = model(embedding)
        pred = output.data.max(0, keepdim=True)[1]
        file.write("%s,%d\n" % (path, pred))
    print(
        "Succesfully wrote "
        + output_file
        + ", you can upload this file to the kaggle competition website"
    )