# Importing the Required Libraries

In [32]:
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from skimage import io
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import glob
from PIL import Image
from torch.utils.data.sampler import BatchSampler
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from skimage.io import imread, imshow
import cv2
from torch.optim import lr_scheduler
import torch.optim as optim
from torch.autograd import Variable

# Detect GPU availability once; later cells read this flag to decide whether
# to move the model and batches to CUDA and how to configure the DataLoaders.
cuda = torch.cuda.is_available()
# Seed PyTorch's RNG for repeatability.
# NOTE(review): this seeds torch only; the pandas `.sample()` calls used by the
# dataset classes draw from a different RNG — confirm whether they should be seeded too.
torch.manual_seed(42)

# Making custom dataset class glvd to extract the dataset and make Triplets
We use only about 250 classes at a time for evaluation, because the full dataset is very large and processing all of it is computationally expensive.

In [33]:
class glvd(Dataset):
    """Landmark dataset that yields triplets (training) or query images (testing).

    In training mode each item is ``((anchor, positive, negative), [])`` where
    the positive shares the anchor's ``landmark_id`` and the negative does not.
    In test mode each item is a single query image drawn from a random sample
    of the filtered dataframe.

    Args:
        train: If True, serve triplets; otherwise serve query images.
        transform: Optional callable applied to every loaded image.
        max_landmark_id: Only rows with ``landmark_id <= max_landmark_id`` are kept.
        n_test: Number of query rows sampled in test mode (capped at the
            number of available rows).
    """

    CSV_PATH = "../input/landmark-retrieval-2021/train.csv"
    IMAGE_ROOT = "../input/landmark-retrieval-2021/train/"

    def __init__(self, train=True, transform=None, max_landmark_id=1000, n_test=500):
        self.transform = transform
        self.train = train
        self.df = pd.read_csv(self.CSV_PATH)
        self.df = self.df[self.df['landmark_id'] <= max_landmark_id]
        if self.train:
            # Kept for callers that inspect the per-landmark groups.
            self.grouped = self.df.groupby(self.df['landmark_id'])
        else:
            # Cap the sample size so a small filtered frame does not raise.
            n_queries = min(n_test, len(self.df))
            self.testf = self.df.sample(n=n_queries).reset_index(drop=True)

    def __len__(self):
        if self.train:
            return len(self.df)
        return len(self.testf)

    @staticmethod
    def _image_path(image_id):
        """Return the on-disk path of a training image given its hashed id."""
        return "{}{}/{}/{}/{}.jpg".format(
            glvd.IMAGE_ROOT, image_id[0], image_id[1], image_id[2], image_id
        )

    def _load(self, image_id):
        """Read one image from disk and apply the optional transform."""
        image = io.imread(self._image_path(image_id))
        if self.transform is not None:
            image = self.transform(image)
        return image

    def _sample_triplet_ids(self, idx):
        """Pick (anchor_id, positive_id, negative_id) for the row at position ``idx``.

        The positive is a different image of the same landmark whenever one
        exists; a landmark with a single image falls back to the anchor itself.

        Raises:
            ValueError: If no row with a different landmark id exists.
        """
        labels = self.df['landmark_id']
        anchor_id = self.df['id'].iloc[idx]
        same_class = labels == labels.iloc[idx]

        positive_ids = self.df.loc[same_class, 'id']
        other_positives = positive_ids[positive_ids != anchor_id]
        if not other_positives.empty:
            positive_ids = other_positives

        # A boolean mask replaces the per-item concat + drop_duplicates over
        # the whole dataframe that was used to find the negatives.
        negative_ids = self.df.loc[~same_class, 'id']
        if negative_ids.empty:
            raise ValueError("cannot sample a negative: only one landmark class is present")

        return anchor_id, positive_ids.sample().iloc[0], negative_ids.sample().iloc[0]

    def __getitem__(self, idx):
        # Build a triplet for training data, or return the query image for test data.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        if self.train:
            ahash, phash, nhash = self._sample_triplet_ids(idx)
            Anchor = self._load(ahash)
            Positive = self._load(phash)
            Negative = self._load(nhash)
            # The empty list is the "no target" placeholder consumed by the training loop.
            return (Anchor, Positive, Negative), []

        return self._load(self.testf['id'].iloc[idx])

    def hashedvd(self, idx):
        # Return the hashed id of a query image so it can be displayed in its
        # original form during retrieval.
        return self.testf['id'].iloc[idx]
            

# Custom dataset `glvdk`: returns each training image as a tensor together with its label, used to build the embeddings for image retrieval

In [34]:
class glvdk(Dataset):
    """Labelled landmark images, used to build embeddings for retrieval.

    Each item is ``(image, landmark_id)``; the optional ``transform`` is
    applied to the image after it is read from disk.
    """

    def __init__(self, transform=None):
        frame = pd.read_csv("../input/landmark-retrieval-2021/train.csv")
        self.df = frame[frame['landmark_id']<=250]
        self.transform = transform

    def __len__(self):
        return len(self.df.index)

    def __getitem__(self, idx):
        position = idx.tolist() if torch.is_tensor(idx) else idx

        image_id = self.df['id'].iloc[position]
        # Images are sharded into directories named after the first three
        # characters of the hashed id.
        relative = "/".join((image_id[0], image_id[1], image_id[2], image_id))
        image = io.imread("../input/landmark-retrieval-2021/train/" + relative + ".jpg")

        if self.transform is not None:
            image = self.transform(image)

        # Image (tensor when a tensor transform is supplied) plus its label.
        label = self.df['landmark_id'].iloc[position]
        return image, label

In [35]:
# Preprocessing used for training and embedding extraction: convert the loaded
# image array to a tensor, then resize it to 224x224.
trnsfm = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224,224))
 ])

# Part 1:Model Creation and Training:-

# Making the Triplet Loss Function
Triplet Loss is a loss function that ensures that, given an anchor image A, its embedding is closer to that of a positive image P (same class) and farther from that of a negative image N (different class), so that the model maps images of the same class to nearby embeddings.
Reference: [Triplet Loss](https://medium.com/analytics-vidhya/triplet-loss-b9da35be21b8)

In [36]:
class TripletLoss(nn.Module):
    """Hinge-style triplet loss on squared Euclidean distances.

    Per sample: ``max(0, d(anchor, positive) - d(anchor, negative) + margin)``
    where ``d`` is the squared L2 distance summed over dimension 1.
    """

    def __init__(self, margin):
        super().__init__()
        self.margin = margin

    @staticmethod
    def _squared_distance(left, right):
        # Squared (not rooted) Euclidean distance per row.
        return torch.pow(left - right, 2).sum(dim=1)

    def forward(self, anchor, positive, negative, size_average=True):
        """Return the mean (default) or the sum of the per-sample losses."""
        to_positive = self._squared_distance(anchor, positive)
        to_negative = self._squared_distance(anchor, negative)
        hinge = F.relu(to_positive - to_negative + self.margin)
        if size_average:
            return hinge.mean()
        return hinge.sum()


# Making an Embedding Net to Convert Image to a Lower Dimensional Embedding for Computation

In [37]:
class EmbeddingNet(nn.Module):
    """ResNet-18 backbone whose final layer is replaced by a small embedding head.

    Args:
        embedding_dim: Size of the output embedding (defaults to 2).
    """

    def __init__(self, embedding_dim=2):
        super(EmbeddingNet, self).__init__()
        self.convnet = torchvision.models.resnet18(pretrained=True)
        # Swap the classification layer for a linear projection to the embedding space.
        self.convnet.fc = nn.Linear(self.convnet.fc.in_features, embedding_dim)

    def forward(self, x):
        """Return the embedding produced by the backbone for batch ``x``."""
        output = self.convnet(x)
        return output

    def get_embedding(self, x):
        """Alias of :meth:`forward`, kept for a uniform embedding interface."""
        return self.forward(x)


class EmbeddingNetL2(EmbeddingNet):
    """EmbeddingNet whose outputs are L2-normalised along dimension 1."""

    def __init__(self, embedding_dim=2):
        super(EmbeddingNetL2, self).__init__(embedding_dim)

    def forward(self, x):
        """Return unit-length embeddings for batch ``x``."""
        output = super(EmbeddingNetL2, self).forward(x)
        # Out-of-place normalisation: the previous in-place `output /= norm`
        # modified a tensor that autograd had saved for the norm computation,
        # which raises during backward. F.normalize also clamps the norm with
        # a small epsilon, so an all-zero embedding yields zeros instead of NaN.
        return F.normalize(output, p=2, dim=1)

    def get_embedding(self, x):
        """Alias of :meth:`forward`."""
        return self.forward(x)

# Constructing a Triplet Network which maps our embeddings to similar classes 
The Triplet Network minimizes the distance between the Anchor and the Positive and maximizes the distance between the Anchor and the Negative, so that images of the same class are mapped to nearby embeddings.
We first worked with a Siamese Network and implemented it in an earlier baseline, but we found that the Triplet Net, its closest competitor, achieved better training accuracy: it has three branches (for A, P and N), whereas the Siamese Network has two (one for the Anchor and one that receives the positive or negative image, fed in as pairs).

In [38]:
class TripletNet(nn.Module):
    """Applies one shared embedding network to the three members of a triplet."""

    def __init__(self, embedding_net):
        super().__init__()
        self.embedding_net = embedding_net

    def forward(self, x1, x2, x3):
        """Embed the three inputs in order and return the embeddings as a tuple."""
        embed = self.embedding_net
        return embed(x1), embed(x2), embed(x3)

    def get_embedding(self, x):
        """Embed a single batch with the shared network."""
        embedding = self.embedding_net(x)
        return embedding

In [39]:
# The training dataset yields (anchor, positive, negative) triplets; the test
# dataset yields single query images.
train_dataset = glvd(transform=trnsfm)
test_dataset = glvd(train=False, transform=trnsfm)
# NOTE(review): n_classes is not referenced by any other visible cell — confirm it is still needed.
n_classes = 250

In [40]:
# Split the triplet dataset 80/20 into training and validation subsets.
train_set_size = int(len(train_dataset) * 0.8)
# The remainder goes to validation so the two sizes always sum to the full length.
valid_set_size = len(train_dataset) - train_set_size
# NOTE(review): both subsets index into the same glvd instance, which samples
# positives/negatives from its whole dataframe — confirm this overlap is acceptable.
train_dataset, valid_dataset = data.random_split(train_dataset, [train_set_size, valid_set_size])

In [41]:
def fit(train_loader, val_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda, log_interval, metrics=None,
        start_epoch=0):
    """Train ``model`` for epochs ``start_epoch .. n_epochs - 1`` with validation after each.

    Args:
        train_loader: Iterable of training batches, passed to ``train_epoch``.
        val_loader: Iterable of validation batches, passed to ``test_epoch``.
        model: Network to optimise.
        loss_fn: Loss callable.
        optimizer: Optimiser, passed to ``train_epoch``.
        scheduler: Object with a ``step()`` method, called once per epoch.
        n_epochs: Total number of epochs.
        cuda: Whether batches are moved to the GPU.
        log_interval: Number of batches between training log lines.
        metrics: Optional list of metric objects exposing ``reset()``,
            ``name()`` and ``value()``. Defaults to no metrics.
        start_epoch: Epoch to resume from; the scheduler is stepped this many
            times first so that it matches the resumed epoch.
    """
    # Avoid a shared mutable default argument.
    if metrics is None:
        metrics = []

    # Fast-forward the scheduler when resuming.
    for _ in range(start_epoch):
        scheduler.step()

    for epoch in range(start_epoch, n_epochs):
        # Train stage
        train_loss, metrics = train_epoch(train_loader, model, loss_fn, optimizer, cuda, log_interval, metrics)

        scheduler.step()

        message = 'Epoch: {}/{}. Train set: Average loss: {:.4f}'.format(epoch + 1, n_epochs, train_loss)
        for metric in metrics:
            message += '\t{}: {}'.format(metric.name(), metric.value())

        # Validation stage: test_epoch returns the summed loss, averaged here per batch.
        val_loss, metrics = test_epoch(val_loader, model, loss_fn, cuda, metrics)
        # Guard against an empty validation loader.
        val_loss /= max(len(val_loader), 1)

        message += '\nEpoch: {}/{}. Validation set: Average loss: {:.4f}'.format(epoch + 1, n_epochs,
                                                                                 val_loss)
        for metric in metrics:
            message += '\t{}: {}'.format(metric.name(), metric.value())

        print(message)

In [42]:
def train_epoch(train_loader, model, loss_fn, optimizer, cuda, log_interval, metrics):
    """Run one optimisation pass over ``train_loader``.

    Each batch is a ``(data, target)`` pair. ``data`` may be a single tensor
    or a tuple/list of tensors (unpacked into ``model``); an empty ``target``
    means "no target" and is not passed to ``loss_fn``.

    Args:
        train_loader: Iterable of batches exposing ``dataset`` and ``__len__`` (used for logging).
        model: Network to train; put into training mode.
        loss_fn: Called with the model outputs followed by the target (if any).
            May return a tensor or a tuple/list whose first item is the loss.
        optimizer: Optimiser stepped once per batch.
        cuda: If True, move data and target to the GPU.
        log_interval: Print a progress line every ``log_interval`` batches.
        metrics: Metric objects exposing ``reset()``, ``name()``, ``value()``
            and callable as ``metric(outputs, target, loss_outputs)``.

    Returns:
        ``(average loss per batch, metrics)``; the average is 0.0 when the
        loader yields no batches.
    """
    for metric in metrics:
        metric.reset()

    model.train()
    losses = []
    total_loss = 0
    n_batches = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        n_batches += 1
        target = target if len(target) > 0 else None
        if not type(data) in (tuple, list):
            data = (data,)
        if cuda:
            data = tuple(d.cuda() for d in data)
            if target is not None:
                target = target.cuda()

        optimizer.zero_grad()
        outputs = model(*data)

        if type(outputs) not in (tuple, list):
            outputs = (outputs,)

        loss_inputs = outputs
        if target is not None:
            target = (target,)
            loss_inputs += target

        loss_outputs = loss_fn(*loss_inputs)
        loss = loss_outputs[0] if type(loss_outputs) in (tuple, list) else loss_outputs
        # Read the scalar once instead of calling .item() twice.
        batch_loss = loss.item()
        losses.append(batch_loss)
        total_loss += batch_loss
        loss.backward()
        optimizer.step()

        for metric in metrics:
            metric(outputs, target, loss_outputs)

        if batch_idx % log_interval == 0:
            message = 'Train: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                batch_idx * len(data[0]), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), np.mean(losses))
            for metric in metrics:
                message += '\t{}: {}'.format(metric.name(), metric.value())

            print(message)
            # The logged loss is the mean since the previous log line.
            losses = []

    # An empty loader previously left `batch_idx` undefined and crashed here.
    if n_batches == 0:
        return 0.0, metrics

    total_loss /= n_batches
    return total_loss, metrics

In [43]:
def test_epoch(val_loader, model, loss_fn, cuda, metrics):
    with torch.no_grad():
        for metric in metrics:
            metric.reset()
        model.eval()
        val_loss = 0
        for batch_idx, (data, target) in enumerate(val_loader):
            target = target if len(target) > 0 else None
            if not type(data) in (tuple, list):
                data = (data,)
            if cuda:
                data = tuple(d.cuda() for d in data)
                if target is not None:
                    target = target.cuda()

            outputs = model(*data)

            if type(outputs) not in (tuple, list):
                outputs = (outputs,)
            loss_inputs = outputs
            if target is not None:
                target = (target,)
                loss_inputs += target

            loss_outputs = loss_fn(*loss_inputs)
            loss = loss_outputs[0] if type(loss_outputs) in (tuple, list) else loss_outputs
            val_loss += loss.item()

            for metric in metrics:
                metric(outputs, target, loss_outputs)

    return val_loss, metrics

In [44]:
#functions to extract embeddings of train data and test data
def extract_embeddings(dataloader, model):
    """Embed every ``(images, target)`` batch of ``dataloader`` with ``model``.

    The embedding width is taken from the model's first output rather than
    being fixed, and inputs are moved to the device of the model's parameters.

    Args:
        dataloader: Yields ``(images, target)`` batches and exposes ``dataset``.
        model: Module providing ``get_embedding(images)``; put into eval mode.

    Returns:
        ``(embeddings, labels)`` as float NumPy arrays of shape
        ``(n_samples, embedding_dim)`` and ``(n_samples,)``, filled in the
        order the loader yields the batches.
    """
    n_samples = len(dataloader.dataset)

    # Follow the model's own device instead of a module-level flag.
    parameters = getattr(model, "parameters", None)
    first_param = next(parameters(), None) if callable(parameters) else None
    device = first_param.device if first_param is not None else None

    embeddings = None
    labels = np.zeros(n_samples)
    k = 0
    with torch.no_grad():
        model.eval()
        for images, target in dataloader:
            if device is not None:
                images = images.to(device)
            batch = model.get_embedding(images).detach().cpu().numpy()
            if embeddings is None:
                # Allocate lazily so any embedding width is supported.
                embeddings = np.zeros((n_samples, batch.shape[1]))
            embeddings[k:k+len(batch)] = batch
            labels[k:k+len(batch)] = target.cpu().numpy()
            k += len(batch)

    if embeddings is None:
        # No batch was produced; keep the historical two-column shape.
        embeddings = np.zeros((n_samples, 2))
    return embeddings, labels



def extract_embeddings_test(dataloader, model):
    """Embed every image batch of an unlabelled ``dataloader`` with ``model``.

    The embedding width is taken from the model's first output rather than
    being fixed, and inputs are moved to the device of the model's parameters.

    Args:
        dataloader: Yields image batches and exposes ``dataset``.
        model: Module providing ``get_embedding(images)``; put into eval mode.

    Returns:
        Float NumPy array of shape ``(n_samples, embedding_dim)``, filled in
        the order the loader yields the batches.
    """
    n_samples = len(dataloader.dataset)

    # Follow the model's own device instead of a module-level flag.
    parameters = getattr(model, "parameters", None)
    first_param = next(parameters(), None) if callable(parameters) else None
    device = first_param.device if first_param is not None else None

    embeddings = None
    k = 0
    with torch.no_grad():
        model.eval()
        for images in dataloader:
            if device is not None:
                images = images.to(device)
            batch = model.get_embedding(images).detach().cpu().numpy()
            if embeddings is None:
                # Allocate lazily so any embedding width is supported.
                embeddings = np.zeros((n_samples, batch.shape[1]))
            embeddings[k:k+len(batch)] = batch
            k += len(batch)

    if embeddings is None:
        # No batch was produced; keep the historical two-column shape.
        embeddings = np.zeros((n_samples, 2))
    return embeddings

In [45]:

# Triplets are three images each, so the training batch size is kept small.
batch_size = 8
# Worker/pinned-memory options are only passed when a GPU is available.
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
# Training batches are shuffled; validation batches keep a fixed order.
triplet_train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
triplet_test_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, **kwargs)

# Set up the network and the training hyperparameters.
margin = 1.  # margin of the triplet loss
embedding_net = EmbeddingNet()
model = TripletNet(embedding_net)  # shares one embedding net across anchor/positive/negative
if cuda:
    model.cuda()
loss_fn = TripletLoss(margin)
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr)
# Step schedule: step size 8, decay factor 0.1.
scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)
n_epochs = 15
log_interval = 100  # batches between training log lines

In [46]:
# Train the triplet network, validating on the held-out split after every epoch.
fit(triplet_train_loader, triplet_test_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda, log_interval)

# Part 2: Extracting Embeddings of our Train and Test Data

In [47]:
# Labelled training images (glvdk) used to compute the reference embeddings
# that the k-NN retrieval step searches.
train_datasetk=glvdk(transform=trnsfm)

In [48]:
# Larger batches are fine here: extraction runs without gradients.
batch_size = 256
# Worker/pinned-memory options are only passed when a GPU is available.
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
# shuffle=False keeps row i of the extracted embeddings aligned with item i of
# the dataset; shuffling has no benefit for inference and broke that alignment.
train_loader = torch.utils.data.DataLoader(train_datasetk, batch_size=batch_size, shuffle=False, **kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, **kwargs)

In [49]:
# Embed the labelled reference images (embeddings + labels) and the query images (embeddings only)
# with the trained model.
train_embeddings_ocl, train_labels_ocl = extract_embeddings(train_loader, model)
test_embeddings_ocl = extract_embeddings_test(test_loader, model)

# Part 3:Image Retrieval

In [50]:
# Transform used when displaying retrieved images: tensor conversion only,
# with no resizing.
trnsfmk=transforms.Compose([
    transforms.ToTensor()
    
 ])

In [51]:
# Same labelled training data as above, loaded with the display transform so
# retrieved images can be shown.
train_datasetr=glvdk(transform=trnsfmk)

In [52]:
# Retrieve and display training images for one query image.
num_test_queries = 1
num_elements_to_retrieve=5
# Position of the query in the test dataset; named once so the displayed query
# and the embedding row used for the search cannot drift apart.
query_index = 80
qhash=test_dataset.hashedvd(query_index)
Q=io.imread("../input/landmark-retrieval-2021/train/"+qhash[0]+"/"+qhash[1]+"/"+qhash[2]+"/"+qhash+".jpg")
print("Query:")
plt.imshow(Q)
plt.show()
print("Retrieved Image:")
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(train_embeddings_ocl, train_labels_ocl)
indexes = knn.kneighbors(test_embeddings_ocl[query_index:query_index + 1],num_elements_to_retrieve, return_distance=False)
# Labels of the display dataset, read once from its dataframe so matching
# images can be located without loading every image from disk.
retrieval_labels = train_datasetr.df['landmark_id'].to_numpy()
for index in indexes:
    # Labels of the nearest neighbours, closest first.
    print(train_labels_ocl[index])
    # Show every training image that carries the label of the closest neighbour.
    n=train_labels_ocl[index[0]]
    for position in np.flatnonzero(retrieval_labels == n):
        instance, _ = train_datasetr[int(position)]
        plt.imshow(instance.permute(1,2,0))
        plt.show()