In [None]:
pip install prettytable

In [None]:
pip install timm

In [None]:
pip install ipywidgets

In [3]:
import sys
#sys.path.append('..') # append parent directory, we need it
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.optim import lr_scheduler
import utils

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
from utils.validation import get_validation_recalls
from models import helper


In [5]:
sys.path

['/home/sunveil/anaconda3/envs/boq/lib/python312.zip',
 '/home/sunveil/anaconda3/envs/boq/lib/python3.12',
 '/home/sunveil/anaconda3/envs/boq/lib/python3.12/lib-dynload',
 '',
 '/home/sunveil/anaconda3/envs/boq/lib/python3.12/site-packages',
 '..',
 '/home/sunveil/anaconda3/envs/boq/lib/python3.12/site-packages/setuptools/_vendor',
 '/tmp/tmp26nlz7kf']

In [6]:
import models

AttributeError: module 'models' has no attribute 'helper'

In [4]:


class VPRModel(pl.LightningModule):
    """Visual Place Recognition model: a backbone followed by an aggregator.

    The network is wrapped in a PyTorch Lightning module so that the optimizer
    setup, learning-rate warm-up, training step and validation logic live next
    to the model definition.

    Args:
        backbone_arch: backbone name, forwarded to ``helper.get_backbone``.
        pretrained: forwarded to ``helper.get_backbone``.
        layers_to_freeze: forwarded to ``helper.get_backbone``.
        layers_to_crop: forwarded to ``helper.get_backbone``.
        normalize: forwarded to ``helper.get_backbone``.
        agg_arch: aggregator name, forwarded to ``helper.get_aggregator``.
        agg_config: aggregator settings, forwarded to ``helper.get_aggregator``.
        lr: base learning rate (also the value reached at the end of warm-up).
        optimizer: ``'sgd'``, ``'adamw'`` or ``'adam'`` (case-insensitive).
        weight_decay: weight decay given to the optimizer.
        momentum: momentum; only used by the SGD branch.
        warmpup_steps: number of global steps over which the learning rate is
            ramped up linearly. The spelling is part of the public interface.
        milestones: ``milestones`` given to ``MultiStepLR``.
        lr_mult: ``gamma`` given to ``MultiStepLR``.
        loss_name: forwarded to ``utils.get_loss``.
        miner_name: forwarded to ``utils.get_miner``.
        miner_margin: forwarded to ``utils.get_miner``.
        faiss_gpu: forwarded to ``utils.get_validation_recalls``.

    NOTE(review): ``optimizer_step`` (with ``optimizer_idx``),
    ``training_epoch_end`` and ``validation_epoch_end`` follow the Lightning
    1.x hook API - confirm the installed Lightning version before training.
    """

    # The list/dict defaults below are shared between instances; this class
    # only stores and forwards them, it never mutates them.
    def __init__(self,
                #---- Backbone
                backbone_arch='resnet50',
                pretrained=True,
                layers_to_freeze=1,
                layers_to_crop=[],
                normalize = True,

                #---- Aggregator
                agg_arch='ConvAP', #CosPlace, NetVLAD, GeM, AVG
                agg_config={},

                #---- Train hyperparameters
                lr=0.03,
                optimizer='sgd',
                weight_decay=1e-3,
                momentum=0.9,
                warmpup_steps=500,
                milestones=[5, 10, 15],
                lr_mult=0.3,

                #----- Loss
                loss_name='MultiSimilarityLoss',
                miner_name='MultiSimilarityMiner',
                miner_margin=0.1,
                faiss_gpu=False
                 ):
        super().__init__()
        self.encoder_arch = backbone_arch
        self.pretrained = pretrained
        self.layers_to_freeze = layers_to_freeze
        self.layers_to_crop = layers_to_crop

        self.agg_arch = agg_arch
        self.agg_config = agg_config

        self.lr = lr
        self.optimizer = optimizer
        self.weight_decay = weight_decay
        self.momentum = momentum
        self.warmpup_steps = warmpup_steps
        self.milestones = milestones
        self.lr_mult = lr_mult

        self.loss_name = loss_name
        self.miner_name = miner_name
        self.miner_margin = miner_margin

        # let Lightning record the constructor arguments
        self.save_hyperparameters()

        self.loss_fn = utils.get_loss(loss_name)
        self.miner = utils.get_miner(miner_name, miner_margin)
        # running list of per-batch "trivial sample" ratios; reset every epoch
        self.batch_acc = []

        self.faiss_gpu = faiss_gpu

        # ----------------------------------
        # get the backbone and the aggregator
        self.backbone = helper.get_backbone(backbone_arch, pretrained, layers_to_freeze, layers_to_crop, normalize)
        self.aggregator = helper.get_aggregator(agg_arch, agg_config)

    def forward(self, x):
        """Run the backbone, then the aggregator, and return the aggregator output."""
        x = self.backbone(x)
        x = self.aggregator(x)
        return x

    def configure_optimizers(self):
        """Create the optimizer selected by ``self.optimizer`` plus a MultiStepLR scheduler.

        Raises:
            ValueError: if ``self.optimizer`` is not 'sgd', 'adamw' or 'adam'.
        """
        name = self.optimizer.lower()
        if name == 'sgd':
            optimizer = torch.optim.SGD(self.parameters(),
                                        lr=self.lr,
                                        weight_decay=self.weight_decay,
                                        momentum=self.momentum)
        elif name == 'adamw':
            optimizer = torch.optim.AdamW(self.parameters(),
                                          lr=self.lr,
                                          weight_decay=self.weight_decay)
        elif name == 'adam':
            # Previously this branch also built AdamW, so asking for 'adam'
            # silently trained with decoupled weight decay.
            optimizer = torch.optim.Adam(self.parameters(),
                                         lr=self.lr,
                                         weight_decay=self.weight_decay)
        else:
            raise ValueError(f'Optimizer {self.optimizer} has not been added to "configure_optimizers()"')
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=self.milestones, gamma=self.lr_mult)
        return [optimizer], [scheduler]

    def optimizer_step(self,  epoch, batch_idx,
                        optimizer, optimizer_idx, optimizer_closure,
                        on_tpu, using_native_amp, using_lbfgs):
        """Apply a linear learning-rate warm-up, then step the optimizer."""
        # During the first `warmpup_steps` global steps, scale the lr linearly
        # from lr/warmpup_steps up to lr.
        if self.trainer.global_step < self.warmpup_steps:
            lr_scale = min(1., float(self.trainer.global_step + 1) / self.warmpup_steps)
            for pg in optimizer.param_groups:
                pg['lr'] = lr_scale * self.lr
        optimizer.step(closure=optimizer_closure)

    def loss_function(self, descriptors, labels):
        """Compute the loss for a batch and log the running 'b_acc' value.

        With a miner, 'b_acc' for the batch is one minus the fraction of
        samples that appear in the first miner output. Without a miner it is
        0.0 unless the loss returns a ``(loss, batch_acc)`` tuple.

        Returns:
            The loss value produced by ``self.loss_fn``.
        """
        if self.miner is not None:
            # online mining of pairs/triplets
            miner_outputs = self.miner(descriptors, labels)
            loss = self.loss_fn(descriptors, labels, miner_outputs)

            # share of samples that were NOT selected by the miner
            nb_samples = descriptors.shape[0]
            nb_mined = len(set(miner_outputs[0].detach().cpu().numpy()))
            batch_acc = 1.0 - (nb_mined/nb_samples)

        else: # no online mining
            loss = self.loss_fn(descriptors, labels)
            batch_acc = 0.0
            if isinstance(loss, tuple):
                # some losses do the mining internally and return
                # (loss value, batch accuracy) instead of a bare loss
                loss, batch_acc = loss

        # keep the value of every batch; the list is emptied at epoch end
        self.batch_acc.append(batch_acc)
        self.log('b_acc', sum(self.batch_acc) /
                len(self.batch_acc), prog_bar=True, logger=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Flatten a batch of places into images, embed them and return the loss."""
        places, labels = batch

        # The batch is unpacked as 5-D: BS places, each holding N images.
        BS, N, ch, h, w = places.shape

        # merge the place and image axes so the model sees BS*N images
        images = places.view(BS*N, ch, h, w)
        labels = labels.view(-1)

        descriptors = self(images) # calls forward()
        loss = self.loss_function(descriptors, labels)

        self.log('loss', loss.item(), logger=True)
        return {'loss': loss}

    def training_epoch_end(self, training_step_outputs):
        """Reset the per-batch accuracy list for the next epoch."""
        self.batch_acc = []

    def validation_step(self, batch, batch_idx, dataloader_idx=None):
        """Embed one validation batch and return the descriptors on the CPU."""
        places, _ = batch
        descriptors = self(places)
        return descriptors.detach().cpu()

    def validation_epoch_end(self, val_step_outputs):
        """Compute and log recall@K for every validation set.

        Descriptors arrive in dataset order. For the validation sets of this
        project (MSLS val, Pittsburgh val) that order is references first,
        then queries: [R1, R2, ..., Rn, Q1, Q2, ..., Qm]. The list is split
        into references and queries, and recall@K is computed against the
        dataset's ground truth.
        """
        dm = self.trainer.datamodule
        # Hack: with a single validation set the outputs are not nested per
        # dataloader, so wrap them to get the same structure in both cases.
        if len(dm.val_datasets)==1:
            val_step_outputs = [val_step_outputs]

        for i, (val_set_name, val_dataset) in enumerate(zip(dm.val_set_names, dm.val_datasets)):
            feats = torch.concat(val_step_outputs[i], dim=0)

            num_references = val_dataset.num_references
            ground_truth = val_dataset.ground_truth

            # split to ref and queries
            r_list = feats[ : num_references]
            q_list = feats[num_references : ]

            recalls_dict, _ = utils.get_validation_recalls(r_list=r_list,
                                                q_list=q_list,
                                                k_values=[1, 5, 10, 15, 20, 25],
                                                gt=ground_truth,
                                                print_results=True,
                                                dataset_name=val_set_name,
                                                faiss_gpu=self.faiss_gpu
                                                )
            # drop the references to the large tensors before the next dataset
            del r_list, q_list, feats, num_references, ground_truth

            self.log(f'{val_set_name}/R1', recalls_dict[1], prog_bar=False, logger=True)
            self.log(f'{val_set_name}/R5', recalls_dict[5], prog_bar=False, logger=True)
            self.log(f'{val_set_name}/R10', recalls_dict[10], prog_bar=False, logger=True)
        print('\n\n')
            

In [5]:
# Per-channel statistics handed to T.Normalize
# (presumably the usual ImageNet mean/std - confirm against the backbone's training setup).
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

# Target size handed to T.Resize.
IM_SIZE = (322, 322)

def input_transform(image_size=IM_SIZE):
    """Build the image preprocessing pipeline.

    The pipeline resizes to ``image_size`` with bicubic interpolation,
    converts the image to a tensor, and normalizes it with ``MEAN``/``STD``.
    """
    resize = T.Resize(image_size, interpolation=T.InterpolationMode.BICUBIC)
    to_tensor = T.ToTensor()
    normalize = T.Normalize(mean=MEAN, std=STD)
    return T.Compose([resize, to_tensor, normalize])


In [6]:
from dataloaders.val.EssexDataset import EssexDataset

In [7]:
def get_val_dataset(dataset_name, input_transform=input_transform()):
    """Instantiate a validation dataset selected by a keyword in its name.

    The lower-cased ``dataset_name`` is matched by substring, in this order:
    'cross', 'essex', 'inria', 'nordland', 'sped', 'msls', 'pitts'.
    For 'pitts' the lower-cased name is also passed on as ``which_ds``.

    NOTE(review): among the cells visible here only ``EssexDataset`` is
    imported; the other dataset classes must be imported before their
    keyword is used, otherwise the lookup fails with ``NameError``.

    Args:
        dataset_name: name containing one of the keywords above.
        input_transform: transform handed to the dataset constructor. The
            default is built once, when this function is defined.

    Returns:
        ``(dataset, num_references, num_queries, ground_truth)``, the last
        three read from the dataset's attributes.

    Raises:
        ValueError: if ``dataset_name`` contains none of the keywords.
    """
    key = dataset_name.lower()

    if 'cross' in key:
        ds = CrossSeasonDataset(input_transform=input_transform)
    elif 'essex' in key:
        ds = EssexDataset(input_transform=input_transform)
    elif 'inria' in key:
        ds = InriaDataset(input_transform=input_transform)
    elif 'nordland' in key:
        ds = NordlandDataset(input_transform=input_transform)
    elif 'sped' in key:
        ds = SPEDDataset(input_transform=input_transform)
    elif 'msls' in key:
        ds = MSLS(input_transform=input_transform)
    elif 'pitts' in key:
        ds = PittsburghDataset(which_ds=key, input_transform=input_transform)
    else:
        # Same exception type as before, now saying what went wrong.
        raise ValueError(
            f"Unknown validation dataset {dataset_name!r}; expected a name containing "
            "one of: cross, essex, inria, nordland, sped, msls, pitts"
        )

    return ds, ds.num_references, ds.num_queries, ds.ground_truth

In [15]:
def get_descriptors(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and return all descriptors as one CPU tensor.

    Each batch is unpacked as ``(images, labels)``; the labels are ignored.
    If the model returns a tuple or list, its first element is used as the
    descriptors. If it returns a tensor, the whole tensor is kept. The
    earlier unconditional ``[0]`` would have kept only the first sample of
    every batch in that case.

    Args:
        model: callable mapping a batch of images to descriptors.
        dataloader: iterable of ``(images, labels)`` batches.
        device: device the images are moved to before the forward pass.

    Returns:
        The per-batch descriptors concatenated along dimension 0.
    """
    descriptors = []
    with torch.no_grad():
        for imgs, _ in tqdm(dataloader, 'Calculating descritptors...'):
            output = model(imgs.to(device))
            if isinstance(output, (tuple, list)):
                output = output[0]
            descriptors.append(output.cpu())

    return torch.cat(descriptors)

In [9]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [10]:
# Evaluation model: 'dinov2' backbone with the 'boq' aggregator.
# row_dim evaluates to 12288 // 384 == 32.
model = VPRModel(
    backbone_arch='dinov2',
    normalize=True,
    agg_arch='boq',
    agg_config=dict(
        in_channels=768,  # make sure the backbone has out_channels attribute
        proj_channels=384,
        num_queries=64,
        num_layers=2,
        row_dim=12288 // 384,
    ),
)

Using cache found in /home/sunveil/.cache/torch/hub/facebookresearch_dinov2_main


In [11]:
# Load the trained weights.
#  - map_location=device lets a checkpoint saved from a GPU be opened on a
#    CPU-only machine.
#  - weights_only=True restricts unpickling to tensors and plain containers,
#    which is all a state dict needs. It should also silence the torch.load
#    warning recorded under this cell, if that is the weights_only
#    FutureWarning (the recorded output is truncated).
state_dict = torch.load(
    '/media/sunveil/Data/header_detection/poddubnyy/postgraduate/VPR/Bag-of-Queries/dinov2_12288.pth',  # link to the trained weights
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)
# If the file is a full checkpoint that nests the weights under 'state_dict', use instead:
# model.load_state_dict(state_dict['state_dict'])
model.eval()
model = model.to(device)


  state_dict = torch.load('/media/sunveil/Data/header_detection/poddubnyy/postgraduate/VPR/Bag-of-Queries/dinov2_12288.pth') # link to the trained weights


In [16]:
# --- evaluation settings
val_dataset_name = 'essex'
batch_size = 10

# --- dataset and loader (no shuffle argument is passed, so dataset order is kept)
val_dataset, num_references, num_queries, ground_truth = get_val_dataset(val_dataset_name)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=4)

# --- one descriptor per image
descriptors = get_descriptors(model, val_loader, device)
print(f'Descriptor dimension {descriptors.shape[1]}')

# --- the first `num_references` rows are references, the remainder are queries
r_list = descriptors[:num_references].cpu()
q_list = descriptors[num_references:].cpu()

recalls_dict, preds = get_validation_recalls(
    r_list=r_list,
    q_list=q_list,
    k_values=[1, 5, 10],
    gt=ground_truth,
    print_results=True,
    dataset_name=val_dataset_name,
)


Calculating descritptors...:   0%|          | 0/42 [00:00<?, ?it/s]

Descriptor dimension 12288


+-----------------------------------+
|        Performance on essex       |
+----------+-------+-------+--------+
|    K     |   1   |   5   |   10   |
+----------+-------+-------+--------+
| Recall@K | 90.95 | 99.52 | 100.00 |
+----------+-------+-------+--------+


In [18]:
len(val_dataset)

420

Датасет ESSEX3IN1: объём тестовой выборки — 420 изображений.

Этот набор данных сопровождает нашу работу под названием «Memorable Maps: A Framework for Re-defining Places in Visual Place Recognition». PDF-файл этой работы доступен по ссылке: https://arxiv.org/abs/1811.03529.

В отличие от существующих наборов данных VPR, в которых представлены либо вариации точек зрения, либо условные вариации, ESSEX3IN1 - это первый набор данных, состоящий из изображений мест, которые являются запутанными как для VPR, так и для человеческого распознавания. Он содержит запутанные и сложные динамические объекты, естественные сцены и малоинформативные кадры. Как показано в нашей статье, большинство современных методов VPR с трудом справляются с этими запутанными изображениями.

Набор данных разделён на 2 папки. Между кадрами запроса и опорными кадрами установлено покадровое соответствие. Изображения 0–132 в каждой папке являются запутанными, а изображения 133–209 — хорошими кадрами.

The dataset is composed of images from
indoor, outdoor and natural scenes.