In [None]:
pip install prettytable

In [None]:
pip install timm

In [None]:
pip install ipywidgets

In [1]:
import sys
#sys.path.append('..') # append parent directory, we need it
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.optim import lr_scheduler
import utils

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
from utils.validation import get_validation_recalls
from models import helper


In [5]:
# Debug aid: display the interpreter's module search path, i.e. the locations
# from which the project-local packages (`utils`, `models`, `dataloaders`) are resolved.
sys.path

['/home/sunveil/anaconda3/envs/boq/lib/python312.zip',
 '/home/sunveil/anaconda3/envs/boq/lib/python3.12',
 '/home/sunveil/anaconda3/envs/boq/lib/python3.12/lib-dynload',
 '',
 '/home/sunveil/anaconda3/envs/boq/lib/python3.12/site-packages',
 '..',
 '/home/sunveil/anaconda3/envs/boq/lib/python3.12/site-packages/setuptools/_vendor',
 '/tmp/tmp26nlz7kf']

In [2]:
import models

AttributeError: module 'models' has no attribute 'helper'

In [17]:


class VPRModel(pl.LightningModule):
    """Main model for Visual Place Recognition: a backbone followed by an aggregator.

    Pytorch Lightning is used for modularity purposes. The backbone and the
    aggregator are both built by the project's ``helper`` module; ``forward``
    simply chains them.

    Args:
        backbone_arch: Backbone name, forwarded to ``helper.get_backbone``.
        pretrained: Forwarded to ``helper.get_backbone``.
        layers_to_freeze: Forwarded to ``helper.get_backbone``.
        layers_to_crop: Forwarded to ``helper.get_backbone``. ``None`` (the
            default) is treated as an empty list.
        normalize: Forwarded to ``helper.get_backbone``.
        agg_arch: Aggregator name, forwarded to ``helper.get_aggregator``
            (e.g. ConvAP, CosPlace, NetVLAD, GeM, AVG).
        agg_config: Aggregator keyword configuration, forwarded to
            ``helper.get_aggregator``. ``None`` (the default) is treated as an
            empty dict.
        faiss_gpu: Stored on the instance; not used inside this class.
    """

    def __init__(self,
                 # ---- Backbone
                 backbone_arch='resnet50',
                 pretrained=True,
                 layers_to_freeze=1,
                 layers_to_crop=None,
                 normalize=True,

                 # ---- Aggregator
                 agg_arch='ConvAP',  # CosPlace, NetVLAD, GeM, AVG
                 agg_config=None,

                 faiss_gpu=False
                 ):
        super().__init__()

        # Build fresh containers per instance: mutable defaults in the
        # signature would be shared by every model created without arguments.
        if layers_to_crop is None:
            layers_to_crop = []
        if agg_config is None:
            agg_config = {}

        self.encoder_arch = backbone_arch
        self.pretrained = pretrained
        self.layers_to_freeze = layers_to_freeze
        self.layers_to_crop = layers_to_crop
        self.normalize = normalize

        self.agg_arch = agg_arch
        self.agg_config = agg_config

        self.faiss_gpu = faiss_gpu

        # ----------------------------------
        # get the backbone and the aggregator
        self.backbone = helper.get_backbone(backbone_arch, pretrained, layers_to_freeze, layers_to_crop, normalize)
        self.aggregator = helper.get_aggregator(agg_arch, agg_config)

    # the forward pass of the lightning model
    def forward(self, x):
        """Run ``x`` through the backbone, then the aggregator, and return the result."""
        x = self.backbone(x)
        x = self.aggregator(x)
        return x
    
    

In [18]:
# Per-channel mean and standard deviation used by the normalization step
# of the preprocessing pipeline.
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

# Default size every image is resized to before being fed to the model.
# NOTE(review): 322 = 23 * 14, presumably chosen as a multiple of the ViT patch size — confirm.
IM_SIZE = (322, 322)

def input_transform(image_size=IM_SIZE):
    """Build the image preprocessing pipeline.

    Args:
        image_size: Target size handed to ``T.Resize``; defaults to ``IM_SIZE``.

    Returns:
        A ``T.Compose`` that resizes with bicubic interpolation, converts the
        image to a tensor, and normalizes it with ``MEAN`` / ``STD``.
    """
    resize = T.Resize(image_size, interpolation=T.InterpolationMode.BICUBIC)
    to_tensor = T.ToTensor()
    normalize = T.Normalize(mean=MEAN, std=STD)
    return T.Compose([resize, to_tensor, normalize])


In [19]:
from dataloaders.val.EssexDataset import EssexDataset

In [20]:
def get_val_dataset(dataset_name, input_transform=input_transform()):
    """Instantiate a validation dataset selected by a keyword in its name.

    The name is lower-cased and matched by substring, in this order:
    'cross', 'essex', 'inria', 'nordland', 'sped', 'msls', 'pitts'.

    Args:
        dataset_name: Name containing one of the keywords above. For 'pitts'
            the lower-cased name is also passed on as ``which_ds``.
        input_transform: Transform handed to the dataset constructor. The
            default pipeline is built once, when this function is defined.

    Returns:
        Tuple ``(ds, num_references, num_queries, ground_truth)`` where the
        last three are read from the dataset's attributes of the same name.

    Raises:
        ValueError: If ``dataset_name`` contains none of the known keywords.
    """
    dataset_name = dataset_name.lower()

    # NOTE(review): only EssexDataset is imported in the visible notebook cells;
    # the other branches need their dataset classes imported before use.
    # The chain is kept lazy on purpose so a missing class only fails when selected.
    if 'cross' in dataset_name:
        ds = CrossSeasonDataset(input_transform = input_transform)

    elif 'essex' in dataset_name:
        ds = EssexDataset(input_transform = input_transform)

    elif 'inria' in dataset_name:
        ds = InriaDataset(input_transform = input_transform)

    elif 'nordland' in dataset_name:
        ds = NordlandDataset(input_transform = input_transform)

    elif 'sped' in dataset_name:
        ds = SPEDDataset(input_transform = input_transform)

    elif 'msls' in dataset_name:
        ds = MSLS(input_transform = input_transform)

    elif 'pitts' in dataset_name:
        ds = PittsburghDataset(which_ds=dataset_name, input_transform = input_transform)
    else:
        # Name the rejected value and the accepted keywords instead of raising a bare error.
        raise ValueError(
            f"Unknown validation dataset {dataset_name!r}; expected a name containing one of: "
            "'cross', 'essex', 'inria', 'nordland', 'sped', 'msls', 'pitts'"
        )

    num_references = ds.num_references
    num_queries = ds.num_queries
    ground_truth = ds.ground_truth
    return ds, num_references, num_queries, ground_truth

In [21]:
def get_descriptors(model, dataloader, device):
    """Run the model over every batch and collect its descriptors on the CPU.

    Args:
        model: Callable applied to each image batch.
        dataloader: Iterable yielding ``(images, labels)`` pairs; labels are ignored.
        device: Device the images are moved to before the forward pass.

    Returns:
        A single tensor made by concatenating, in iteration order, element
        ``[0]`` of each model output.
    """
    collected = []
    with torch.no_grad():
        for imgs, _ in tqdm(dataloader, 'Calculating descritptors...'):
            outputs = model(imgs.to(device))
            # NOTE(review): presumably the model returns a tuple whose first item
            # is the descriptor batch — with a plain tensor this would pick one sample.
            collected.append(outputs[0].cpu())

    return torch.cat(collected)

In [22]:
# Prefer the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [23]:
# Build the evaluation model: DINOv2 backbone with the BoQ aggregator.
model = VPRModel(
    backbone_arch='dinov2',
    normalize=True,
    agg_arch='boq',
    agg_config=dict(
        in_channels=768,        # make sure the backbone has out_channels attribute
        proj_channels=384,
        num_queries=64,
        num_layers=2,
        row_dim=12288 // 384,   # = 32; NOTE(review): 12288 is presumably the target descriptor size
    ),
)

Using cache found in /home/sunveil/.cache/torch/hub/facebookresearch_dinov2_main


In [25]:
# Location of the trained weights.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
WEIGHTS_PATH = '/media/sunveil/Data/header_detection/poddubnyy/postgraduate/VPR/Bag-of-Queries/dinov2_12288.pth'  # link to the trained weights

# map_location='cpu' lets the file load on hosts without CUDA; the model is moved
# to `device` below. weights_only=True restricts unpickling to tensors and plain
# containers, so no arbitrary pickled objects are executed.
state_dict = torch.load(WEIGHTS_PATH, map_location='cpu', weights_only=True)
model.load_state_dict(state_dict)
# NOTE(review): if the file were a full training checkpoint, the weights would
# presumably sit under a nested key instead:
# model.load_state_dict(state_dict['state_dict'])
model.eval()
model = model.to(device)


  state_dict = torch.load('/media/sunveil/Data/header_detection/poddubnyy/postgraduate/VPR/Bag-of-Queries/dinov2_12288.pth') # link to the trained weights


In [26]:
# ---- Evaluation settings
val_dataset_name = 'essex'
batch_size = 10

# ---- Dataset and loader (default loader order is kept, since the split below relies on it)
val_dataset, num_references, num_queries, ground_truth = get_val_dataset(val_dataset_name)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=4)

# ---- Descriptor extraction
descriptors = get_descriptors(model, val_loader, device)
print(f'Descriptor dimension {descriptors.shape[1]}')

# ---- Split: the first `num_references` rows are references, the remainder are queries
r_list, q_list = descriptors[:num_references].cpu(), descriptors[num_references:].cpu()

# ---- Recall@K for K in {1, 5, 10}
recalls_dict, preds = get_validation_recalls(
    r_list=r_list,
    q_list=q_list,
    k_values=[1, 5, 10],
    gt=ground_truth,
    print_results=True,
    dataset_name=val_dataset_name,
)


Calculating descritptors...:   0%|          | 0/42 [00:00<?, ?it/s]

Descriptor dimension 12288


+-----------------------------------+
|        Performance on essex       |
+----------+-------+-------+--------+
|    K     |   1   |   5   |   10   |
+----------+-------+-------+--------+
| Recall@K | 90.95 | 99.52 | 100.00 |
+----------+-------+-------+--------+


In [27]:
# Total number of samples in the validation dataset.
len(val_dataset)

420

Датасет ESSEX3IN1: объём тестовой выборки — 420 изображений.

Этот набор данных сопровождает нашу работу под названием «Memorable Maps: A Framework for Re-defining Places in Visual Place Recognition». PDF-файл этой работы доступен на arXiv: https://arxiv.org/abs/1811.03529.

В отличие от существующих наборов данных VPR, в которых представлены либо вариации точек зрения, либо условные вариации, ESSEX3IN1 - это первый набор данных, состоящий из изображений мест, которые являются запутанными как для VPR, так и для человеческого распознавания. Он содержит запутанные и сложные динамические объекты, естественные сцены и малоинформативные кадры. Как показано в нашей статье, большинство современных методов VPR с трудом справляются с этими запутанными изображениями.

Набор данных разделён на 2 папки: кадры запросов и опорные кадры. Между ними действует покадровое соответствие: кадру запроса соответствует опорный кадр с тем же номером. Изображения 0–132 в каждой папке являются запутанными, а изображения 133–209 — хорошими кадрами.

Composed of images from
indoor, outdoor and natural scenes.