[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AnyLoc/AnyLoc/blob/main/demo/anyloc_vlad_generate_colab.ipynb)

# AnyLoc VLAD DINOv2 Descriptors

Given a folder of images, this notebook generates one global descriptor per image and stores the results in another folder. The global descriptors are created by applying VLAD aggregation to DINOv2 features taken from a particular layer and facet of the transformer (the defaults follow the AnyLoc paper).


## Setup


### Google Colab

- Run this section only if you are running this notebook on Google Colab.
- If you're running this notebook on your local machine, skip this section and jump to the `Downloading Data` sub-section.

In [None]:
# Manage directory for saving things: mount Google Drive into the Colab filesystem
from google.colab import drive
drive.mount('/content/gdrive')
# NOTE(review): `save_path` is not referenced by any other cell visible in this notebook — confirm it is still needed.
save_path = '/content/gdrive/My Drive/Colab Notebooks/Anyloc'  # Change this path to your desired Drive directory

Mounted at /content/gdrive


In [None]:
import os
import requests
import shutil

# A copy of the helper module is kept in Google Drive and copied into the
# Colab working directory, from where the notebook imports it.
google_drive_path = '/content/gdrive/My Drive/Colab Notebooks/Anyloc'  # Change to your desired directory
colab_path = '/content'
filename = 'utilities.py'

drive_copy = os.path.join(google_drive_path, filename)
local_copy = os.path.join(colab_path, filename)

# Step 1: make sure 'utilities.py' exists in the Google Drive directory
if os.path.isfile(drive_copy):
    print(f'Found {filename} in Google Drive')
else:
    print(f"Could not find {filename} in Google Drive, downloading it")
    url = "https://raw.githubusercontent.com/AnyLoc/AnyLoc/main/demo/utilities.py"
    file_data = requests.get(url, allow_redirects=True, timeout=60)
    # Fail loudly on HTTP errors instead of saving an error page as a .py file
    file_data.raise_for_status()
    # The target folder may not exist yet (e.g. first run on a fresh Drive)
    os.makedirs(google_drive_path, exist_ok=True)
    with open(drive_copy, 'wb') as handler:
        handler.write(file_data.content)
    print(f"{filename} saved in Google Drive directory:", google_drive_path)

# Step 2: make sure 'utilities.py' exists in the current working directory
if os.path.isfile(local_copy):
    print(f'Found {filename} in the current working directory')
else:
    print(f"Could not find {filename} in the current working directory, copying it from Google Drive")
    shutil.copy(drive_copy, local_copy)
    print(f"{filename} copied to the current working directory:", colab_path)


Found utilities.py in Google Drive
Could not find utilities.py in the current working directory, copying it from Google Drive
utilities.py copied to the current working directory: /content


In [None]:
# Ensure that utilities.py module is there (download it next to the notebook if missing)
import os
import requests

if os.path.isfile('utilities.py'):
    print('Found utilities.py')
else:
    print("Could not find utilities.py, downloading it")
    url = "https://raw.githubusercontent.com/AnyLoc/AnyLoc/main/demo/utilities.py"
    file_data = requests.get(url, allow_redirects=True, timeout=60)
    # Abort on HTTP errors; otherwise the error body would be written to
    # utilities.py and the later `from utilities import ...` would fail confusingly
    file_data.raise_for_status()
    with open('utilities.py', 'wb') as handler:
        handler.write(file_data.content)

Found utilities.py


In [None]:
# Sanity-check the runtime's GPU before the model is loaded
print("Verifying NVIDIA GPU is available")
# `-L` lists the GPUs visible to the driver, one per line
!nvidia-smi -L
print("Please see that the GPU has at least 16 GB VRAM free")
# Full status table, including the current memory usage of each GPU
!nvidia-smi

Verifying NVIDIA GPU is available
GPU 0: Tesla T4 (UUID: GPU-a3a1009d-cabd-d20f-9ef9-d6523e46a5ec)
Please see that the GPU has at least 16 GB VRAM free
Mon Dec  4 18:32:00 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                

Ensure that packages are installed

In [None]:
# Install other things
print("Trying to access utility libraries")
try:
    import einops
    import fast_pytorch_kmeans
    import distinctipy
    import onedrivedownloader
    print("Can access utility libraries")
except ImportError:
    print("Installing utility libraries")
    !pip install fast_pytorch_kmeans
    !pip install einops
    !pip install distinctipy
    !pip install onedrivedownloader

Trying to access utility libraries
Installing utility libraries
Collecting fast_pytorch_kmeans
  Downloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl (8.8 kB)
Collecting pynvml (from fast_pytorch_kmeans)
  Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pynvml, fast_pytorch_kmeans
Successfully installed fast_pytorch_kmeans-0.2.0.1 pynvml-11.5.0
Collecting einops
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.7.0
Collecting distinctipy
  Downloading distinctipy-1.2.3-py3-none-any.whl (25 kB)
Installing collected packages: distinctipy
Successfully installed distinctipy-1.2.3
Collecting onedrivedownloader
  Downloading onedrivedownloade

#### Downloading Data

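Data used by this notebook (the cell below only checks that the image folder exists and counts its `.jpg` files):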

- `cache`: Vocabulary (cluster centers) and test images
- `data`: Images that we'll use for testing


In [None]:
# Ensure that everything went smoothly: the image folder must exist
import glob
import os  # imported here too, so the cell also works when the Colab-only section was skipped


def _ex(x):
    """Expand `~` in a path and resolve it to an absolute, symlink-free path."""
    return os.path.realpath(os.path.expanduser(x))


cache_dir: str = _ex("./cache")
#imgs_dir = _ex("./data/CityCenter/Images/")
imgs_dir = _ex("/content/gdrive/MyDrive/Colab Notebooks/Anyloc/42")
#assert os.path.isdir(cache_dir), "Cache directory not found"
assert os.path.isdir(imgs_dir), f"Image directory not found: {imgs_dir}"
# Only files with a lowercase `.jpg` extension are counted
num_imgs = len(glob.glob(f"{imgs_dir}/*.jpg"))
print(f"Found {num_imgs} images in {imgs_dir}")

Found 1590 images in /content/gdrive/MyDrive/Colab Notebooks/Anyloc/42


### Import Everything

In [None]:
# Import everything
import numpy as np
import cv2 as cv
import torch
from torch import nn
from torch.nn import functional as F
from torchvision import transforms as tvf
from torchvision.transforms import functional as T
from PIL import Image
import matplotlib.pyplot as plt
import distinctipy as dipy
from tqdm.auto import tqdm
from typing import Literal, List
import os
import natsort
import shutil
from copy import deepcopy
# DINOv2 imports
from utilities import DinoV2ExtractFeatures
from utilities import VLAD
import einops as ein

## Building Global Descriptors

Save global descriptors as numpy arrays to a directory (mirroring the directory structure of the dataset).


In [None]:
# Program parameters
# Output folder: the generation loop writes one .npy global descriptor per image here
save_dir = _ex("/content/gdrive/MyDrive/Colab Notebooks/Anyloc/42trainedon39output")
# Device the extractor and the input images are placed on (a CUDA GPU is required)
device = torch.device("cuda")
# Dino_v2 properties (parameters)
desc_layer: int = 31  # layer index handed to DinoV2ExtractFeatures
desc_facet: Literal["query", "key", "value", "token"] = "value"  # facet handed to DinoV2ExtractFeatures
num_c: int = 32  # first argument of VLAD(...); presumably the number of clusters — TODO confirm
# Domain for use case (deployment environment)
# domain: Literal["aerial", "indoor", "urban"] = "aerial"
# Maximum image dimension: images whose longer side exceeds this are resized down to it
max_img_size: int = 1024

In [None]:
# Make sure the output directory exists before any descriptor is written
if os.path.isdir(save_dir):
    print("Save directory already exists, overwriting possible!")
else:
    os.makedirs(save_dir)
    print(f"Creating directory: {save_dir}")

Save directory already exists, overwriting possible!


### DINOv2 Extractor

DINOv2 extractor and the base transformation (for each image)

In [None]:
# DINO extractor: build it only once per kernel session, so that re-running
# this cell does not load the model again
if "extractor" not in globals():
    extractor = DinoV2ExtractFeatures(
        "dinov2_vitg14", desc_layer, desc_facet, device=device)
else:
    print("Extractor already defined, skipping")

# Base image transformations applied to every image:
# PIL image -> float tensor, then per-channel normalisation
_to_tensor = tvf.ToTensor()
_normalize = tvf.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
base_tf = tvf.Compose([_to_tensor, _normalize])

Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vitg14_pretrain.pth
100%|██████████| 4.23G/4.23G [00:25<00:00, 179MB/s]


### VLAD object

Used for forming global descriptors. The cluster centers (vocabulary) for VLAD are fitted below on patch descriptors extracted from a set of database images.


In [None]:
# Re-state the VLAD setting (same value as in the parameters cell)
num_c = 32
# Override the earlier "./cache" value: no cache directory is handed to VLAD
# (the recorded output of the VLAD cell reports that caching is disabled)
cache_dir = None

In [None]:
# VLAD aggregator used for both fitting the vocabulary and generating descriptors.
# desc_dim=None: presumably the descriptor dimension is inferred when fitting — TODO confirm
vlad = VLAD(num_c, desc_dim=None, cache_dir = cache_dir)

VLAD caching is disabled.


In [None]:
# Images used to build the VLAD vocabulary (database set)
imgs_dir_db = _ex("/content/gdrive/MyDrive/Colab Notebooks/Anyloc/training39")
# glob returns files in arbitrary order; sort naturally (as the generation
# cell does) so that slicing this list selects the same images on every run
img_db_names = natsort.natsorted(glob.glob(f"{imgs_dir_db}/*.jpg"))
assert len(img_db_names) > 0, f"No .jpg images found in {imgs_dir_db}"

In [None]:
# torch.cuda.empty_cache()

In [None]:
# Get cluster centers in the VLAD: extract DINOv2 patch descriptors from a few
# database images and fit the VLAD vocabulary on them.
num_vocab_imgs = 19  # number of database images used for fitting
patch_descs = []

for img_fname in tqdm(img_db_names[:num_vocab_imgs]):
    # Inference only: keep the whole per-image pipeline, including the
    # extractor forward pass, inside no_grad (as the generation loop does)
    with torch.no_grad():
        # Close the file handle as soon as the RGB copy has been made
        with Image.open(img_fname) as img_file:
            pil_img = img_file.convert('RGB')
        img_pt = base_tf(pil_img).to(device)
        if max(img_pt.shape[-2:]) > max_img_size:
            c, h, w = img_pt.shape
            # Maintain aspect ratio
            if h == max(img_pt.shape[-2:]):
                w = int(w * max_img_size / h)
                h = max_img_size
            else:
                h = int(h * max_img_size / w)
                w = max_img_size
            print(f"To {(h, w) =}")
            img_pt = T.resize(img_pt, (h, w),
                    interpolation=T.InterpolationMode.BICUBIC)
            print(f"Resized {img_fname} to {img_pt.shape = }")
        # Make image patchable (14, 14 patches)
        c, h, w = img_pt.shape
        h_new, w_new = (h // 14) * 14, (w // 14) * 14
        img_in = tvf.CenterCrop((h_new, w_new))(img_pt)[None, ...]
        ret = extractor(img_in)  # [1, num_patches, desc_dim]
    patch_descs.append(ret.cpu())

# Concatenating along dim 0 requires every image to yield the same number of
# patches, i.e. all images must end up with the same (cropped) size
patch_descs = torch.cat(patch_descs, dim=0) # [N, n_p, d_dim]

# Database descriptors (for VLAD clusters): [n_db, n_p, d_dim]
full_db_vlad = patch_descs
# torch.save(full_db_vlad,"full_db_vlad_dinov2_128.pt")
print(f"Database (for VLAD) shape: {full_db_vlad.shape}")
d_dim = full_db_vlad.shape[2]
print(f"Descriptor dimensionality: {d_dim}")
# Fit on the flat list of patch descriptors from all images
vlad.fit(ein.rearrange(full_db_vlad, "n k d -> (n k) d"))

  0%|          | 0/19 [00:00<?, ?it/s]

Database (for VLAD) shape: torch.Size([19, 2584, 1536])
Descriptor dimensionality: 1536


In [None]:
# VLAD object
#vlad = VLAD(num_c, desc_dim=None,
#        cache_dir=os.path.dirname(c_centers_file))
# Fit (load) the cluster centers (this'll also load the desc_dim)
#vlad.fit(None)

Using cached cluster centers
Desc dim set to 1536


### Global Descriptor Generation

Main generation stage. Global descriptors are created for a slice of the naturally sorted image list (indices 890 to 1589 in the cell below) and saved as one `.npy` file per image.

In [None]:
# Generate one VLAD global descriptor per image and save it as a .npy file.
# Range of (naturally sorted) images processed by this run
img_start_idx, img_end_idx = 890, 1590

img_fnames = glob.glob(f"{imgs_dir}/*.jpg")
img_fnames = natsort.natsorted(img_fnames)
descs = []
for img_fname in tqdm(img_fnames[img_start_idx:img_end_idx]):
    # DINO features
    with torch.no_grad():
        # Close the file handle as soon as the RGB copy has been made
        with Image.open(img_fname) as img_file:
            pil_img = img_file.convert('RGB')
        img_pt = base_tf(pil_img).to(device)
        if max(img_pt.shape[-2:]) > max_img_size:
            c, h, w = img_pt.shape
            # Maintain aspect ratio
            if h == max(img_pt.shape[-2:]):
                w = int(w * max_img_size / h)
                h = max_img_size
            else:
                h = int(h * max_img_size / w)
                w = max_img_size
            print(f"To {(h, w) =}")
            img_pt = T.resize(img_pt, (h, w),
                    interpolation=T.InterpolationMode.BICUBIC)
            print(f"Resized {img_fname} to {img_pt.shape = }")
        # Make image patchable (14, 14 patches)
        c, h, w = img_pt.shape
        h_new, w_new = (h // 14) * 14, (w // 14) * 14
        img_pt = tvf.CenterCrop((h_new, w_new))(img_pt)[None, ...]
        # Extract descriptor
        ret = extractor(img_pt) # [1, num_patches, desc_dim]
    # VLAD global descriptor. Drop only the batch axis: a bare squeeze() would
    # also collapse any other axis that happens to have size 1
    gd = vlad.generate(ret.cpu().squeeze(0)) # VLAD: shape [agg_dim]
    gd_np = gd.numpy()[np.newaxis, ...] # shape: [1, agg_dim]
    descs.append(gd_np)
    # Saved as "<image file name>.npy" inside save_dir
    np.save(os.path.join(save_dir, f"{os.path.basename(img_fname)}.npy"), gd_np)

  0%|          | 0/700 [00:00<?, ?it/s]

# Variant of the descriptor-generation loop that covers the first 1590
# naturally sorted images (`img_fnames[:1590]`).
# NOTE(review): this code follows the previous cell without its own cell
# marker — confirm whether it is meant to be executed or is a leftover copy.
img_fnames = glob.glob(f"{imgs_dir}/*.jpg")
img_fnames = natsort.natsorted(img_fnames)
descs = []
for img_fname in tqdm(img_fnames[:1590]):
    # DINO features
    with torch.no_grad():
        pil_img = Image.open(img_fname).convert('RGB')
        img_pt = base_tf(pil_img).to(device)
        # Downscale images whose longer side exceeds max_img_size
        if max(img_pt.shape[-2:]) > max_img_size:
            c, h, w = img_pt.shape
            # Maintain aspect ratio
            if h == max(img_pt.shape[-2:]):
                w = int(w * max_img_size / h)
                h = max_img_size


            else:
                h = int(h * max_img_size / w)
                w = max_img_size
            print(f"To {(h, w) =}")
            img_pt = T.resize(img_pt, (h, w),
                    interpolation=T.InterpolationMode.BICUBIC)
            print(f"Resized {img_fname} to {img_pt.shape = }")


        # Make image patchable (14, 14 patches)
        c, h, w = img_pt.shape
        # Round height and width down to multiples of 14, then center-crop
        h_new, w_new = (h // 14) * 14, (w // 14) * 14
        img_pt = tvf.CenterCrop((h_new, w_new))(img_pt)[None, ...]
        # Extract descriptor
        ret = extractor(img_pt) # [1, num_patches, desc_dim]
    # VLAD global descriptor
    gd = vlad.generate(ret.cpu().squeeze()) # VLAD: shape [agg_dim]
    gd_np = gd.numpy()[np.newaxis, ...] # shape: [1, agg_dim]
    descs.append(gd_np)
    # Saved as "<image file name>.npy" inside save_dir
    np.save(f"{save_dir}/{os.path.basename(img_fname)}.npy", gd_np)

Done