In this notebook, we will build a 3D map of a scene from a small set of images and then localize an image downloaded from the Internet. This demo was contributed by [Philipp Lindenberger](https://github.com/Phil26AT/).

# Setup
Here we define some output paths.

In [6]:
%load_ext autoreload
%autoreload 2
import tqdm, tqdm.notebook
import os
import json
tqdm.tqdm = tqdm.notebook.tqdm  # notebook-friendly progress bars
from pathlib import Path
import numpy as np

from hloc import (
    extract_features,
    match_features,
    reconstruction,
    visualization,
    pairs_from_exhaustive,
    pairs_from_sequence,
)
from hloc.visualization import plot_images, read_image
from hloc.utils import viz_3d

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:

import shutil

# Dataset root (holds the mapping images) and the directory that receives
# everything this notebook writes.
images = Path("datasets/carV3")
outputs = Path("outputs/carV3/")

# Start every run from an empty output directory so that features, matches, or
# models left over from a previous run are never picked up again. This is done
# in pure Python instead of `!rm -rf $outputs` so it neither depends on a POSIX
# shell nor passes an unquoted, interpolated path to one.
shutil.rmtree(outputs, ignore_errors=True)

sfm_pairs = outputs / "pairs-sfm.txt"  # image pairs matched for mapping
loc_pairs = outputs / "pairs-loc.txt"  # query/reference pairs for localization
sfm_dir = outputs / "sfm"  # directory handed to reconstruction.main
features = outputs / "features.h5"  # local features (mapping images and query)
matches = outputs / "matches.h5"  # matches for the pairs listed above
bbox_file = images / "bbox_dict.json"  # optional per-image bounding boxes

# Feature extractor / matcher presets looked up by name in hloc's conf tables.
feature_conf = extract_features.confs["disk"]
matcher_conf = match_features.confs["disk+lightglue"]

# 3D mapping
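First we list the images used for mapping: every `.jpg`/`.png` file found under `datasets/carV3/images`. If the dataset folder contains a `bbox_dict.json` file, we also load its per-image bounding boxes.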

In [8]:
# Collect every .jpg/.png below <images>/images (recursively) as a POSIX-style
# path relative to the dataset root, e.g. "images/61.jpg". The relative path is
# computed with pathlib rather than by string replacement, so only the leading
# dataset root is removed and the result does not depend on the OS separator.
references = sorted(
    Path(root, fname).relative_to(images).as_posix()
    for root, _, files in os.walk(images / "images")
    for fname in files
    if fname.endswith((".jpg", ".png"))
)
print(references)
print(len(references), "mapping images")
# Uncomment to preview the mapping images:
# plot_images([read_image(images / r) for r in references], dpi=25)

# Optional bounding boxes: a JSON object mapping an image file name to a list of
# four numbers (presumably x1, y1, x2, y2 in pixels; confirm against the
# consumer of `bboxes`). Keys are prefixed with "images/" so they line up with
# the names in `references`. File names are used as-is; no assumption is made
# about them starting with digits.
bboxes = {}
if os.path.exists(bbox_file):
    with open(bbox_file, "r") as f:
        bboxes = {
            os.path.join("images/", name): box for name, box in json.load(f).items()
        }
    print(len(bboxes), "bounding boxes")
print(bboxes)



['images/61.jpg', 'images/62.jpg', 'images/63.jpg', 'images/64.jpg', 'images/65.jpg', 'images/66.jpg', 'images/67.jpg', 'images/68.jpg', 'images/69.jpg', 'images/70.jpg', 'images/71.jpg', 'images/72.jpg', 'images/73.jpg', 'images/74.jpg', 'images/75.jpg', 'images/76.jpg', 'images/77.jpg', 'images/78.jpg', 'images/79.jpg', 'images/80.jpg', 'images/81.jpg', 'images/82.jpg', 'images/83.jpg', 'images/84.jpg', 'images/85.jpg', 'images/86.jpg', 'images/87.jpg', 'images/88.jpg', 'images/89.jpg', 'images/90.jpg', 'images/91.jpg']
31 mapping images
91 bounding boxes
{'images/53.jpg': [1010, 543, 1622, 948], 'images/54.jpg': [812, 531, 1770, 989], 'images/55.jpg': [866, 540, 1715, 991], 'images/56.jpg': [909, 534, 1676, 1002], 'images/57.jpg': [949, 537, 1629, 1003], 'images/58.jpg': [1004, 545, 1570, 1003], 'images/59.jpg': [1051, 528, 1521, 944], 'images/60.jpg': [1041, 542, 1499, 1023], 'images/61.jpg': [512, 901, 892, 1302], 'images/01.jpg': [971, 545, 1523, 996], 'images/02.jpg': [925, 539,

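Then we extract features and match them across image pairs. Instead of matching all pairs exhaustively, we pair each image with its neighbours in the sorted image list using `pairs_from_sequence` with a window of 3 (87 pairs for the 31 mapping images). For larger, unordered scenes, we would use image retrieval, as demonstrated in the other notebooks.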

In [9]:
# Extract local features for all mapping images into `features`.
# `bboxes` forwards the per-image boxes loaded earlier; set `bboxes = None`
# before this call to run without them.
# NOTE(review): how `bboxes` is applied is not visible here (presumably it
# limits extraction to the box); confirm in extract_features.main.
extract_features.main(
    feature_conf,
    images,
    image_list=references,
    feature_path=features,
    bboxes=bboxes,
)

# Build the list of image pairs to match from the ordered image list with a
# window of 3 (the recorded run reports 87 pairs for 31 images).
# Exhaustive alternative:
#   pairs_from_exhaustive.main(sfm_pairs, image_list=references)
pairs_from_sequence.main(
    sfm_pairs,
    image_list=references,
    features=features,
    window_size=3,
)

# Match the listed pairs; the trailing semicolon hides the call's return value.
match_features.main(
    matcher_conf,
    sfm_pairs,
    features=features,
    matches=matches,
);

[2024/08/27 17:53:22 hloc INFO] Extracting local features with configuration:
{'model': {'max_keypoints': 5000, 'name': 'disk'},
 'output': 'feats-disk',
 'preprocessing': {'grayscale': False, 'resize_max': 1600}}


  0%|          | 0/31 [00:00<?, ?it/s]

[2024/08/27 17:53:24 hloc INFO] Finished exporting features.
[2024/08/27 17:53:24 hloc INFO] Found 87 pairs.
[2024/08/27 17:53:24 hloc INFO] Matching local features with configuration:
{'model': {'features': 'disk', 'name': 'lightglue'},
 'output': 'matches-disk-lightglue'}

FlashAttention is not available. For optimal speed, consider installing torch >= 2.0 or flash-attn.



  0%|          | 0/87 [00:00<?, ?it/s]

[2024/08/27 17:53:27 hloc INFO] Finished exporting matches.


Then we run incremental Structure-from-Motion and display the reconstructed 3D model.

In [10]:
import pycolmap  # only needed for the optional reload shown further down

# Run SfM on the matched pairs and keep the resulting model.
# Settings used here: camera_mode='SINGLE' and min_match_score=0.5.
# Other combinations that were tried:
#   - defaults only (no camera_mode / min_match_score)
#   - min_match_score=0.5 alone
#   - camera_mode='PER_FOLDER', min_match_score=0.4
#   - camera_mode='AUTO', min_match_score=0.5
model = reconstruction.main(
    sfm_dir,
    images,
    sfm_pairs,
    features,
    matches,
    image_list=references,
    camera_mode='SINGLE',
    min_match_score=0.5,
)

# To reuse a model already on disk instead of recomputing it:
# model = pycolmap.Reconstruction(sfm_dir/'models/0')

# Show the reconstruction in an interactive 3D figure. `fig` is reused by the
# pose-visualization cell at the end of the notebook.
fig = viz_3d.init_figure()
viz_3d.plot_reconstruction(
    fig,
    model,
    color="rgba(255,0,0,0.5)",
    name="images",
    points_rgb=True,
)
fig.show()

[2024/08/27 17:53:30 hloc INFO] Creating an empty database...
[2024/08/27 17:53:30 hloc INFO] Importing images into the database...
[2024/08/27 17:53:31 hloc INFO] Importing features into the database...


  0%|          | 0/31 [00:00<?, ?it/s]

[2024/08/27 17:53:31 hloc INFO] Importing matches into the database...


  0%|          | 0/87 [00:00<?, ?it/s]

[2024/08/27 17:53:31 hloc INFO] Performing geometric verification of the matches...
I20240827 17:53:31.657357 180671 misc.cc:198] 
Custom feature matching
I20240827 17:53:31.669857 180671 feature_matching.cc:1021] Matching block [1/1]
I20240827 17:53:31.687426 180671 feature_matching.cc:46]  in 0.018s
I20240827 17:53:31.692888 180671 timer.cc:91] Elapsed time: 0.001 [minutes]
[2024/08/27 17:53:31 hloc INFO] Running 3D reconstruction...
I20240827 17:53:31.731660 180864 misc.cc:198] 
Loading database
I20240827 17:53:31.768481 180864 database_cache.cc:54] Loading cameras...
I20240827 17:53:31.771219 180864 database_cache.cc:64]  1 in 0.003s
I20240827 17:53:31.771241 180864 database_cache.cc:72] Loading matches...
I20240827 17:53:31.773720 180864 database_cache.cc:78]  87 in 0.002s
I20240827 17:53:31.773742 180864 database_cache.cc:94] Loading images...
I20240827 17:53:31.795120 180864 database_cache.cc:143]  31 in 0.021s (connected 31)
I20240827 17:53:31.795148 180864 database_cache.cc:15

[31mApplication will exit soon in 1800 seconds which is set by env"ORION_TASK_IDLE_TIME".[0m


: 

We also visualize which keypoints were triangulated into the 3D model.

In [None]:
# Overlay the keypoints on mapping images, coloured by "visibility".
# NOTE(review): `n=2` presumably limits the number of images shown; confirm.
visualization.visualize_sfm_2d(
    model,
    images,
    color_by="visibility",
    n=2,
)

# Localization
Now that we have a 3D map of the scene, we can localize any image. To demonstrate this, we download [a night-time image from Wikimedia](https://commons.wikimedia.org/wiki/File:Paris_-_Basilique_du_Sacr%C3%A9_Coeur,_Montmartre_-_panoramio.jpg).

In [None]:
# URL of the query image to localize.
# NOTE(review): this is the Sacre Coeur night image, while the map in this
# notebook is built from the carV3 dataset; confirm this is the intended query.
url = "https://upload.wikimedia.org/wikipedia/commons/5/53/Paris_-_Basilique_du_Sacr%C3%A9_Coeur%2C_Montmartre_-_panoramio.jpg"
# try other queries by uncommenting their url
# url = "https://upload.wikimedia.org/wikipedia/commons/5/59/Basilique_du_Sacr%C3%A9-C%C5%93ur_%285430392880%29.jpg"
# url = "https://upload.wikimedia.org/wikipedia/commons/8/8e/Sacr%C3%A9_C%C5%93ur_at_night%21_%285865355326%29.jpg"
# Location of the query relative to the dataset root `images`.
query = "query/night.jpg"
# Create <images>/query and download the file with wget (-q: quiet, -O: output
# path). IPython substitutes $images, $query and $url with the values of the
# Python variables; the URL is passed to the shell unquoted, so it must not
# contain characters the shell would interpret.
!mkdir -p $images/query && wget $url -O $images/$query -q
# Display the downloaded query image.
plot_images([read_image(images / query)], dpi=75)

Again, we extract features for the query and match them exhaustively.

In [None]:
# Extract features for the query into the same feature file as the mapping
# images. No bounding box is passed for the query.
# NOTE(review): overwrite=True presumably replaces features stored for the
# query by an earlier run; confirm in extract_features.main.
extract_features.main(
    feature_conf,
    images,
    image_list=[query],
    feature_path=features,
    overwrite=True,
)

# Pair the query with every mapping image and match those pairs.
pairs_from_exhaustive.main(
    loc_pairs,
    image_list=[query],
    ref_list=references,
)
match_features.main(
    matcher_conf,
    loc_pairs,
    features=features,
    matches=matches,
    overwrite=True,
);

We read the EXIF data of the query to infer a rough initial estimate of camera parameters like the focal length. Then we estimate the absolute camera pose using PnP+RANSAC and refine the camera parameters.

In [None]:
import pycolmap
from hloc.localize_sfm import QueryLocalizer, pose_from_cluster

# Initial camera model for the query, inferred from the image file itself.
camera = pycolmap.infer_camera_from_image(images / query)

# Reconstruction ids of the mapping images. SfM does not necessarily register
# every input image; looking up an unregistered name yields None, so those
# images are skipped instead of aborting the cell with an AttributeError.
ref_ids = [
    ref_image.image_id
    for ref_image in map(model.find_image_with_name, references)
    if ref_image is not None
]

conf = {
    # RANSAC settings for the absolute pose estimation.
    "estimation": {"ransac": {"max_error": 12}},
    # Also refine the query camera's intrinsics during pose refinement.
    "refinement": {"refine_focal_length": True, "refine_extra_params": True},
}
localizer = QueryLocalizer(model, conf)
ret, log = pose_from_cluster(localizer, query, camera, ref_ids, features, matches)

# Fail with a clear message when no pose was found, rather than with a
# TypeError from subscripting None below.
if ret is None:
    raise RuntimeError(f"Pose estimation failed for query image {query!r}.")

print(f'found {ret["num_inliers"]}/{len(ret["inliers"])} inlier correspondences.')
visualization.visualize_loc_from_log(images, query, log, model)

We visualize the correspondences between the query image and a few mapping images. We can also visualize the estimated camera pose in the 3D map.

In [None]:
# Wrap the estimated pose in a pycolmap image so it can be drawn as a camera
# in the 3D figure created by the reconstruction cell.
pose = pycolmap.Image(cam_from_world=ret["cam_from_world"])
viz_3d.plot_camera_colmap(
    fig,
    pose,
    camera,
    color="rgba(0,255,0,0.5)",
    name=query,
    fill=True,
)

# Visualize the 2D-3D correspondences: select the 3D point ids of the inlier
# matches, then look up their coordinates in the model.
inlier_point_ids = np.array(log["points3D_ids"])[ret["inliers"]]
inl_3d = np.array([model.points3D[pid].xyz for pid in inlier_point_ids])
viz_3d.plot_points(fig, inl_3d, color="lime", ps=1, name=query)
fig.show()