In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.image import imread

import pycolmap

from megadepth.metrics.metadata import collect_metrics

In [None]:
from pathlib import Path
import h5py
import numpy as np
import torch
import collections.abc as collections
from hloc.utils.io import list_h5_names
from hloc.utils.read_write_model import read_images_binary
from hloc.utils.read_write_model import read_images_binary
from hloc.utils.io import list_h5_names
from hloc.utils.parsers import parse_image_lists

# --- Dataset location ---------------------------------------------------
base = "../data"
scene = "0229"
image_dir = os.path.join(base, scene, "images")

# --- Retrieval inputs / outputs -----------------------------------------
# Global descriptors of the scene, stored in a single HDF5 file.
descriptors = Path(base) / scene / "features" / "netvlad.h5"
output = Path(base) / "retrieval.txt"
num_matched = 100

# --- Optional filters (all disabled) --------------------------------------
# With every option set to None, all images found in `descriptors` are used
# both as queries and as database images.
query_prefix = query_list = None
db_prefix = db_list = None
db_model = None
db_descriptors = None

In [None]:
def parse_names(prefix, names, names_all):
    """Select the image names to work with.

    The selection rules are tried in this order:

    1. ``prefix`` given: keep the entries of ``names_all`` that start with it.
    2. ``names`` given: use it, either as a path to a list file or as an
       iterable of names.
    3. Otherwise: return ``names_all`` unchanged.

    Args:
        prefix: ``None``, a single string prefix, or an iterable of prefixes.
        names: ``None``, a ``str``/``Path`` to an image list file (read with
            ``parse_image_lists``), or an iterable of image names.
        names_all: all available image names.

    Returns:
        The selected image names.

    Raises:
        ValueError: if no name matches ``prefix``, or if ``names`` is of an
            unsupported type.
    """
    if prefix is not None:
        if not isinstance(prefix, str):
            # str.startswith accepts a tuple of prefixes, but not a list.
            prefix = tuple(prefix)
        names = [n for n in names_all if n.startswith(prefix)]
        if len(names) == 0:
            raise ValueError(f"Could not find any image with the prefix `{prefix}`.")
        return names

    if names is None:
        return names_all
    if isinstance(names, (str, Path)):
        return parse_image_lists(names)
    if isinstance(names, collections.Iterable):
        return list(names)
    # The trailing space keeps the two sentences of the message apart.
    raise ValueError(
        f"Unknown type of image list: {names}. "
        "Provide either a list or a path to a list file."
    )

In [None]:
def get_descriptors(names, path, name2idx=None, key="global_descriptor"):
    """Read one descriptor per image name from HDF5 and stack them.

    Args:
        names: image names; each one is used as a group name in the file.
        path: a single HDF5 path when ``name2idx`` is ``None``; otherwise an
            indexable collection of HDF5 paths.
        name2idx: optional mapping from image name to the index in ``path``
            of the file that stores it.
        key: dataset name read inside each image group.

    Returns:
        A float tensor with the descriptors stacked along dimension 0, in the
        order of ``names``.
    """
    if name2idx is not None:
        stacked = []
        for img_name in names:
            # Each name may live in a different file, so open the one it maps to.
            h5_file = str(path[name2idx[img_name]])
            with h5py.File(h5_file, "r", libver="latest") as fd:
                stacked.append(fd[img_name][key].__array__())
    else:
        # Single file: open it once and read every requested name.
        with h5py.File(str(path), "r", libver="latest") as fd:
            stacked = [fd[img_name][key].__array__() for img_name in names]
    return torch.from_numpy(np.stack(stacked, 0)).float()

In [None]:
# When no separate database descriptor file is configured, the query file
# doubles as the database.
if db_descriptors is None:
    db_descriptors = descriptors
if isinstance(db_descriptors, (Path, str)):
    db_descriptors = [db_descriptors]
# Map every database image name to the index of the descriptor file holding it.
name2db = {n: i for i, p in enumerate(db_descriptors) for n in list_h5_names(p)}
db_names_h5 = list(name2db.keys())
query_names_h5 = list_h5_names(descriptors)

if db_model:
    # Path() lets `db_model` be given as a plain string as well as a Path.
    images = read_images_binary(Path(db_model) / "images.bin")
    db_names = [i.name for i in images.values()]
else:
    db_names = parse_names(db_prefix, db_list, db_names_h5)
if len(db_names) == 0:
    raise ValueError("Could not find any database image.")
query_names = parse_names(query_prefix, query_list, query_names_h5)

device = "cuda" if torch.cuda.is_available() else "cpu"
db_desc = get_descriptors(db_names, db_descriptors, name2db)
query_desc = get_descriptors(query_names, descriptors)
# sim[i, j] is the dot product of query descriptor i and database descriptor j.
# The product is computed on the GPU when one is available and then moved back
# to the CPU, so later cells can convert it to NumPy / pandas without failing.
sim = torch.einsum("id,jd->ij", query_desc.to(device), db_desc.to(device)).cpu()

# Avoid self-matching
# self[i, j] is True where query i and database image j share the same name.
# NOTE: the mask is only built here; it is not applied to `sim` in this cell.
self = np.array(query_names)[:, None] == np.array(db_names)[None]

In [None]:
# Similarity matrix: one row per query image, one column per database image.
sim

In [None]:
# create a histogram of the scores for each row
# then run clustering on the histograms

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# create a histogram of the scores for each row
# `.cpu()` is a no-op for CPU tensors and makes the conversion work when `sim`
# lives on the GPU. A dedicated name is used so that the `descriptors` path
# from the configuration cell is not overwritten by an array.
sim_np = sim.cpu().numpy()
n_bins = 50
histograms = np.zeros((sim_np.shape[0], n_bins))
for i, row in enumerate(sim_np):
    # Scores outside [0, 1] fall outside `range` and are not counted.
    histograms[i] = np.histogram(row, bins=n_bins, range=(0, 1))[0]

In [None]:
# run clustering on the histograms
# Split the per-image score histograms into two groups.
kmeans = KMeans(n_clusters=2, random_state=0).fit(histograms)

# NOTE: `mean_per_img` and `r_img_names` are assigned in cells further down
# this notebook; those cells must have been executed before this one.
df = pd.DataFrame(
    {
        "image": query_names,
        "cluster": kmeans.labels_,
        # Explicit tensor -> NumPy conversion; also works for CUDA tensors.
        "score": mean_per_img.cpu().numpy(),
    }
)
# Vectorised membership test, same result as checking each name individually.
df["registered"] = df["image"].isin(r_img_names)

In [None]:
# Group the rows by cluster and order them by increasing score inside each
# cluster (ascending order is the default for every key).
df = df.sort_values(["cluster", "score"])

df

In [None]:
# One bar per image: green when the image is registered, red otherwise.
bar_colors = df.registered.map({True: "green", False: "red"})

fig, ax = plt.subplots(figsize=(10, 10))
ax.bar(df.image, df.score, color=bar_colors)
# Image names are hidden on the x axis.
ax.set_xticks([])
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Use plain 0/1 integers on both sides so the label sets are comparable.
target = df["registered"].astype(int)
pred = df["cluster"].astype(int)

# K-means numbers its two clusters arbitrarily, so cluster 1 does not
# necessarily mean "registered". Flip the ids when the inverted labelling
# agrees better with the ground truth; otherwise the report may be inverted.
if (pred == target).mean() < 0.5:
    pred = 1 - pred

# `labels` pins both classes so the report also works when one is absent.
print(
    classification_report(
        target, pred, labels=[0, 1], target_names=["unregistered", "registered"]
    )
)

In [None]:
# Per query image: fraction of database images with a similarity above 0.2.
above_threshold = sim > 0.2
mean_per_img = above_threshold.sum(dim=1) / sim.shape[1]

In [None]:
# Per query image: mean similarity over all database images.
# This replaces any earlier definition of `mean_per_img`.
mean_per_img = torch.mean(sim, dim=1)
mean_per_img.shape

In [None]:
# Names of the image files of the scene (.jpg, .JPG and .png only).
img_names = [
    fname
    for fname in os.listdir(os.path.join(base, scene, "images"))
    if fname.endswith((".jpg", ".JPG", ".png"))
]

In [None]:
# One row per query image, indexed by image name.
# NOTE: `r_img_names` is assigned in a cell further down this notebook; that
# cell must have been executed before this one.
df = (
    pd.DataFrame(
        {
            "query": query_names,
            # Explicit tensor -> NumPy conversion; also works for CUDA tensors.
            "mean_per_img": mean_per_img.cpu().numpy(),
        }
    )
    .set_index("query")
    .assign(registered=lambda frame: frame.index.isin(r_img_names))
    # Unregistered images first; increasing score inside each group.
    .sort_values(by=["registered", "mean_per_img"], ascending=[True, True])
)
df

In [None]:
# One bar per query image: green when registered, red otherwise.
bar_colors = df.registered.map({True: "green", False: "red"})

fig, ax = plt.subplots(figsize=(10, 10))
ax.bar(df.index, df.mean_per_img, color=bar_colors)
# Image names are hidden on the x axis.
ax.set_xticks([])
plt.show()

In [None]:
# First ten unregistered rows in the current order of `df`.
df.loc[~df.registered].head(10)

In [None]:
# Plot the first 25 unregistered images in the current order of `df`
# (the lowest scores when `df` is sorted by increasing `mean_per_img`).

cdf = df[~df.registered].head(25)

fig, ax = plt.subplots(5, 5, figsize=(10, 10))
# Fill the 5x5 grid row by row; panels without an image are left untouched.
for panel, (name, row) in zip(ax.flat, cdf.iterrows()):
    panel.imshow(imread(os.path.join(image_dir, name)))
    panel.set_title(f"{100*row.mean_per_img:.1f} %")
    panel.set_xticks([])
    panel.set_yticks([])

plt.show()

In [None]:
# Plot the last 25 unregistered images in the current order of `df`
# (the highest scores when `df` is sorted by increasing `mean_per_img`).
cdf = df[~df.registered].tail(25)

fig, ax = plt.subplots(5, 5, figsize=(10, 10))
# Fill the 5x5 grid row by row; panels without an image are left untouched.
for panel, (name, row) in zip(ax.flat, cdf.iterrows()):
    panel.imshow(imread(os.path.join(image_dir, name)))
    panel.set_title(f"{100*row.mean_per_img:.1f} %")
    panel.set_xticks([])
    panel.set_yticks([])

plt.show()

In [None]:
# Plot the last 25 registered images in the current order of `df`
# (the highest scores when `df` is sorted by increasing `mean_per_img`).
cdf = df[df.registered].tail(25)

fig, ax = plt.subplots(5, 5, figsize=(10, 10))
# Fill the 5x5 grid row by row; panels without an image are left untouched.
for panel, (name, row) in zip(ax.flat, cdf.iterrows()):
    panel.imshow(imread(os.path.join(image_dir, name)))
    panel.set_title(f"{100*row.mean_per_img:.1f} %")
    panel.set_xticks([])
    panel.set_yticks([])

plt.show()

In [None]:
# Plot the first 25 registered images in the current order of `df`
# (the lowest scores when `df` is sorted by increasing `mean_per_img`).
cdf = df[df.registered].head(25)

fig, ax = plt.subplots(5, 5, figsize=(10, 10))
# Fill the 5x5 grid row by row; panels without an image are left untouched.
for panel, (name, row) in zip(ax.flat, cdf.iterrows()):
    panel.imshow(imread(os.path.join(image_dir, name)))
    panel.set_title(f"{100*row.mean_per_img:.1f} %")
    panel.set_xticks([])
    panel.set_yticks([])

plt.show()

In [None]:
def plot_images(images: list, titles: list, dimension: tuple) -> None:
    """Show images with their titles in a grid, filled row by row.

    Args:
        images: image arrays accepted by ``imshow``.
        titles: one title per image, in the same order as ``images``.
        dimension: ``(rows, columns)`` of the grid.

    When fewer images (or titles) than grid cells are supplied, only the
    available ones are drawn and the remaining cells stay empty, instead of
    raising ``IndexError``. Images beyond the grid capacity are ignored.
    """
    n_rows, n_cols = dimension[0], dimension[1]
    n_panels = min(len(images), len(titles), n_rows * n_cols)

    fig = plt.figure(figsize=(30, 30))
    for idx in range(n_panels):
        # Subplot positions are 1-based and run row by row.
        ax = fig.add_subplot(n_rows, n_cols, idx + 1)
        ax.axis("off")
        ax.imshow(images[idx])
        ax.set_title(titles[idx])

    plt.show()

In [None]:
# Scene under analysis and the folder holding its images.
base = "../data"
scene = "0229"
image_dir = os.path.join(base, scene, "images")

# Name of the retrieval run whose pair list is analysed below.
retrieval_model = "cosplace-50"

# Name of the sparse model folder that is loaded below.
# Alternatives that were tried: "superpoint_max-superglue", or appending
# "-" + retrieval_model to the name when retrieval_model is not empty.
full_model = "superpoint_max-superglue-netvlad-50"

In [None]:
# Names of the image files of the scene (.jpg, .JPG and .png only).
img_names = [
    fname
    for fname in os.listdir(os.path.join(base, scene, "images"))
    if fname.endswith((".jpg", ".JPG", ".png"))
]

In [None]:
# Load the sparse model stored under <base>/<scene>/sparse/<full_model>.
model_path = os.path.join(base, scene, "sparse", full_model)
reconstruction = pycolmap.Reconstruction(model_path)
cameras = reconstruction.cameras
images = reconstruction.images

In [None]:
# Names of all images contained in the loaded reconstruction.
r_img_names = [image.name for image in images.values()]

In [None]:
# Retrieval pair list: one space-separated "from to" pair per line, no header.
retrieval_file = os.path.join(
    base, scene, "matches", "retrieval", retrieval_model + ".txt"
)
retrievals = pd.read_table(retrieval_file, sep=" ", names=["from", "to"])

In [None]:
# Retrieval pairs as loaded from the pair list: columns "from" and "to".
retrievals

In [None]:
# Number of pairs per "from" image, flagged by whether that image is part of
# the reconstruction. Unregistered images come first; counts increase inside
# each group.
df = (
    retrievals.groupby("from")
    .count()
    .sort_values(by="to", ascending=False)
    .assign(registered=lambda counts: counts.index.isin(r_img_names))
    .sort_values(by=["registered", "to"], ascending=[True, True])
)
df

In [None]:
# One bar per "from" image: green when registered, red otherwise.
bar_colors = df["registered"].map({True: "green", False: "red"})

fig, ax = plt.subplots(figsize=(20, 10))
ax.bar(df.index, df["to"], color=bar_colors)
ax.set_ylabel("Number of retrievals")
# Image names are hidden on the x axis.
ax.set_xticks([])

plt.show()

In [None]:
# Count matrix of retrieval pairs ("from" rows x "to" columns), expanded to
# every image on disk; images that never occur in a pair get zeros.
res = pd.crosstab(retrievals["from"], retrievals["to"]).reindex(
    index=img_names, columns=img_names, fill_value=0
)

In [None]:
# Per image: number of pairs in which it appears as "to", divided by the
# total number of images.
retrieval_scores = res.sum(axis=0).div(len(img_names))

In [None]:
# Images found on disk that are not part of the reconstruction.
diff = np.setdiff1d(img_names, r_img_names)
# Boolean mask over `retrieval_scores`: True for those images.
diff_mask = retrieval_scores.index.isin(diff)

In [None]:
# Retrieval score per image, indexed by image name.
retrieval_scores

In [None]:
# Average retrieval score over the images selected by `diff_mask`.
retrieval_scores[diff_mask].mean()

In [None]:
# Order the scores increasingly; the mask is rebuilt because it is positional
# and the order of the index has just changed.
retrieval_scores = retrieval_scores.sort_values()
diff_mask = retrieval_scores.index.isin(diff)

In [None]:
# Put the images outside `diff` first and the images in `diff` last, keeping
# the current order inside each group.
in_reconstruction = retrieval_scores.loc[~diff_mask]
not_in_reconstruction = retrieval_scores.loc[diff_mask]
retrieval_scores = pd.concat([in_reconstruction, not_in_reconstruction])
# The mask is positional, so rebuild it for the new order.
diff_mask = retrieval_scores.index.isin(diff)

In [None]:
# One bar per image: orange for images selected by `diff_mask`, blue otherwise.
bar_colors = ["orange" if in_diff else "blue" for in_diff in diff_mask]

fig, ax = plt.subplots(figsize=(15, 8))
# Image names are hidden on the x axis.
ax.set_xticks([])
ax.bar(retrieval_scores.index, retrieval_scores.values, color=bar_colors)
plt.show()

In [None]:
# Up to ten images selected by `diff_mask` with the highest retrieval scores,
# each titled with its score.
top_scores = retrieval_scores[diff_mask].nlargest(10)
colors = [imread(os.path.join(image_dir, img_name)) for img_name in top_scores.index]
labels = top_scores.tolist()

plot_images(colors, labels, (5, 2))

In [None]:
# Image selected by `diff_mask` that has the highest retrieval score.
query_img = retrieval_scores[diff_mask].idxmax()
# All retrieval pairs that point to that image.
# NOTE: the name `all` shadows the Python builtin; it is kept because the
# following cell reads it.
points_to_query = retrievals["to"] == query_img
all = pd.concat([retrievals.loc[points_to_query]])
all

Unregistered image with largest number of retrieval associations

In [None]:
# Load the images named in the first column of the first ten rows of `all`
# and title them with their position in that list.
colors = [imread(os.path.join(image_dir, image)) for image in all.iloc[:10, 0]]

labels = np.arange(len(colors))

plot_images(colors, labels, (5, 2))