In [None]:
from __future__ import annotations
from dataclasses import dataclass

import imageio
import mediapy
import json
import numpy as np
from numba import njit

In [None]:
@njit
def _point_overlap(a: np.ndarray, b: np.ndarray) -> int:
    """Count the values that occur in both `a` and `b`.

    Both arrays are walked once with two cursors, which only gives the
    right answer when each array is sorted in ascending order.
    """
    len_a, len_b = len(a), len(b)
    pos_a = 0
    pos_b = 0
    shared = 0
    while pos_a < len_a and pos_b < len_b:
        left, right = a[pos_a], b[pos_b]
        if left == right:
            # Match: count it and move past it in both arrays.
            shared += 1
            pos_a += 1
            pos_b += 1
            continue
        # No match: advance the cursor that points at the smaller value.
        if left < right:
            pos_a += 1
        else:
            pos_b += 1

    return shared

@dataclass
class Image:
    """One image entry as produced by `read_images_text`."""

    id: int
    qvec: np.ndarray  # 4 floats from the image header line (presumably a rotation quaternion)
    tvec: np.ndarray  # 3 floats from the image header line (presumably a translation)
    camera_id: int
    name: str
    n_features: int  # number of 2D observations, including those without a 3D point
    point3D_ids: np.ndarray  # ascending-sorted ids of the 3D points seen in this image

    def __repr__(self) -> str:
        """Short repr: id, name, feature count and number of 3D point ids only."""
        n_points = len(self.point3D_ids)
        return (
            f"Image(id={self.id}, name={self.name}, "
            f"n_features={self.n_features}, n_point3D_ids={n_points})"
        )

    def point_overlap(self, other: Image) -> int:
        """Return how many 3D point ids this image shares with `other`."""
        mine = self.point3D_ids
        theirs = other.point3D_ids
        return _point_overlap(mine, theirs)

def read_images_text(path):
    """Parse a COLMAP-style ``images.txt`` file into ``Image`` records.

    The file is read as pairs of lines. Blank lines and lines starting
    with ``#`` are skipped while looking for the next header line.

    * Header line: ``image_id, 4 floats (qvec), 3 floats (tvec), camera_id,
      name``, whitespace separated.
    * Observation line (the line right after the header): whitespace
      separated triples whose third entry is a 3D point id; ``-1`` marks an
      observation without a 3D point. The line may be empty.

    Args:
        path: location of the text file.

    Returns:
        dict mapping ``image_id`` to ``Image``. ``n_features`` counts every
        observation, while ``point3D_ids`` holds only the ids different from
        ``-1``, sorted ascending, always with an integer dtype (also when the
        image has no observations at all).
    """
    images = {}
    with open(path, "r") as fid:
        # iter(readline, "") ends at EOF and still allows the explicit
        # readline() below that fetches the observation line.
        for raw_line in iter(fid.readline, ""):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            elems = line.split()
            image_id = int(elems[0])
            qvec = np.array([float(v) for v in elems[1:5]])
            tvec = np.array([float(v) for v in elems[5:8]])
            camera_id = int(elems[8])
            image_name = elems[9]

            observations = fid.readline().split()
            # Explicit dtype: without it an empty observation line would
            # produce a float64 array instead of an integer one.
            all_point3D_ids = np.array(
                [int(token) for token in observations[2::3]], dtype=np.int64
            )

            n_features = len(all_point3D_ids)
            point3D_ids = np.sort(all_point3D_ids[all_point3D_ids != -1])

            images[image_id] = Image(
                id=image_id,
                qvec=qvec,
                tvec=tvec,
                camera_id=camera_id,
                name=image_name,
                n_features=n_features,
                point3D_ids=point3D_ids,
            )
    return images

In [None]:
# Per-scene description of the compacted dataset, keyed by scene id.
dataset_json_path = '/cvlabdata1/cvlab/datasets_tyszkiew/compacted-datasets/megadepth/dataset.json'
with open(dataset_json_path, 'r') as json_file:
    dataset_dict = json.load(json_file)

In [None]:
# Scene inspected by the rest of the notebook.
scene_id = '0162'

scene_dataset_dict = dataset_dict[scene_id]
id_to_image_dataset = scene_dataset_dict['images']
id_triples = scene_dataset_dict['tuples']

# Every triple (t1, t2, t3) contributes the pairs (1, 2), (1, 3) and (2, 3),
# expressed through the entries of `id_to_image_dataset`.
json_pairs = []
for t1, t2, t3 in id_triples:
    first = id_to_image_dataset[t1]
    second = id_to_image_dataset[t2]
    third = id_to_image_dataset[t3]
    json_pairs += [(first, second), (first, third), (second, third)]

In [None]:
# All images registered in the scene's sparse model, keyed by image id.
sfm_images_path = f'/cvlabdata1/cvlab/datasets_tyszkiew/megadepth/MegaDepth_v1_SfM/{scene_id}/sparse/manhattan/0/images.txt'
images_in_sfm = read_images_text(sfm_images_path)

In [None]:
# Reverse lookup: image name -> image id, for every image in `images_in_sfm`.
image_to_id_colmap = {
    sfm_image.name: sfm_image.id
    for sfm_image in images_in_sfm.values()
}

In [None]:
from tqdm.auto import tqdm

def pair_to_images(pair: tuple[str, str]) -> tuple[Image, Image]:
    """Resolve a pair of image names to the matching `Image` records.

    Names are mapped to ids through `image_to_id_colmap` and ids to records
    through `images_in_sfm`; an unknown name raises `KeyError`.
    """
    first_name, second_name = pair[0], pair[1]
    first = images_in_sfm[image_to_id_colmap[first_name]]
    second = images_in_sfm[image_to_id_colmap[second_name]]
    return first, second

# Shared-landmark count for every distinct pair listed in the dataset JSON.
json_overlaps = [
    first.point_overlap(second)
    for first, second in map(pair_to_images, set(json_pairs))
]
print(len(json_overlaps))

In [None]:
from itertools import combinations

# Shared-landmark count for every unordered pair of images in the SfM model.
# `sfm_pairs[k]` holds the two image names that produced `sfm_overlaps[k]`.
n_images = len(images_in_sfm)
n_iter = n_images * (n_images - 1) // 2  # number of 2-combinations, for the progress bar

sfm_pairs, sfm_overlaps = [], []
for i1, i2 in tqdm(combinations(images_in_sfm.values(), r=2), total=n_iter):
    sfm_overlaps.append(i1.point_overlap(i2))
    sfm_pairs.append((i1.name, i2.name))

In [None]:
image_root = f'/cvlabdata1/cvlab/datasets_tyszkiew/megadepth/MegaDepth_v1_SfM/{scene_id}/images'


def load(im):
    """Read the image file named `im` from below `image_root` with imageio."""
    # `image_root` is resolved when load() is called, so rebinding that
    # name later changes where images are read from.
    return imageio.imread(f'{image_root}/{im}')

In [None]:
import matplotlib.pyplot as plt

# Edges at 0.5, 1.5, ..., 599.5: one unit-wide bin per integer overlap 1..599.
# Pairs that share no landmark (overlap 0) fall outside the range.
bins = np.arange(0, 600) + 0.5
_ = plt.hist(json_overlaps, bins=bins, histtype='step', density=True, label='disk subset')
_ = plt.hist(sfm_overlaps, bins=bins, histtype='step', density=True, label='raw megadepth')
plt.xlabel('size of intersection of landmarks')
# density=True normalises each curve, so the y axis is a fraction rather
# than a raw count of pairs.
plt.ylabel('fraction of pairs')
plt.legend()

In [None]:
# Count of SfM pairs per overlap value. The edges -0.5, 0.5, ..., 598.5 put
# each integer overlap 0..598 in its own bin; `_b` receives the edges.
h, _b = np.histogram(sfm_overlaps, bins=np.arange(0, 600) - 0.5)

In [None]:
# Peek at one entry: a tuple with the two image names of that pair.
sfm_pairs[5]

In [None]:
import matplotlib.pyplot as plt
sfm_overlaps = np.array(sfm_overlaps)

# Indices of the pairs whose overlap lies in (10, 20]. Despite its name the
# variable is not limited to overlaps of up to 5. Use the condition
# `sfm_overlaps > 1` instead to browse every pair with more than one
# shared landmark.
overlap_in_range = (sfm_overlaps > 10) & (sfm_overlaps <= 20)
pairs_overlap_up_to_5 = np.flatnonzero(overlap_in_range)

# Unseeded in-place shuffle: the examples shown differ from run to run.
np.random.shuffle(pairs_overlap_up_to_5)

def show_pairs(pairs: list[tuple[str, str]]):
    """Show image pairs side by side, one pair per figure row.

    Args:
        pairs: sequence of ``(name_a, name_b)``; each name is handed to
            ``load`` to obtain the pixels. An empty sequence draws nothing.
    """
    n = len(pairs)
    if n == 0:
        # Nothing to draw, and a grid with zero rows cannot be created.
        return

    # squeeze=False keeps `axes` two-dimensional, so a single pair still
    # yields one row holding two axes.
    fig, axes = plt.subplots(
        n, 2, figsize=(10, n * 5), tight_layout=True, squeeze=False
    )
    for (a1, a2), pair in zip(axes, pairs):
        a1.imshow(load(pair[0]))
        a2.imshow(load(pair[1]))
        a1.axis('off')
        a2.axis('off')

# Display the first five pairs of the shuffled selection.
show_pairs([sfm_pairs[i] for i in pairs_overlap_up_to_5[:5]])

In [None]:
import h5py

# Pairs stored in the scene's split metadata. 'images' holds the image names
# as bytes, 'pairs' holds rows of indices into that name list.
# The path follows `scene_id` so this cell stays in step with the other
# per-scene cells when a different scene is selected.
split_metadata_path = f'/cvlabdata1/cvlab/datasets_tyszkiew/megadepth/MegaDepth_v1_SfM/{scene_id}/sparse/manhattan/0/split_metadata.h5'
with h5py.File(split_metadata_path, 'r') as hdf:
    new_names = [name.decode('utf-8') for name in hdf['images'][()]]
    # Read the whole index table at once instead of one row per access.
    pair_indices = hdf['pairs'][()]

new_pairs = [(new_names[pair[0]], new_names[pair[1]]) for pair in pair_indices]

In [None]:
# Shared-landmark count for every pair from the split metadata, in the
# same order as `new_pairs`.
new_pairs_overlap = [
    first.point_overlap(second)
    for first, second in map(pair_to_images, new_pairs)
]

In [None]:
# Raw pair counts (density=False) per overlap value for both pair sets.
plt.hist(new_pairs_overlap, bins=bins, histtype='step', density=False, label='new megadepth')
plt.hist(json_overlaps, bins=bins, histtype='step', density=False, label='disk subset')
plt.xlabel('size of intersection of landmarks')
plt.ylabel('number of pairs')
plt.legend()
# Trailing None keeps the cell from echoing the Legend object.
None

In [None]:
# Pairs present in the split metadata but absent from the dataset JSON.
# Pairs are compared as ordered tuples, so (a, b) and (b, a) count as different.
# Sorting makes the order, and therefore any slice of it, reproducible
# across kernels; plain set iteration order is not.
added_pairs = sorted(set(new_pairs) - set(json_pairs))

In [None]:
# Display up to five of the pairs that only the split metadata contains.
show_pairs(added_pairs[:5])

In [None]:
import os
import json

# Gather, from every directory below the SfM root that contains a
# missing_images.json, the listed image names as paths of the form
# MegaDepth_v1_SfM/<scene>/images/<name>, and write them to
# images_to_unpack.json.
#
# The loop uses its own variable names on purpose: the notebook-level
# `scene_id` and `image_root` are still needed by the earlier cells
# (`load` reads `image_root` each time it is called).
sfm_root = '/cvlabdata1/cvlab/datasets_tyszkiew/megadepth/MegaDepth_v1_SfM/'

all_missing_images = []
for root, dirs, files in os.walk(sfm_root):
    dirs.sort()  # visit sub-directories in a fixed order -> reproducible output
    if 'missing_images.json' not in files:
        continue

    with open(os.path.join(root, 'missing_images.json'), 'r') as json_file:
        missing_file_names = json.load(json_file)

    # The scene is the first path component below the SfM root; this does
    # not depend on how deep the root itself sits in the file system.
    missing_scene_id = os.path.relpath(root, sfm_root).split(os.sep)[0]

    missing_image_root = f'MegaDepth_v1_SfM/{missing_scene_id}/images'
    missing_file_paths = [
        os.path.join(missing_image_root, name) for name in missing_file_names
    ]

    all_missing_images.extend(missing_file_paths)

with open('images_to_unpack.json', 'w') as json_file:
    json.dump(all_missing_images, json_file)

In [None]:
# Total number of missing image paths collected over all scenes.
len(all_missing_images)

In [None]:
# Sample entry, to check the path layout (raises IndexError if the list is empty).
all_missing_images[0]

In [None]:
from create_file_lists import read_cameras_text

In [None]:
# Display what read_cameras_text returns for the cameras file of scene 0162.
# NOTE(review): the scene is hard-coded here rather than taken from `scene_id` — confirm this is intended.
read_cameras_text('/cvlabdata1/cvlab/datasets_tyszkiew/megadepth/MegaDepth_v1_SfM/0162/sparse/manhattan/0/cameras.txt')