In [6]:
%load_ext autoreload
%autoreload 2

from pathlib import Path
import sys
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
sys.path.append('fbsource/fbcode/surreal/')
from maploc.data import MapillaryDataModule
from maploc.utils.viz_2d import plot_images, features_to_RGB, save_plot, add_text
from maploc.utils.io import read_image
plt.rcParams.update({'figure.max_open_warning': 0})

In [None]:
from mapillary.data_migration.tools.query_mapillary import run_query, sql_string_set, sql_int_set
# Timeout value handed to run_query (unit not visible here; presumably seconds).
timeout = 300

# Schema inspection. Only the images-table statement below is executed; the
# chunks-table statement is kept under its own name for reference instead of
# being assigned to `query` and immediately overwritten.
chunks_columns_query = """
SHOW COLUMNS FROM mly_chunks_xdb_export
"""
query = """
SHOW COLUMNS FROM mly_images_xdb_export_signal_converted
"""

results = run_query(query, timeout)
results

In [138]:
import math
from tqdm import tqdm
from mapillary.data_migration.tools.query_mapillary import run_query, sql_string_set, sql_int_set
# Timeout value passed to every run_query call (unit not visible here; presumably seconds).
timeout = 300

def get_image_key2fbid(image_keys):
    """Look up the fbid of each image key with a single query.

    Args:
        image_keys: collection of image-key strings; rendered into the SQL
            ``IN`` clause by ``sql_string_set``.

    Returns:
        dict mapping every ``image_key`` returned by the query to its
        ``fbid``. Keys without a matching row are simply absent. If one key
        comes back with several fbids, the last returned row wins.
    """
    # Nothing to look up: skip the round trip. An empty collection would
    # otherwise render an empty IN-list (likely invalid SQL — not verified
    # against sql_string_set).
    if hasattr(image_keys, "__len__") and len(image_keys) == 0:
        return {}
    # GROUP BY collapses duplicate (image_key, fbid) rows.
    q = f"""
    SELECT image_key, fbid
    FROM mly_images_xdb_export_signal_converted AS image
    WHERE
        image.image_key in {sql_string_set(image_keys)}
    GROUP BY image_key, fbid
    """
    ret = run_query(q, timeout)
    return {d["image_key"]: d["fbid"] for d in ret}

def get_image_key2fbid_chunked(image_keys, chunk_size=25000):
    """Resolve image keys to fbids in batches of at most ``chunk_size`` keys.

    Args:
        image_keys: sliceable sequence (e.g. list) of hashable image keys.
        chunk_size: maximum number of keys sent per query; must be >= 1.

    Returns:
        dict mapping each distinct image key to its fbid.

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
        AssertionError: if some requested key could not be resolved; the
            message carries (number of distinct keys, number resolved,
            a sample of the missing keys).
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
    out = {}
    num_chunks = math.ceil(len(image_keys) / chunk_size)
    for i in tqdm(range(num_chunks)):
        chunk = image_keys[i * chunk_size:(i + 1) * chunk_size]
        out.update(get_image_key2fbid(chunk))
    # Compare against the distinct keys: duplicates in the input collapse to
    # one dict entry and must not be reported as unresolved.
    unique_keys = set(image_keys)
    missing = unique_keys - out.keys()
    assert not missing and len(unique_keys) == len(out), (
        len(unique_keys),
        len(out),
        list(missing)[:10],
    )
    return out

In [158]:
def strip_camera_suffix(s):
    """Return ``s`` with its last underscore and everything after it removed.

    A string that contains no underscore is returned unchanged.
    """
    cut = s.rfind("_")
    return s if cut < 0 else s[:cut]

In [95]:
import json
# Scene names; each one is a sub-directory of data/mapillary_dumps_v4.
locations = [
    "sanfrancisco_soma",
    "sanfrancisco_hayes",
    "amsterdam",
    "berlin",
    "lemans",
    "montrouge",
    "toulouse",
    "nantes",
    "vilnius",
    "avignon",
    "helsinki",
    "milan",
    "paris",
    "brussels",
]

# scene name -> parsed outputs_per_sequence.json of that scene
dumps = {}
for scene in locations:
    dump_path = f"data/mapillary_dumps_v4/{scene}/outputs_per_sequence.json"
    with open(dump_path, "r") as fid:
        dumps[scene] = json.load(fid)

# Distinct view keys across all scenes and sequences, with the part after the
# last underscore stripped.
unique_image_keys = set()
for dump in dumps.values():
    for per_seq in dump.values():
        for k in per_seq["views"]:
            unique_image_keys.add(strip_camera_suffix(k))
all_image_keys = list(unique_image_keys)

key2fbid = get_image_key2fbid_chunked(all_image_keys)

In [161]:
import json
# Load the split definition; each entry of a split unpacks into
# (scene, sequence, image key).
split_path = Path("data/mapillary_dumps_v4/splits_mly14_from-v2.json")
with split_path.open("r") as fid:
    splits = json.load(fid)

from collections import defaultdict

# scene -> set of sequences seen in either split
scene_to_sequences = defaultdict(set)
# split name -> scene -> list of (fbid, image key) pairs
scene_to_image_id_keys = {}

for split_name in ["val", "train"]:
    print(f"Working on {split_name}")

    # Bucket this split's image keys per scene and record every sequence.
    scene_to_image_keys = defaultdict(list)
    for scene, seq, key in splits[split_name]:
        scene_to_sequences[scene].add(seq)
        scene_to_image_keys[scene].append(key)

    # Pair each image key with the fbid of its suffix-stripped base key.
    scene_to_image_id_keys[split_name] = {}
    per_scene_ids_keys = scene_to_image_id_keys[split_name]
    for scene, image_keys in scene_to_image_keys.items():
        print(scene, len(image_keys))
        ids_keys = []
        for k in image_keys:
            ids_keys.append((key2fbid[strip_camera_suffix(k)], k))
        per_scene_ids_keys[scene] = ids_keys

In [192]:
from maploc.utils.io import write_json
# Per-scene bounding boxes and EPSG codes.
bbox_path = Path("data/mapillary_dumps_v4/bboxes.json")
with open(bbox_path, "r") as fid:
    bbox_dict = json.load(fid)

# Assemble one record per scene and write everything to a single JSON file.
output = {}
for scene in scene_to_sequences:
    # fbid -> panorama offset (None when the view has none), over every view
    # in the scene dump. Views sharing a base key map to the same fbid, so a
    # later view overwrites an earlier one.
    image_id_to_pano_offset = {
        key2fbid[strip_camera_suffix(k)]: view.get("panorama_offset")
        for per_seq in dumps[scene].values()
        for k, view in per_seq["views"].items()
    }
    scene_bbox = bbox_dict[scene]
    output[scene] = {
        # Sorted so the written file is identical across runs; a plain
        # list(set) depends on set iteration order. Assumes the sequence ids
        # are mutually comparable (e.g. all strings) — TODO confirm.
        "sequences": sorted(scene_to_sequences[scene]),
        # A scene may appear in only one split; fall back to an empty list
        # instead of raising KeyError for the other split.
        "splits": {
            "val": scene_to_image_id_keys["val"].get(scene, []),
            "train": scene_to_image_id_keys["train"].get(scene, []),
        },
        "id_to_pano_offset": image_id_to_pano_offset,
        "bbox_total_latlon": scene_bbox["total_latlon"],
        "bbox_val_local": scene_bbox["val_local"],
        "epsg": scene_bbox["epsg"],
    }
write_json(Path("data/mapillary_dumps_v4/all_fbids.json"), output)

In [194]:
!du -hs "data/mapillary_dumps_v4/all_fbids.json"

# Check by querying the Mapillary (MLY) API

In [208]:
# Take one (fbid, image key) pair from the Paris validation split as the
# sample used by the checks below.
paris_val_pairs = output["paris"]["splits"]["val"]
image_id, key = paris_val_pairs[100]

In [209]:
import requests
from libfb.py.certpathpicker.cert_path_picker import get_client_credential_paths
# Forward-proxy endpoint and CA bundle used for every outgoing request.
FWDPROXY_PORT = 8082
FWDPROXY_HOSTNAME = "https://fwdproxy"
FB_CA_BUNDLE = "/var/facebook/rootcanal/ca.pem"

def get_request(url, params, timeout=60):
    """Issue a GET through the forward proxy and return the decoded JSON body.

    Args:
        url: target URL.
        params: query parameters forwarded to ``requests.get``.
        timeout: value forwarded to ``requests.get(timeout=...)``. Pass
            ``None`` to wait indefinitely, which was the behavior before
            this parameter existed.

    Returns:
        The response body parsed by ``response.json()``.
    """
    thrift_cert, thrift_key = get_client_credential_paths()
    fwdproxy_url = f"{FWDPROXY_HOSTNAME}:{FWDPROXY_PORT}"
    response = requests.get(
        url,
        params=params,
        # The same proxy handles both schemes.
        proxies={"http": fwdproxy_url, "https": fwdproxy_url},
        verify=FB_CA_BUNDLE,
        cert=(thrift_cert, thrift_key),
        # Without a timeout a stalled proxy or server blocks the kernel forever.
        timeout=timeout,
    )
    return response.json()

import os

# Read the access token from the environment so a real credential never has to
# be pasted into (and saved with) the notebook. The variable name
# MAPILLARY_ACCESS_TOKEN is a choice made here; the original "TOKEN"
# placeholder remains the fallback.
token = os.environ.get("MAPILLARY_ACCESS_TOKEN", "TOKEN")
url = "https://graph.mapillary.com/{}"

# Fields requested for the image; joined into the comma-separated string sent
# as the `fields` parameter.
image_fields = [
    "id",
    "camera_parameters",
    "camera_type",
    "captured_at",
    "compass_angle",
    "geometry",
    "computed_compass_angle",
    "computed_geometry",
    "computed_rotation",
    "thumb_2048_url",
    "thumb_original_url",
    "sequence",
    "sfm_cluster",
]
params = {
    "access_token": token,
    "fields": ",".join(image_fields),
}

# Query the sampled image by id and keep its sequence id for the next check.
ret = get_request(url.format(image_id), params)
print(ret)
sequence_id = ret["sequence"]
ret["thumb_2048_url"]

In [None]:
# Display the local file of the sampled image key.
image_path = f"data/mapillary_dumps_v4/paris/images/{key}.jpg"
plot_images([read_image(image_path)])

In [None]:
# List the ids of all images in the sampled image's sequence.
# Dedicated names are used so the `url` / `params` of the single-image query
# are not overwritten and that cell can still be re-run afterwards.
images_url = "https://graph.mapillary.com/images"
sequence_params = {
    "access_token": token,
    "sequence_ids": sequence_id,
    "fields": "id",
}
# The endpoint has no placeholder, so the former `.format(image_id)` call was
# a no-op and is dropped.
ret = get_request(images_url, sequence_params)
image_ids_in_sequence = [d["id"] for d in ret["data"]]
print(image_ids_in_sequence)