A collection of code snippets to investigate OSM data, find missing labels, and plot the distribution of classes.

In [30]:
%load_ext autoreload
%autoreload 2

from pathlib import Path
import torch
import yaml
import sys
from torchmetrics import MetricCollection
from omegaconf import OmegaConf
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from pytorch_lightning import seed_everything
sys.path.append('fbsource/fbcode/scripts/psarlin/')

from maploc.data.loader import MapLocDataModule
from maploc.data.loader_mapillary import MapillaryDataModule
from maploc.train import GenericModule
from maploc.utils.viz_2d import plot_images, plot_keypoints, features_to_RGB, save_plot, add_text
from maploc.utils.viz_localization import likelihood_overlay, plot_pose, plot_dense_rotations
from maploc.utils.geo import Projection, BoundaryBox
from maploc.osm.download import get_osm
from maploc.osm.index import MapIndex
from maploc.osm.parser import group_elements, Groups
from maploc.osm.raster import Canvas, render_raster_map, render_raster_masks
from maploc.osm.reader import OSMData
from maploc.osm.data import MapData
from maploc.osm.tiling_v2 import TileManager
from maploc.osm.viz import Colormap, plot_nodes
# Disable gradient tracking globally for the rest of the session.
torch.set_grad_enabled(False);
# A threshold of 0 turns off matplotlib's "too many open figures" warning.
plt.rcParams.update({'figure.max_open_warning': 0})

# Analyze the missing labels

In [114]:
# Load the tile manager dumped for Nantes; its bounding box defines the area studied below.
manager = TileManager.load("./data/mapillary_dumps_v2/nantes/tiles.pkl")
path = Path("/data/users/psarlin/data/osm/nantes.osm")
# NOTE(review): presumably EPSG:2154 is the projected CRS the tiles were built with — confirm.
projection = Projection("EPSG:2154")
bbox = manager.bbox

# Map the tile bounding box through projection.unproject for the OSM lookups below.
bbox_osm = projection.unproject(bbox)
if path.is_file():
    # A local OSM dump exists: read it and, if it declares a box, check that it covers the area.
    osm = OSMData.from_file(path)
    if osm.box is not None:
        assert osm.box.contains(bbox_osm)
else:
    # No local dump: obtain the data through get_osm, passing a JSON path nested under `path`.
    # NOTE(review): this treats "nantes.osm" as a directory name — confirm this is intended.
    path = path / f"{bbox_osm}.json"
    osm = OSMData.from_dict(get_osm(bbox_osm, path))

# Attach projected xy coordinates to the nodes, then build the map data and its index.
osm.add_xy_to_nodes(projection)
data = MapData.from_osm(osm)
index = MapIndex(data)
nodes = osm.nodes.values()
ways = osm.ways.values()

# Plot histograms for the loaded OSM data.
from maploc.osm.analysis import plot_osm_histograms
plot_osm_histograms(osm, fontsize=4, dpi=200)

In [15]:
from maploc.mapillary.run import locations
from maploc.osm.index import OSMIndex
# Three alternative scene selections; only the last assignment takes effect,
# the two before it are overwritten.
scenes = ["sanfrancisco_soma", "sanfrancisco_hayes"]
scenes = set(locations) - {"sanfrancisco_soma_test"}
scenes = ["amsterdam"]
print(len(scenes), scenes)
# Accumulate the nodes and ways returned by the bounding-box query of every selected scene.
all_nodes = []
all_ways = []
for scene in scenes:
    loc = locations[scene]
    # NOTE: this rebinds the notebook-level `osm`, `projection`, `index`, `nodes` and `ways`
    # that the Nantes cell also defines.
    osm = OSMData.from_file(loc["osm_path"])
    projection = Projection(loc["epsg"])
    osm.add_xy_to_nodes(projection)
    index = OSMIndex(osm)
    # Query the index with the scene's bounding box after projecting it.
    nodes, ways = index.query(projection.project(loc["bbox"]))
    all_nodes.extend(nodes)
    all_ways.extend(ways)

In [22]:
from maploc.osm.parser import filter_node, parse_node, match_to_group, Patterns, filter_way, parse_way, filter_area, parse_area
from collections import Counter
from pprint import pprint

def filter_unlabeled(elems, parse_fn, i):
    """Collect the elements that cannot be labeled or assigned to a group.

    Args:
        elems: iterable of elements exposing a ``tags`` attribute.
        parse_fn: callable mapping ``tags`` to a label, or to ``None`` when
            no label applies.
        i: element kind, which selects the order in which the pattern sets
            of ``Patterns`` are tried: ``0`` for nodes (nodes, then ways),
            ``1`` for ways (ways, then nodes), any other value for areas
            (areas, then ways, then nodes).

    Returns:
        A tuple ``(no_label, no_group)``: the list of non-empty ``tags`` for
        which ``parse_fn`` returned ``None``, and a ``Counter`` of the labels
        that matched no group in any of the tried pattern sets.
    """
    # Names of the Patterns attributes to try, most specific first.
    if i == 0:
        fallback_order = ("nodes", "ways")
    elif i == 1:
        fallback_order = ("ways", "nodes")
    else:
        fallback_order = ("areas", "ways", "nodes")

    unparsed_tags = []
    ungrouped = Counter()
    for elem in elems:
        label = parse_fn(elem.tags)
        if label is None:
            # Elements without any tag carry no information worth reporting.
            if len(elem.tags) > 0:
                unparsed_tags.append(elem.tags)
            continue
        for pattern_set in fallback_order:
            if match_to_group(label, getattr(Patterns, pattern_set)) is not None:
                break
        else:
            # No pattern set produced a group for this label.
            ungrouped[label] += 1
    return unparsed_tags, ungrouped

In [None]:
# Nodes: gather the tags that parse_node cannot label and the labels matching no group.
no_label, no_group = filter_unlabeled((n for n in all_nodes if filter_node(n)), parse_node, 0)
# Keep at most 100 ungrouped labels seen at least 10 times, most frequent first.
common = [item for item in no_group.most_common() if item[1] >= 10][:100]
pprint(common)

In [None]:
# The 50 most frequent tag sets in `no_label`, skipping those that carry a house number.
Counter(str(tags) for tags in no_label if "addr:housenumber" not in tags).most_common(50)

In [255]:
# Ways: gather the tags that parse_way cannot label and the labels matching no group.
no_label, no_group = filter_unlabeled((w for w in all_ways if filter_way(w)), parse_way, 1)
# Keep at most 100 ungrouped labels seen at least 10 times, most frequent first.
common = [item for item in no_group.most_common() if item[1] >= 10][:100]
pprint(common)

In [None]:
# The 50 most frequent tag sets in `no_label`, skipping those that carry a house number.
Counter(str(tags) for tags in no_label if "addr:housenumber" not in tags).most_common(50)

In [258]:
# Areas: gather the tags that parse_area cannot label and the labels matching no group.
no_label, no_group = filter_unlabeled((w for w in all_ways if filter_area(w)), parse_area, 2)
# Keep every ungrouped label seen at least 5 times, most frequent first.
common = [item for item in no_group.most_common() if item[1] >= 5]
pprint(common)

In [273]:
# Labels that parse_area assigns to every area tagged natural=water.
[
    parse_area(way.tags)
    for way in all_ways
    if filter_area(way) and way.tags.get("natural") == "water"
]

In [270]:
# Raw tags of every area tagged natural=water.
[
    way.tags
    for way in all_ways
    if filter_area(way) and way.tags.get("natural") == "water"
]

In [None]:
# The 50 most frequent tag sets in `no_label`, skipping those that carry a house number.
Counter(str(tags) for tags in no_label if "addr:housenumber" not in tags).most_common(50)

In [None]:
from maploc.osm.analysis import plot_sankey_hierarchy

# Build the Sankey hierarchy figure for the currently loaded `osm` and save it as HTML.
# NOTE(review): the output file is named after Nantes, but `osm` is whatever the last executed
# loading cell left behind (the scene loop also rebinds it) — confirm before trusting the file.
fig = plot_sankey_hierarchy(osm)
fig.write_html('./data/nantes_hierarchy.html')

In [None]:
from maploc.osm.viz import Colormap, plot_nodes
colormap = Colormap()

# Center of the preview window: the bounding-box center shifted by (100, -100).
c = bbox.center+[100, -100]
# Window extending 64 units on each side of `c`.
box = BoundaryBox(c-64, c+64)
# NOTE(review): this unpacks three values, whereas the OSMIndex query in the scene loop
# returns two; presumably `index` must be the MapIndex from the Nantes cell — confirm.
nodes, lines, areas = index.query(box)
print(len(nodes), len(lines), len(areas))
# NOTE(review): the second Canvas argument is presumably pixels per meter (another cell
# passes it as `ppm=`) — confirm.
canvas = Canvas(box, 4)
# Rasterize the queried elements into masks, merge them into a raster map and colorize it.
masks = render_raster_masks(nodes, lines, areas, canvas)
raster = render_raster_map(masks)
map_viz = colormap.apply(raster)
plot_images([map_viz], dpi=200)
# Overlay the content of raster[2] on the first plotted image.
# NOTE(review): index 2 is presumably the node layer — confirm.
plot_nodes(0, raster[2])

In [None]:
from maploc.osm.parser import match_to_group, parse_area, Patterns

# Plot the multipolygon relations whose label maps to a non-building area or
# way group, one plot_images call per relation, stopping after the 101st plot.
# NOTE(review): `multipolygon_from_relation` is not imported in any visible
# cell — confirm where it is defined, otherwise this loop raises NameError.
# NOTE(review): `c` rebinds the name used for the box center in the raster cell.
c = 0
for rel in osm.relations.values():
    if rel.tags.get("type") != "multipolygon":
        continue
    label = parse_area(rel.tags)
    if label is None:
        # Unlabeled relation: print its tags for inspection and move on.
        print(label, rel.tags)
        continue
    group = match_to_group(label, Patterns.areas)
    if group is None:
        # No area group matched: fall back to the way patterns.
        group = match_to_group(label, Patterns.ways)
        if group is None:
            print("No area or way group", label)
            continue
    ret = multipolygon_from_relation(rel)
    if ret is None or group == "building":
        continue
    inners, outers = ret
    xy = np.stack([node.xy for ring in inners + outers for node in ring])
    # Bounding box of all ring vertices, offset by 20 (presumably padding), drawn with ppm=2.
    box = BoundaryBox(xy.min(0), xy.max(0)) + 20
    canvas = Canvas(box, ppm=2)
    canvas.draw_multipolygon([[node.xy for node in ring] for ring in inners + outers])
    plot_images([canvas.raster], titles=[group])
    c += 1
    if c > 100:
        break