In [1]:
!ls

add-area.ipynb            calles_bsas_fixed.geojson [34mto_add[m[m


In [1]:
#para ver areas de cobertura usar el boton cobertus de la version local del app cargar el shapefile en jupyter es imposible

import os
import glob
import json
import hashlib
from datetime import datetime

BASE_FILE = "calles_bsas_fixed.geojson"
TO_ADD_DIR = "to_add"
BACKUP = True

EXCLUDE_HIGHWAYS = {"service"}
EXCLUDE_SERVICE_SUBTYPES = {"alley"}  # only relevant when highway=service

KEEP_GEOMS = {"LineString", "MultiLineString"}  # keep streets as lines only

def human_bytes(n: int) -> str:
    for unit in ["B","KB","MB","GB","TB"]:
        if n < 1024:
            return f"{n:.1f}{unit}" if unit != "B" else f"{n}{unit}"
        n /= 1024
    return f"{n:.1f}PB"

def load_feature_collection(path: str) -> dict:
    print(f"\n[LOAD] {path} ({human_bytes(os.path.getsize(path))})")
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, dict) or data.get("type") != "FeatureCollection":
        raise ValueError(f"{path} is not a GeoJSON FeatureCollection")
    feats = data.get("features")
    if not isinstance(feats, list):
        raise ValueError(f"{path} FeatureCollection has no features[] list")
    print(f"  -> features: {len(feats):,}")
    return data

def is_good_street_feature(feat: dict) -> bool:
    if not isinstance(feat, dict) or feat.get("type") != "Feature":
        return False

    geom = feat.get("geometry") or {}
    gtype = geom.get("type")
    if gtype not in KEEP_GEOMS:
        return False

    props = feat.get("properties") or {}
    hw = props.get("highway")
    if not hw:
        return False

    # exclude service roads entirely, and also alleys (usually service=alley)
    if hw in EXCLUDE_HIGHWAYS:
        service_val = props.get("service")
        if service_val in EXCLUDE_SERVICE_SUBTYPES or service_val is None:
            return False

    # Overpass sometimes includes pedestrian areas as polygons w/ area=yes; we already dropped polygons by geom type
    return True

def feature_key(feat: dict) -> str:
    """
    Prefer stable OSM id when present; else hash geometry+key props.
    This prevents duplicates without exploding file size.
    """
    props = feat.get("properties") or {}
    osm_id = props.get("@id") or props.get("id")
    if osm_id:
        return f"id:{osm_id}"

    # fallback: hash geom + highway + name
    geom = feat.get("geometry") or {}
    payload = {
        "geometry": geom,
        "highway": props.get("highway"),
        "name": props.get("name"),
    }
    s = json.dumps(payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
    return "h:" + hashlib.sha1(s.encode("utf-8")).hexdigest()

def scan_to_add(dirpath: str) -> list[str]:
    patterns = [os.path.join(dirpath, "*.geojson"), os.path.join(dirpath, "*.json")]
    files = []
    for p in patterns:
        files.extend(glob.glob(p))
    files = sorted(set(files))
    print(f"\n[SCAN] {dirpath}/ -> {len(files)} file(s)")
    for f in files[:25]:
        print("  -", f)
    if len(files) > 25:
        print(f"  ... (+{len(files)-25} more)")
    return files

def write_compact_geojson(path: str, fc: dict) -> None:
    tmp = path + ".tmp"
    print(f"\n[WRITE] writing compact GeoJSON -> {tmp}")
    with open(tmp, "w", encoding="utf-8") as f:
        json.dump(fc, f, ensure_ascii=False, separators=(",", ":"))
    os.replace(tmp, path)
    print(f"[WRITE] overwrite done -> {path} ({human_bytes(os.path.getsize(path))})")

def main():
    if not os.path.exists(BASE_FILE):
        raise FileNotFoundError(f"Base file not found: {BASE_FILE}")

    base_fc = load_feature_collection(BASE_FILE)
    base_feats = base_fc["features"]

    add_files = scan_to_add(TO_ADD_DIR)
    if not add_files:
        print("\nNothing to add. Exiting.")
        return

    print("\n[FILTER] keeping only street line features (LineString/MultiLineString), excluding service/alley...")
    kept_before = sum(1 for _ in base_feats)
    base_filtered = [f for f in base_feats if is_good_street_feature(f)]
    print(f"  Base kept: {len(base_filtered):,} / {kept_before:,}")

    # Dedup using keys
    seen = set()
    merged = []

    def add_feats(feats: list, label: str):
        nonlocal merged
        total = len(feats)
        kept = 0
        dup = 0
        bad = 0
        for i, feat in enumerate(feats, 1):
            if not is_good_street_feature(feat):
                bad += 1
                continue
            k = feature_key(feat)
            if k in seen:
                dup += 1
                continue
            seen.add(k)
            merged.append(feat)
            kept += 1
            if i % 50000 == 0:
                print(f"    {label}: processed {i:,}/{total:,} (kept {kept:,}, dup {dup:,}, drop {bad:,})")
        print(f"  {label}: kept {kept:,}, dup {dup:,}, dropped {bad:,}")

    print("\n[MERGE] base -> add_files")
    add_feats(base_feats, "BASE")

    for fpath in add_files:
        fc = load_feature_collection(fpath)
        add_feats(fc["features"], os.path.basename(fpath))

    # Update metadata lightly
    out_fc = {
        "type": "FeatureCollection",
        "generator": base_fc.get("generator", "merge_geojson_overpass.py"),
        "timestamp": datetime.utcnow().isoformat(timespec="seconds") + "Z",
        "features": merged,
    }

    # optional backup
    if BACKUP:
        backup_path = BASE_FILE.replace(".geojson", "") + ".bak.geojson"
        print(f"\n[BACKUP] saving -> {backup_path}")
        with open(backup_path, "w", encoding="utf-8") as f:
            json.dump(base_fc, f, ensure_ascii=False, separators=(",", ":"))
        print(f"[BACKUP] done ({human_bytes(os.path.getsize(backup_path))})")

    write_compact_geojson(BASE_FILE, out_fc)
    print("\n✅ Done.")

if __name__ == "__main__":
    main()




[LOAD] calles_bsas_fixed.geojson (87.4MB)
  -> features: 233,016

[SCAN] to_add/ -> 3 file(s)
  - to_add/export-2.geojson
  - to_add/export-3.geojson
  - to_add/export.geojson

[FILTER] keeping only street line features (LineString/MultiLineString), excluding service/alley...
  Base kept: 209,377 / 233,016

[MERGE] base -> add_files
    BASE: processed 50,000/233,016 (kept 49,091, dup 0, drop 909)
    BASE: processed 100,000/233,016 (kept 95,979, dup 0, drop 4,021)
    BASE: processed 150,000/233,016 (kept 139,837, dup 0, drop 10,163)
    BASE: processed 200,000/233,016 (kept 183,445, dup 0, drop 16,555)
  BASE: kept 209,377, dup 0, dropped 23,639

[LOAD] to_add/export-2.geojson (17.1MB)
  -> features: 21,373
  export-2.geojson: kept 11,875, dup 9,471, dropped 27

[LOAD] to_add/export-3.geojson (9.3MB)
  -> features: 10,975
  export-3.geojson: kept 3,810, dup 7,162, dropped 3

[LOAD] to_add/export.geojson (21.6MB)
  -> features: 26,403
  export.geojson: kept 17,551, dup 8,833, dropped

  "timestamp": datetime.utcnow().isoformat(timespec="seconds") + "Z",


[BACKUP] done (80.3MB)

[WRITE] writing compact GeoJSON -> calles_bsas_fixed.geojson.tmp
[WRITE] overwrite done -> calles_bsas_fixed.geojson (85.4MB)

✅ Done.


In [2]:
!pwd

/Users/main/Desktop/geojson-repo/bianca-geodata
