In [0]:
# Databricks Notebook: Parallell nedlasting av ortofoto, DOM og maskegenerering

import os
import threading
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO
from pathlib import Path

import geopandas as gpd
import numpy as np
from PIL import Image
from rasterio.features import rasterize
from rasterio.transform import from_bounds
from shapely.geometry import box

import src.config as cfg

In [0]:
# === Configuration ===
# Pixel size (width, height) requested from the WMS services and used for the masks.
IMAGE_SIZE = (512, 512)

# WMS base URLs and the ticket that is passed along with every request.
BASE_IMAGE_URL = "https://wms.geonorge.no/skwms1/wms/ortofoto"
BASE_DOM_URL = "https://wms.geonorge.no/skwms1/wms"
SECRET_TOKEN = cfg.SECRET_TOKEN

# Input features.
GEOJSON_PATH = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/raw_geojson/turning_spaces.geojson"

# Output folders for orthophotos, DOM tiles and label masks.
IMG_DIR = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/img"
DOM_DIR = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/dom"
MASK_DIR = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/lab"

# Create the output folders up front; folders that already exist are left as they are.
for _output_dir in (IMG_DIR, DOM_DIR, MASK_DIR):
    os.makedirs(_output_dir, exist_ok=True)

In [0]:
# === URL builder ===
def get_url(bbox, token, dom=False):
    """Build the WMS GetMap URL for one bounding box.

    Args:
        bbox: Iterable of coordinates; they are joined with commas, in the
            order given, to form the ``BBox`` parameter.
        token: Value inserted as the ``TICKET`` parameter.
        dom: When true, build the URL for the ``NHM_DOM_25833:skyggerelieff``
            layer based on ``BASE_DOM_URL``; otherwise for the ``ortofoto``
            layer based on ``BASE_IMAGE_URL``.

    Returns:
        The request URL as a string, with width/height taken from ``IMAGE_SIZE``.
    """
    bbox_str = ",".join(str(coord) for coord in bbox)
    w, h = IMAGE_SIZE

    if not dom:
        return f"{BASE_IMAGE_URL}?VERSION=1.3.0&TICKET={token}&service=WMS&request=GetMap&Format=image/png&GetFeatureInfo=text/plain&CRS=EPSG:25833&Layers=ortofoto&BBox={bbox_str}&width={w}&height={h}"

    # NOTE(review): unlike the orthophoto URL above, this one has no "?" before
    # the first parameter and no VERSION/service parameters — confirm against
    # the DOM endpoint that this is intended.
    return f"{BASE_DOM_URL}-dom-nhm-25833&TICKET={token}&request=GetMap&Format=image/png&GetFeatureInfo=text/plain&CRS=EPSG:25833&Layers=NHM_DOM_25833:skyggerelieff&BBox={bbox_str}&width={w}&height={h}"

In [0]:
# === Download and process a single bbox ===
# Seconds to wait on a WMS request before giving up, so that a stalled
# connection cannot block a worker thread indefinitely.
REQUEST_TIMEOUT_S = 60

# Reprojected GeoJSON features, loaded on first use and then shared by all
# worker threads instead of being re-read from disk for every bbox.
_features_lock = threading.Lock()
_features_cache = None


def _load_features():
    """Return the features from GEOJSON_PATH in EPSG:25833, reading the file only once."""
    global _features_cache
    # The lock makes sure that concurrent first callers trigger a single read.
    with _features_lock:
        if _features_cache is None:
            _features_cache = gpd.read_file(GEOJSON_PATH).to_crs("EPSG:25833")
        return _features_cache


def _download_png(url, out_path, mode):
    """Fetch ``url``, convert the image to PIL mode ``mode`` and save it at ``out_path``."""
    with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT_S) as response:
        payload = response.read()
    Image.open(BytesIO(payload)).convert(mode).save(out_path)


def _build_mask(bbox):
    """Rasterize the features intersecting ``bbox`` into a (height, width) uint8 array of 0/1."""
    width, height = IMAGE_SIZE
    features = _load_features()
    geom_clip = features[features.geometry.intersects(box(*bbox))].geometry
    if geom_clip.empty:
        # Nothing to burn in: return an all-zero mask of the same shape.
        return np.zeros((height, width), dtype=np.uint8)
    return rasterize(
        [(geom, 1) for geom in geom_clip],
        out_shape=(height, width),
        transform=from_bounds(*bbox, width=width, height=height),
        fill=0,
        dtype="uint8",
    )


def process_bbox(index_bbox):
    """Download the orthophoto and DOM tile for one bbox and write its label mask.

    Args:
        index_bbox: ``(idx, bbox)`` pair. ``idx`` is only used in the error
            message; ``bbox`` is ``(minx, miny, maxx, maxy)`` and is used for
            the WMS requests, the raster transform and the file names.

    Returns:
        A status string: skipped (all three files already exist), saved, or
        failed. Any ``Exception`` raised while processing is caught and
        reported in the returned string rather than propagated, so one bad
        bbox does not abort the whole run.
    """
    idx, bbox = index_bbox
    try:
        # File names use the coordinates truncated to integers.
        bbox_str = "_".join(str(int(coord)) for coord in bbox)
        img_path = f"{IMG_DIR}/image_{bbox_str}.png"
        dom_path = f"{DOM_DIR}/dom_{bbox_str}.png"
        mask_path = f"{MASK_DIR}/mask_{bbox_str}.png"

        # Skip when every output already exists.
        if all(os.path.exists(path) for path in (img_path, dom_path, mask_path)):
            return f"⏭️ Hopper over {bbox_str}"

        # Downloads: orthophoto as RGB, DOM tile as single-channel greyscale.
        _download_png(get_url(bbox, SECRET_TOKEN, dom=False), img_path, "RGB")
        _download_png(get_url(bbox, SECRET_TOKEN, dom=True), dom_path, "L")

        # Mask: scaled from 0/1 to 0/255 before it is saved as an image.
        mask = _build_mask(bbox)
        Image.fromarray(mask * 255).save(mask_path)
        return f"✅ Lagret {bbox_str}"
    except Exception as e:
        return f"❌ Feil {idx}: {e}"

In [0]:
# === Load features ===
# Read the GeoJSON at GEOJSON_PATH and reproject it to EPSG:25833, the CRS
# named in the WMS request URLs.
gdf = (
    gpd.read_file(GEOJSON_PATH)
    .to_crs("EPSG:25833")
)
def make_bbox(poly, buffer=20):
    """Return the bounds of ``poly`` grown by ``buffer`` on every side.

    Args:
        poly: Object exposing ``bounds`` as ``(minx, miny, maxx, maxy)``.
        buffer: Margin added on each side, in the same units as the bounds.

    Returns:
        ``[minx - buffer, miny - buffer, maxx + buffer, maxy + buffer]`` as a list.
    """
    west, south, east, north = poly.bounds
    lower_corner = [west - buffer, south - buffer]
    upper_corner = [east + buffer, north + buffer]
    return lower_corner + upper_corner

# Build one (index, bbox) work item per feature. Features without a usable
# geometry are skipped: a missing geometry has no bounds at all, and an empty
# one has no extent to request. Iterating the geometry column directly avoids
# materialising a full row object per feature as iterrows() does.
bbox_list = [
    (i, make_bbox(geom))
    for i, geom in gdf.geometry.items()
    if geom is not None and not geom.is_empty
]

skipped_count = len(gdf) - len(bbox_list)
if skipped_count:
    print(f"⚠️ Hopper over {skipped_count} objekter uten gyldig geometri")

# === Run in parallel ===
print(f"🔄 Starter nedlasting av {len(bbox_list)} bboxes...")

results = []
with ThreadPoolExecutor() as executor:  # worker count left to the library default
    futures = [executor.submit(process_bbox, pair) for pair in bbox_list]
    # Collect status strings in completion order, not submission order.
    for f in as_completed(futures):
        results.append(f.result())

print("\n📝 Ferdig. Resultater:")
for r in results:
    print(r)