In [0]:
%pip install python-dotenv

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql.window import Window
from delta.tables import DeltaTable
import requests
import time
from pathlib import Path
import os

In [0]:
# Reuse the active Spark session (or create one) for every cell below.
spark = SparkSession.builder.getOrCreate()
# The catalog name contains a hyphen, so it is kept back-quoted for use in SQL identifiers.
catalog_dev = "`land_topografisk-gdb_dev`"
schema_dev = "ai2025"
# Make the catalog/schema the session defaults; the catalog must be selected before the schema.
spark.sql(f"USE CATALOG {catalog_dev}")
spark.sql(f"USE SCHEMA {schema_dev}")

# Fully qualified names of the input (bronze) and output (silver) tables.
bronze_table = f"{catalog_dev}.{schema_dev}.endepunkt_bronze"
silver_table = f"{catalog_dev}.{schema_dev}.endepunkt_silver"
# Half-width of the square bounding box built around each point (x, y) in add_silver_columns.
# NOTE(review): presumably metres, since the WMS requests use EPSG:25833 — confirm.
buffer = 20

In [0]:
def generate_dom_url(bbox):
    """Build the WMS GetMap URL for the DOM shaded-relief tile covering ``bbox``.

    Args:
        bbox: Sequence of four coordinates. In this notebook it is built as
            ``[x - buffer, y - buffer, x + buffer, y + buffer]``.

    Returns:
        The request URL (str) for a 512x512 PNG of the
        ``NHM_DOM_25833:skyggerelieff`` layer in EPSG:25833.

    Raises:
        TypeError: If ``bbox`` is not iterable (e.g. ``None``).
    """
    # WMS expects BBOX as a comma-separated list. Joining with "." made the
    # separators indistinguishable from decimal points and produced an
    # invalid bounding box.
    bbox_str = ",".join(map(str, bbox))
    width, height = 512, 512
    return (
        f"https://wms.geonorge.no/skwms1/wms.hoyde-dom-nhm-25833?request=GetMap&Format=image/png&"
        f"GetFeatureInfo=text/plain&CRS=EPSG:25833&Layers=NHM_DOM_25833:skyggerelieff&"
        f"BBOX={bbox_str}&width={width}&height={height}"
    )

# Spark UDF wrapper (string result) so generate_dom_url can be applied to a DataFrame column.
generate_dom_url_udf = udf(generate_dom_url, StringType())

def dom_file_exists(nodeid: str) -> str:
    """Report whether the DOM tile for ``nodeid`` is already stored on the volume.

    Args:
        nodeid: Identifier used as the PNG file name (``<nodeid>.png``).

    Returns:
        ``"DOWNLOADED"`` if the file exists in the ``endepunkt_dom`` folder,
        otherwise ``"PENDING"``.
    """
    dom_png = f"/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/endepunkt_dom/{nodeid}.png"
    if os.path.exists(dom_png):
        return "DOWNLOADED"
    return "PENDING"

# Spark UDF wrapper (string result) so dom_file_exists can be applied to a DataFrame column.
dom_file_status_udf = udf(dom_file_exists, StringType())

In [0]:
def generate_image_url(bbox):
    """Build the WMS GetMap base URL for the orthophoto tile covering ``bbox``.

    The URL ends in ``&TICKET=`` with no value; the download loop later in
    this notebook appends the access token before issuing the request.

    Args:
        bbox: Sequence of four coordinates. In this notebook it is built as
            ``[x - buffer, y - buffer, x + buffer, y + buffer]``.

    Returns:
        The URL (str) for a 512x512 PNG of the ``ortofoto`` layer in
        EPSG:25833, or the sentinel ``"INVALID"`` if the URL could not be
        built (e.g. ``bbox`` is ``None``).
    """
    try:
        # WMS expects BBOX as a comma-separated list. Joining with "." made
        # the separators indistinguishable from decimal points and produced
        # an invalid bounding box.
        bbox_str = ",".join(map(str, bbox))
        width, height = 512, 512
        return (
            f"https://wms.geonorge.no/skwms1/wms.nib?VERSION=1.3.0"
            f"&service=WMS&request=GetMap&Format=image/png&"
            f"GetFeatureInfo=text/plain&CRS=EPSG:25833&Layers=ortofoto&"
            f"BBox={bbox_str}&width={width}&height={height}&TICKET="
        )  # the token is appended after TICKET= at download time
    except Exception:
        # Best-effort: a bad bbox must not fail the whole Spark job; the
        # download loop skips values that do not start with "http".
        return "INVALID"
    
# Spark UDF wrapper (string result) so generate_image_url can be applied to a DataFrame column.
generate_image_url_udf = udf(generate_image_url, StringType())

def image_file_exists(nodeid: str) -> str:
    """Report whether the orthophoto for ``nodeid`` is already stored on the volume.

    Args:
        nodeid: Identifier used as the PNG file name (``<nodeid>.png``).

    Returns:
        ``"DOWNLOADED"`` if the file exists in the ``endepunkt_images``
        folder, otherwise ``"PENDING"``.
    """
    # The image download cell writes to ".../endepunkt_images/" (plural).
    # Checking ".../endepunkt_image/" meant downloaded files were never found
    # and every rebuild of the silver table reset image_status to PENDING.
    path = f"/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/endepunkt_images/{nodeid}.png"
    return "DOWNLOADED" if os.path.exists(path) else "PENDING"

# Spark UDF wrapper (string result) so image_file_exists can be applied to a DataFrame column.
image_file_status_udf = udf(image_file_exists, StringType())

In [0]:
def add_silver_columns(df: DataFrame, buffer: int = 50, kommune_id: str = "") -> DataFrame:
    """Derive the silver-layer columns from a bronze DataFrame.

    The input must provide the columns ``x``, ``y`` and ``nodeid``.

    Args:
        df: Source rows.
        buffer: Distance subtracted from / added to ``x`` and ``y`` to form
            the bounding box.
        kommune_id: Literal value written to the ``kommune_id`` column.

    Returns:
        ``df`` extended with ``bbox``, ``image_path``, ``dom_path``,
        ``image_status``, ``dom_status``, ``lastet_tid``, ``kommune_id`` and
        ``row_hash``.
    """
    bbox_sql = f"array(x - {buffer}, y - {buffer}, x + {buffer}, y + {buffer})"
    with_bbox = df.withColumn("bbox", expr(bbox_sql))

    # The hash covers the incoming columns plus bbox, by name; the columns
    # added below (paths, statuses, load time) are not part of it.
    hash_inputs = with_bbox.columns

    bbox_col = col("bbox")
    node_col = col("nodeid")

    enriched = with_bbox.withColumn("image_path", generate_image_url_udf(bbox_col))
    enriched = enriched.withColumn("dom_path", generate_dom_url_udf(bbox_col))
    enriched = enriched.withColumn("image_status", image_file_status_udf(node_col))
    enriched = enriched.withColumn("dom_status", dom_file_status_udf(node_col))
    enriched = enriched.withColumn("lastet_tid", current_timestamp())
    enriched = enriched.withColumn("kommune_id", lit(kommune_id))
    # Computed last, so a hashed "kommune_id" resolves to the literal set above.
    return enriched.withColumn("row_hash", sha2(concat_ws("||", *hash_inputs), 256))

In [0]:
def write_delta_table(sdf: DataFrame, mode: str = "merge") -> None:
    """Persist ``sdf`` to the silver Delta table.

    Args:
        sdf: Rows to write; in merge mode every column is copied from source.
        mode: ``"overwrite"`` replaces the table contents via ``saveAsTable``.
            Any other value performs a merge keyed on ``nodeid``.

    Merge rules:
        * matched rows are updated only when the source ``hentet_tid`` is
          newer than the target's, or the target ``image_path`` is NULL;
        * unmatched source rows are inserted.
    """
    if mode == "overwrite":
        writer = sdf.write.format("delta").option("mergeSchema", "true").mode("overwrite")
        writer.saveAsTable(silver_table)
        return

    from delta.tables import DeltaTable

    # Same column-to-column mapping for both the update and the insert clause.
    assignments = {name: f"source.{name}" for name in sdf.columns}

    target = DeltaTable.forName(spark, silver_table).alias("target")
    merge_builder = target.merge(
        sdf.alias("source"),
        condition="target.nodeid = source.nodeid",
    )
    merge_builder = merge_builder.whenMatchedUpdate(
        condition="target.hentet_tid < source.hentet_tid OR target.image_path IS NULL",
        set=assignments,
    )
    merge_builder = merge_builder.whenNotMatchedInsert(values=assignments)
    merge_builder.execute()


In [0]:
def process_silver_for_kommune(kommune_id: str) -> None:
    """Build silver rows for one kommune from bronze and write them to the silver table.

    Steps:
        1. Read bronze rows whose ``kommune_id`` equals the given id.
        2. Add the derived silver columns (``add_silver_columns``).
        3. Keep only the newest row per ``nodeid`` (by ``hentet_tid``).
        4. If the silver table does not exist, create it with an overwrite;
           otherwise align the rows to the existing table schema and merge.

    Args:
        kommune_id: Kommune identifier; converted to ``str`` before use.
    """
    kommune_id = str(kommune_id)  # defensive cast in case a non-string id is passed
    bronze_df = spark.read.table(bronze_table).filter(col("kommune_id") == lit(kommune_id))

    silver_df = add_silver_columns(bronze_df, buffer=buffer, kommune_id=kommune_id)

    # Deduplicate before branching. Previously this only happened on the merge
    # path, so the very first load (overwrite) could write several rows for
    # the same nodeid into the new table.
    w = Window.partitionBy("nodeid").orderBy(col("hentet_tid").desc())
    silver_df = (
        silver_df.withColumn("row_number", row_number().over(w))
        .filter(col("row_number") == lit(1))
        .drop("row_number")
    )

    if not spark.catalog.tableExists(silver_table):
        write_delta_table(silver_df, mode="overwrite")
        return

    expected_schema = spark.table(silver_table).schema

    # Project onto the existing table's columns and types. Columns stored with
    # the "void" type are replaced by an empty string cast to string.
    silver_df = silver_df.select([
        lit("").cast("string").alias(c.name) if c.dataType.typeName() == "void"
        else col(c.name).cast(c.dataType)
        for c in expected_schema
    ])

    write_delta_table(silver_df)

In [0]:
# Distinct kommune_id values present in bronze, collected to the driver as plain dicts.
kommune_id_rows = [
    row.asDict() for row in spark.read.table(bronze_table).select("kommune_id").distinct().collect()
]


# Build/refresh the silver table one kommune at a time.
for row in kommune_id_rows:
    # Diagnostic output: the raw dict, the kommune_id value and their Python types.
    print(f"Row: {row}, type: {type(row)}, kommune_id: {row['kommune_id']}, type: {type(row['kommune_id'])}")

    kommune_id = row["kommune_id"]
    # Rows with a NULL kommune_id are skipped.
    if kommune_id is not None:
        kommune_id = str(kommune_id)
        process_silver_for_kommune(kommune_id)


In [0]:
# spark.sql(f"DROP TABLE IF EXISTS {silver_table}")

In [0]:
# Show the silver table as written by the loop above.
display(spark.read.table(silver_table))

In [0]:
# Folder on the volume where downloaded DOM tiles are stored; created if missing.
dom_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/endepunkt_dom/"
os.makedirs(dom_output_dir, exist_ok=True)

def download_dom(dom_url: str, out_path: str) -> bool:
    """Download one DOM tile and store it at ``out_path``.

    Args:
        dom_url: Full request URL.
        out_path: Destination file path; written in binary mode.

    Returns:
        ``True`` if the server answered 200 and the body was written,
        ``False`` on any other status code or on any exception (which is
        printed, not raised).
    """
    try:
        r = requests.get(dom_url, timeout=10)
        if r.status_code == 200:
            with open(out_path, "wb") as f:
                f.write(r.content)
            return True
        # Non-200 answers used to fail silently; report them like the image
        # download loop does so failed tiles can be diagnosed.
        print(f"⚠️ Feil {r.status_code} ved nedlasting av DOM: {dom_url}")
    except Exception as e:
        # Best-effort: one failing tile must not abort the whole batch.
        print(f"❌ Feil ved nedlasting av DOM: {e}")
    return False

In [0]:
# Silver rows whose DOM tile is still marked PENDING; collected to the driver
# because the downloads below run sequentially in Python.
silver_df_pending = spark.read.table(silver_table).filter("dom_status = 'PENDING'")
silver_rows = silver_df_pending.select("nodeid", "dom_path").collect()

print(f"Antall DOM-rader å laste ned: {len(silver_rows)}")

for row in silver_rows:
    node_id = row["nodeid"]
    dom_url = row["dom_path"]
    # dom_output_dir already ends with "/", so this path contains a double slash.
    out_path = f"{dom_output_dir}/{node_id}.png"

    if download_dom(dom_url, out_path):
        print(f"✅ Lastet ned DOM for node {node_id}")

        # Mark this node as downloaded; one Delta update is issued per successful download.
        DeltaTable.forName(spark, silver_table) \
            .alias("target") \
            .update(
                condition=f"nodeid = '{node_id}'",
                set={"dom_status": "'DOWNLOADED'"}
            )

In [0]:
# Load variables from a .env file (if one is found) into the process environment.
from dotenv import load_dotenv
load_dotenv()

# Geonorge credentials used by get_token(); each is None when its variable is unset.
brukerid = os.getenv("GEONORGE_BRUKERID")
passord = os.getenv("GEONORGE_PASSORD")

def get_token():
    """Request a ticket for the ``wms.nib`` service from the Geonorge BAAT endpoint.

    Reads the module-level ``brukerid`` and ``passord`` credentials.

    Returns:
        The response body (str) with leading/trailing backticks removed.

    Raises:
        requests.exceptions.RequestException: On connection errors or if the
            server does not answer within the timeout.
    """
    # Send the credentials through `params` so requests URL-encodes them.
    # Interpolating them raw into the query string broke the request whenever
    # the password contained characters such as "&", "#" or "+".
    response = requests.get(
        "https://baat.geonorge.no/skbaatts/req",
        params={
            "brukerid": brukerid,
            "passord": passord,
            "tjenesteid": "wms.nib",
            "retformat": "s",
        },
        timeout=10,  # same limit as the tile downloads; avoids hanging forever
    )
    raw_token = response.text.strip("`")
    return raw_token

# Fetch an initial token and remember when it was obtained.
token = get_token()
token_start_time = time.time()
# Age after which refresh_token_if_needed() requests a new token.
# NOTE(review): presumably chosen to stay below the server-side validity — confirm.
token_lifetime = 55 * 60  # seconds

def refresh_token_if_needed():
    """Replace the module-level ``token`` once it is older than ``token_lifetime``.

    When the age (seconds since ``token_start_time``) exceeds the lifetime, a
    new token is fetched with ``get_token()`` and ``token_start_time`` is
    reset; otherwise nothing happens.
    """
    global token, token_start_time

    token_age = time.time() - token_start_time
    if token_age <= token_lifetime:
        return

    print("🔄 Fornyer token...")
    token = get_token()
    token_start_time = time.time()

In [0]:
# Folder on the volume where downloaded orthophotos are stored; created if missing.
image_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/endepunkt_images/"
os.makedirs(image_output_dir, exist_ok=True)

# Only nodes that still lack an image but already have their DOM tile are fetched.
silver_df_pending_img = spark.read.table(silver_table).filter(
    "image_status = 'PENDING' and dom_status = 'DOWNLOADED'"
)
silver_rows_img = silver_df_pending_img.select("nodeid", "image_path").distinct().collect()

print(f"Antall image-rader å laste ned: {len(silver_rows_img)}")

for row in silver_rows_img:
    # Renew the token first if it has exceeded token_lifetime.
    refresh_token_if_needed()
    
    node_id = row["nodeid"]
    image_base = row["image_path"]

    # Skip rows without a usable URL (NULL, empty, or the "INVALID" sentinel
    # returned by generate_image_url).
    if not image_base or not image_base.startswith("http"):
        print(f"❌ Ugyldig image_path for node {node_id}: {image_base}")
        continue
    
    # image_path ends with "TICKET=", so the token is appended directly.
    image_url = f"{image_base}{token}"
    # image_output_dir already ends with "/", so this path contains a double slash.
    out_path = f"{image_output_dir}/{node_id}.png"

    try:
        r = requests.get(image_url, timeout=10)
        if r.status_code == 200:
            with open(out_path, "wb") as f:
                f.write(r.content)
            print(f"✅ Lastet ned image for node {node_id}")

            # Mark this node as downloaded; one Delta update is issued per successful download.
            DeltaTable.forName(spark, silver_table) \
                .alias("target") \
                .update(
                    condition=f"nodeid = '{node_id}'",
                    set={"image_status": "'DOWNLOADED'"}
                )
        else:
            print(f"⚠️ Feil {r.status_code} for node {node_id}")
    except Exception as e:
        # Best-effort: report the failure and continue with the next node.
        print(f"❌ Exception ved nedlasting av image for node {node_id}: {e}")

In [0]:
# Row counts per (dom_status, image_status) combination in the silver table.
spark.read.table(silver_table).groupBy("dom_status", "image_status").count().show()

In [0]:
# Show the silver table after the download cells have updated the status columns.
display(spark.read.table(silver_table))