In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
import requests
import time
from pathlib import Path
import os

In [0]:
# Session and naming configuration shared by the cells of this notebook.
spark = SparkSession.builder.getOrCreate()
catalog_dev = "land_topografisk-gdb_dev"
schema_dev = "ai2025"
# The catalog name contains a hyphen, so it has to be back-quoted wherever
# Spark parses it as an identifier; an unquoted `a-b` is rejected by the parser.
spark.sql(f"USE CATALOG `{catalog_dev}`")
spark.sql(f"USE SCHEMA `{schema_dev}`")

# Fully qualified, quoted table names (catalog.schema.table).
bronze_table = f"`{catalog_dev}`.`{schema_dev}`.endepunkt_bronze"
silver_table = f"`{catalog_dev}`.`{schema_dev}`.endepunkt_silver"
# Half-width of the bounding box built around each point (same unit as x/y).
buffer = 20

In [0]:
def add_silver_columns(df, buffer=50):
    """Extend a bronze DataFrame with the bookkeeping columns of the silver table.

    Args:
        df: Spark DataFrame. It must expose ``x`` and ``y`` columns that can be
            used in arithmetic, since they are combined with ``buffer`` to
            build the bounding box.
        buffer: Amount subtracted from / added to ``x`` and ``y`` to form
            ``bbox`` as ``[x - buffer, y - buffer, x + buffer, y + buffer]``.

    Returns:
        A DataFrame with all input columns plus ``bbox``, ``image_path``,
        ``dom_path``, ``image_status``, ``dom_status``, ``lastet_tid`` and
        ``row_hash``.
    """
    df = df.withColumn(
        "bbox",
        expr(f"array(x - {buffer}, y - {buffer}, x + {buffer}, y + {buffer})"),
    )

    # The hash covers the input columns plus bbox only. The column list is
    # captured here, before the bookkeeping columns exist, so that lastet_tid
    # (which changes on every run) never influences row_hash.
    # NOTE(review): concat_ws skips NULL inputs, so two rows that differ only
    # in *which* column is NULL can produce the same hash.
    hashed_columns = df.columns

    return (
        df
        # Typed NULLs: a bare lit(None) produces a NullType (void) column,
        # which cannot hold the path strings these columns are meant for.
        .withColumn("image_path", lit(None).cast("string"))
        .withColumn("dom_path", lit(None).cast("string"))
        .withColumn("image_status", lit("PENDING"))
        .withColumn("dom_status", lit("PENDING"))
        .withColumn("lastet_tid", current_timestamp())
        .withColumn("row_hash", sha2(concat_ws("||", *hashed_columns), 256))
    )


In [0]:
def write_delta_table(sdf):
    """Create the silver Delta table from ``sdf``, or upsert ``sdf`` into it.

    Uses the notebook-level ``spark`` session and ``silver_table`` name.

    * If the table does not exist, ``sdf`` is saved as a new Delta table.
    * Otherwise ``sdf`` is merged on ``nodeid``: matched rows whose
      ``row_hash`` differs get every column of ``sdf`` overwritten from the
      source, and unmatched source rows are inserted.

    Args:
        sdf: Spark DataFrame carrying at least ``nodeid`` and ``row_hash``.
    """
    if not spark.catalog.tableExists(silver_table):
        # First run: nothing to merge into, so materialise the table directly.
        sdf.write.format("delta").mode("overwrite").saveAsTable(silver_table)
        return

    from delta.tables import DeltaTable

    # Same column-by-column mapping serves both the update and the insert.
    source_columns = {name: f"source.{name}" for name in sdf.columns}

    merge_builder = (
        DeltaTable.forName(spark, silver_table)
        .alias("target")
        .merge(sdf.alias("source"), condition="target.nodeid = source.nodeid")
    )
    # Only rewrite rows whose content hash changed.
    merge_builder = merge_builder.whenMatchedUpdate(
        condition="target.row_hash != source.row_hash",
        set=source_columns,
    )
    merge_builder.whenNotMatchedInsert(values=source_columns).execute()

In [0]:
# Read the bronze rows and derive the silver columns from them.
bronze_df = spark.read.table(bronze_table)
silver_df = add_silver_columns(df=bronze_df, buffer=buffer)

# Create the silver table on the first run, otherwise merge into it.
write_delta_table(sdf=silver_df)

# The reported number is the row count of silver_df (the merge source);
# count() triggers its own Spark job after the write.
print(f"✅ Endepunkt silver opprettet eller oppdatert med {silver_df.count()} rader.")