In [1]:
%%configure -f
{
    "conf":
    {
        "spark.driver.extraJavaOptions" : "--add-opens java.base/jdk.internal.loader=ALL-UNNAMED",
        "spark.jars.packages": "com.google.protobuf:protobuf-java:3.25.5"
    }
}

StatementMeta(, 7a89620e-5459-412c-b676-8e7c0027b854, -1, Finished, Available, Finished)

In [2]:
# Import the required geoanalytics_fabric modules
# ESRI - FABRIC reference: https://developers.arcgis.com/geoanalytics-fabric/

import geoanalytics_fabric
from geoanalytics_fabric.sql import functions as ST
from geoanalytics_fabric import extensions


StatementMeta(, 7a89620e-5459-412c-b676-8e7c0027b854, 3, Finished, Available, Finished)

In [3]:
# Load the BAG building data for the region of interest (GeoParquet file).

path_buildings = "Files/Results/Buildings/Loppersum/buildings_older_2009.parquet"
df_buildings = spark.read.load(path_buildings, format="geoparquet")

StatementMeta(, 7a89620e-5459-412c-b676-8e7c0027b854, 4, Finished, Available, Finished)

In [4]:
# Load the per-building AHN aggregation results (BAG + AHN data).

# One GeoParquet file per AHN release, each holding the BAG polygons.
results_ahn2_path = "Files/Results/lidar/Loppersum/Building_aggregations/ahn2.parquet"
results_ahn3_path = "Files/Results/lidar/Loppersum/Building_aggregations/ahn3.parquet"
results_ahn4_path = "Files/Results/lidar/Loppersum/Building_aggregations/ahn4.parquet"

# AHN2 -> T1, AHN3 -> T2, AHN4 -> T3 (same order as the paths above).
df_buildings_T1, df_buildings_T2, df_buildings_T3 = (
    spark.read.format("geoparquet").load(ahn_path)
    for ahn_path in (results_ahn2_path, results_ahn3_path, results_ahn4_path)
)


StatementMeta(, 7a89620e-5459-412c-b676-8e7c0027b854, 5, Finished, Available, Finished)

In [129]:
# Show the number of rows in each loaded table side by side.
# Note: `.count()` returns the row count of each DataFrame, not a number of
# lidar points.
import pandas as pd

row_counts = {
    label: [frame.count()]
    for label, frame in (
        ('#BAG-objects', df_buildings),
        ('T1', df_buildings_T1),
        ('T2', df_buildings_T2),
        ('T3', df_buildings_T3),
    )
}
pandas_df = pd.DataFrame(row_counts)

# Turn the one-row pandas table into a Spark DataFrame and print it.
dfFromPandas = spark.createDataFrame(pandas_df)
dfFromPandas.show()


StatementMeta(, 6bdce31e-6e08-4796-afda-1ec569ed3fd5, 130, Finished, Available, Finished)

+------------+----+----+----+
|#BAG-objects|  T1|  T2|  T3|
+------------+----+----+----+
|        1196|1196|1109|1196|
+------------+----+----+----+



In [5]:
# import required modules
from pyspark.sql.functions import col

# NOTE(review): the following cell repeats these same steps before joining.
# Running both is harmless (the centroid is recomputed from the still-present
# "geometry" column and DataFrame.drop ignores names that are already gone),
# but one of the two copies is redundant.

# Attribute columns that are removed from every table.
cols = (
    "feature_id",
    "rdf_seealso",
    "clip_geometry",
    "bouwjaar",
    "status",
    "gebruiksdoel",
    "oppervlakte_min",
    "oppervlakte_max",
    "aantal_verblijfsobjecten",
    "count",
)

# BAG buildings: add a centroid column computed from "geometry", then drop the
# attribute columns (the "geometry" column itself is kept here).
df_buildings = (
    df_buildings
    .withColumn("centroid", ST.centroid("geometry"))
    .drop(*cols)
)

# AHN tables: drop the same attribute columns and also their "geometry" column.
df_buildings_T1 = df_buildings_T1.drop(*cols, "geometry")
df_buildings_T2 = df_buildings_T2.drop(*cols, "geometry")
df_buildings_T3 = df_buildings_T3.drop(*cols, "geometry")

StatementMeta(, 7a89620e-5459-412c-b676-8e7c0027b854, 6, Finished, Available, Finished)

In [6]:
# Join the BAG buildings with the three BAG+AHN tables on "identificatie".

# import required modules
from pyspark.sql.functions import col

# Attribute columns that are not needed after the join.
cols = ("feature_id","rdf_seealso","clip_geometry","bouwjaar","status","gebruiksdoel","oppervlakte_min","oppervlakte_max","aantal_verblijfsobjecten","count")

# Buildings: add the centroid of "geometry" and strip the attribute columns.
df_buildings = df_buildings.withColumn("centroid", ST.centroid("geometry")).drop(*cols)

# AHN tables: strip the attribute columns plus "geometry", so the only
# "geometry" column in the joined result comes from df_buildings.
df_buildings_T1, df_buildings_T2, df_buildings_T3 = (
    ahn_table.drop(*cols, "geometry")
    for ahn_table in (df_buildings_T1, df_buildings_T2, df_buildings_T3)
)

# NOTE(review): the first two joins use Spark's default join type (inner) while
# the last one is a right join, so every row of df_buildings_T3 is kept even
# when it has no match in the earlier tables (those rows get nulls for the
# building and T1/T2 columns) -- confirm this mix is intended.
df_buildingsT123 = (
    df_buildings
    .join(df_buildings_T1, on=["identificatie"])
    .join(df_buildings_T2, on=["identificatie"])
    .join(df_buildings_T3, on=["identificatie"], how="right")
)

# Row count of the joined table (shown as the cell output).
df_buildingsT123.count()

StatementMeta(, 7a89620e-5459-412c-b676-8e7c0027b854, 7, Finished, Available, Finished)

1196

In [132]:
# Render the joined BAG + AHN (T1/T2/T3) table for visual inspection.
display(df_buildingsT123)

StatementMeta(, 6bdce31e-6e08-4796-afda-1ec569ed3fd5, 133, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 108331a8-4096-4b4a-a6ce-5a87800998cf)

In [7]:
# Drop the "geometry" column from the joined table; the "centroid" column added
# earlier is kept. Presumably this leaves the centroid as the only geometry so
# that the GWR tool treats every building as a point -- TODO confirm.
df_buildingsT123_points = df_buildingsT123.drop("geometry")


StatementMeta(, 7a89620e-5459-412c-b676-8e7c0027b854, 8, Finished, Available, Finished)

In [49]:
# Import the required modules
# `F` is used throughout this cell but was not imported by any earlier cell, so
# the cell only worked on leftover kernel state; import it here so that
# Restart & Run All succeeds.
from pyspark.sql import functions as F
from geoanalytics_fabric.tools import GWR

# Run the GWR tool to predict AHN4 (T3) height values for buildings at Loppersum
# from the T1 and T2 mean heights, using a bisquare kernel over 10 neighbours.
resultGWR = (
    GWR()
    .setExplanatoryVariables("T1_z_mean", "T2_z_mean")
    .setDependentVariable(dependent_variable="T3_z_mean")
    .setLocalWeightingScheme(local_weighting_scheme="Bisquare")
    .setNumNeighbors(number_of_neighbors=10)
    .runIncludeDiagnostics(dataframe=df_buildingsT123_points)
)

# Columns rounded to 3 decimals, in output order. COND_ADJ sits between the two
# groups and is rendered in scientific notation instead of being rounded.
rounded_before_cond = [
    "T1_z_mean",
    "T2_z_mean",
    "T3_z_mean",
    "PREDICTED_T3_z_mean",
    "Intercept",
    "SE_Intercept",
    "C_T1_z_mean",
    "SE_T1_z_mean",
    "C_T2_z_mean",
    "SE_T2_z_mean",
]
rounded_after_cond = ["NUM_NBRS", "LOCALR2"]

# Build the presentation table from the trained output: rounded values plus the
# geometry, sorted so that the best local fits (highest local R2) come first.
# The sort key is spelled exactly like the alias ("LOCALR2") so it also
# resolves when Spark is configured to be case sensitive.
outputGWR = (
    resultGWR.outputTrained
    .select(
        *[F.round(name, 3).alias(name) for name in rounded_before_cond],
        F.format_string("%.3e", F.col("COND_ADJ").cast("float")).alias("COND_ADJ"),
        *[F.round(name, 3).alias(name) for name in rounded_after_cond],
        "geometry",
    )
    .sort("LOCALR2", ascending=False)
)

# Show the full sorted result table (not just the first rows).
display(outputGWR)

# View the model diagnostics as "| name | value |" rows, 4 decimals each
for k, v in resultGWR.modelDiagnostics.items():
    print(f"| {k} | {v:.4f} |")



StatementMeta(, 7a89620e-5459-412c-b676-8e7c0027b854, 50, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 0605be40-3d8c-4d54-b1cd-9a833f6a18f2)

| R2 | 0.9936 |
| AdjR2 | 0.9813 |
| AICc | 1509.7328 |
| Sigma2 | 0.0459 |
| EDoF | 378.0284 |


In [50]:
# Attach the GWR point results back to the building rows.
# NOTE(review): rows are matched by equality of the building centroid and the
# GWR output geometry rather than by an identifier column -- confirm this
# matching is reliable for all rows.
centroid_equals_point = df_buildings.centroid == outputGWR.geometry

# Right join keeps every GWR result row; the GWR point geometry is then dropped
# so that only the columns coming from df_buildings carry geometry.
outputGWR_buildings = (
    df_buildings
    .join(outputGWR, on=centroid_equals_point, how="right")
    .drop(outputGWR["geometry"])
)

display(outputGWR_buildings)


StatementMeta(, 7a89620e-5459-412c-b676-8e7c0027b854, 51, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, b040ddb5-2e6c-4e54-a7a0-28f29720c9d7)

In [51]:
# Persist the buildings with their GWR results as a GeoParquet dataset.
path_output = "Files/Results/Buildings/Loppersum/buildings_AHN4_regression.parquet"

# NOTE(review): no save mode is set, so Spark's default applies and the write
# fails if the target path already exists -- choose a mode explicitly if this
# cell is meant to be re-run.
(
    outputGWR_buildings.write
    .format("geoparquet")
    .save(path_output)
)

StatementMeta(, 7a89620e-5459-412c-b676-8e7c0027b854, 52, Finished, Available, Finished)