# Imports

In [None]:
from pyspark.sql.types import StructType, StructField, StringType
from sedona.sql.types import GeometryType
from sedona.spark import SedonaContext

import pyspark.sql.functions as psf

# Functions

In [None]:
from sedona.spark import SedonaPyDeck
# Functions
def view_geodf(df, fill_color=None):
    """Render a DataFrame as a SedonaPyDeck choropleth map.

    Args:
        df: DataFrame forwarded to ``SedonaPyDeck.create_choropleth_map``.
        fill_color: Optional colour forwarded as the map's ``fill_color``.
            When omitted, ``[255, 12, 250]`` is used.

    Returns:
        Whatever ``SedonaPyDeck.create_choropleth_map`` returns for ``df``.
    """
    if fill_color is None:
        # Build the default per call so the list is never shared between calls.
        fill_color = [255, 12, 250]
    return SedonaPyDeck.create_choropleth_map(df=df, fill_color=fill_color)

# Session

In [None]:
# Name the application on the Sedona-provided builder.
builder = SedonaContext.builder().appName('Sedona Session')

# Get (or create) the session from the builder and hand it to SedonaContext.
spark = SedonaContext.create(builder.getOrCreate())

# Set the "sedona.global.charset" system property through the Spark context.
sc = spark.sparkContext
sc.setSystemProperty("sedona.global.charset", "utf8")

# Variables

In [None]:
# Input GeoParquet file loaded in the Extract step.
census_path = "/opt/data/input/euskadi.parquet"
# Destination written by the Load step (saved in overwrite mode).
output_path = "/opt/data/output/census/"

# Extract

In [None]:
# Explicit read schema: only the geometry column and the NPRO attribute.
SCHEMA = StructType(
    [
        StructField("geometry", GeometryType(), nullable=False),
        StructField("NPRO", StringType(), nullable=True),
    ]
)

# Load the census file as GeoParquet using the schema above.
df = (
    spark.read
    .format('geoparquet')
    .schema(SCHEMA)
    .load(census_path)
)

# Print the first rows of the loaded data.
df.show()

# Transform

In [None]:
# Trim NPRO, tag every row with the constant NCA value 'Euskadi', and expose
# the result to Spark SQL as the "census" temporary view.
(
    df
    .withColumn('NPRO', psf.trim('NPRO'))
    .withColumn('NCA', psf.lit('Euskadi'))
    .createOrReplaceTempView("census")
)

# Union the geometries of each NCA group. NCA is constant, so every row of the
# view falls into the same group. The query has no placeholders, so it is a
# plain string rather than an f-string.
dissolved_census = spark.sql("""
    SELECT NCA, ST_Union_Aggr(geometry) AS geometry 
    FROM census
    GROUP BY NCA
""")
dissolved_census.show()

# Load

In [None]:
# Save the dissolved result as GeoParquet, replacing any previous output.
(
    dissolved_census.write
    .format('geoparquet')
    .mode("overwrite")
    .save(output_path)
)


# Check

In [None]:
# Read the written output back to verify it.
# NOTE(review): this rebinds `df`, which previously held the input data;
# re-running the Transform cell after this one would use the output instead.
df = (
    spark.read
    .format('geoparquet')
    .load(output_path)
)
df.show()

In [None]:
# Show the DataFrame read back from output_path as a choropleth map.
view_geodf(df)