## Geometry String to Object

This example demonstrates how to convert GeoJSON geometry strings into Sedona geometry objects.

In [None]:
# Load the state boundaries GeoJSON, build one row per feature with a parsed
# geometry, then preview the first 5 rows

# Location of the GeoJSON input file
geojson_path = "hdfs://columbus-oh.cs.colostate.edu:30785/geospatial/input/cb_2018_us_state_20m.json"

# Single pipeline: read -> one row per feature -> flatten -> parse geometry
state_boundaries_sedona = (
    spark.read
    .schema(geojsonSchema)
    # multiLine=True: the file is one JSON document spread over several lines
    .json(geojson_path, multiLine=True)
    # Each element of the "features" array becomes its own row
    .select(F.explode("features").alias("features"))
    # Promote the fields of each feature to top-level columns
    .select("features.*")
    # Sedona's ST_GeomFromGeoJSON turns the geometry string into a geometry object
    .withColumn("geometry", F.expr("ST_GeomFromGeoJSON(geometry)"))
)

# Preview without truncating long cell values
state_boundaries_sedona.show(5, truncate=False)

## Running Spatial Queries

The spatial SQL functions used below are documented in the Sedona SQL function reference: https://sedona.apache.org/1.5.1/api/sql/Function/

### Range Query

This example demonstrates how to perform a range query using ST_Contains to find geometries within a specified polygon:

In [None]:
# Range query: keep only the features that lie inside a bounding box

# Sedona SQL expression for the query window, built with ST_PolygonFromEnvelope
# from the envelope corners (min x, min y, max x, max y)
bbox_polygon = "ST_PolygonFromEnvelope(-79.5, 37.9, -75.6, 39.8)"

# Predicate: true for rows whose geometry is contained by the query window
inside_bbox = F.expr(f"ST_Contains({bbox_polygon}, geometry)")

# Apply the predicate to the state boundaries
contained_features = state_boundaries_sedona.where(inside_bbox)

# Display the matching rows
contained_features.show()

### KNN Query

This example demonstrates how to perform a k-nearest neighbors (KNN) query using ST_Distance to find the k nearest geometries to a specified point:

In [None]:
from pyspark.sql import functions as F

# Query point: the midpoint of the bounding box (-79.5, 37.9, -75.6, 39.8),
# written as WKT so it can be embedded in a Sedona SQL expression
center_longitude = (-79.5 + -75.6) / 2
center_latitude = (37.9 + 39.8) / 2
center_point_wkt = f"POINT({center_longitude} {center_latitude})"

# Column 1: the 'NAME' entry looked up in the 'properties' column
name_column = F.col("properties")["NAME"].alias("NAME")

# Column 2: distance from the query point to each geometry
# NOTE(review): ST_Distance is presumably planar, i.e. in the coordinate units
# of the data (degrees for lon/lat) rather than meters - confirm against the
# Sedona function reference
distance_column = F.expr(
    f"ST_Distance(ST_GeomFromWKT('{center_point_wkt}'), geometry)"
).alias("distance")

# KNN with k = 5: sort ascending by distance and keep the first 5 rows
knnQueryResult = (
    state_boundaries_sedona
    .select(name_column, distance_column)
    .sort("distance")
    .limit(5)
)

knnQueryResult.show()

## Generating Geohashes

This example demonstrates how to generate the geohash of each geometry using ST_GeoHash.

In [None]:
# Precision argument passed to ST_GeoHash
precision = 17

# The GeoHash expression does not depend on the DataFrame, so build it once
geohash_column = F.expr(f"ST_GeoHash(geometry, {precision})")

# Replace every dictionary entry in place with a DataFrame that carries an
# extra 'geohash' column computed from its 'geometry' column.
# Only existing keys are reassigned, so the dictionary's size never changes
# while it is being iterated.
for dataset_name in json_dataset_dataframes:
    json_dataset_dataframes[dataset_name] = (
        json_dataset_dataframes[dataset_name].withColumn('geohash', geohash_column)
    )

In [None]:
# Preview every DataFrame in the dictionary, each one labelled with its key
for file_name in json_dataset_dataframes:
    # Header line identifying which dataset the table below belongs to
    print(f"File: {file_name}")

    # Default preview of the DataFrame's leading rows
    json_dataset_dataframes[file_name].show()