<a href="https://colab.research.google.com/github/abhijithnadig/abhijithnadig/blob/main/Spatial_Indexing_using_Hilbert_Curves.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **CS236 - Database Management Systems**

## **Milestone 1**

### **Name -** Abhijith A Nadig
### **Student ID -** 862546804

In [None]:
# Install the headless OpenJDK 8 package (the JVM that JAVA_HOME is pointed at further down).
# -qq plus the redirect to /dev/null keep apt's progress output out of the notebook.
!apt-get install openjdk-8-jdk-headless -qq > /dev/null

In [None]:
!wget -q https://dlcdn.apache.org/spark/spark-3.5.3/spark-3.5.3-bin-hadoop3.tgz
!tar xf spark-3.5.3-bin-hadoop3.tgz

In [None]:
import os

# Point the process environment at the JDK installed through apt and at the Spark
# distribution unpacked under /content, including Spark's bundled Python sources.
os.environ.update({
    "JAVA_HOME": "/usr/lib/jvm/java-8-openjdk-amd64",
    "SPARK_HOME": "/content/spark-3.5.3-bin-hadoop3",
    "PYTHONPATH": "/content/spark-3.5.3-bin-hadoop3/python",
})

In [None]:
!pip install findspark
import findspark
findspark.init()

Collecting findspark
  Downloading findspark-2.0.1-py2.py3-none-any.whl.metadata (352 bytes)
Downloading findspark-2.0.1-py2.py3-none-any.whl (4.4 kB)
Installing collected packages: findspark
Successfully installed findspark-2.0.1


In [None]:
!pip install apache-sedona[spark]
!pip install hilbertcurve

Collecting apache-sedona[spark]
  Downloading apache_sedona-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.0 kB)
Downloading apache_sedona-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (190 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/190.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━[0m [32m112.6/190.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.0/190.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: apache-sedona
Successfully installed apache-sedona-1.7.0
Collecting hilbertcurve
  Downloading hilbertcurve-2.0.5-py3-none-any.whl.metadata (11 kB)
Downloading hilbertcurve-2.0.5-py3-none-any.whl (8.6 kB)
Installing collected packages: hilbertcurve
Successfully installed hilbertcurve-2.0.5


In [None]:
from sedona.core.SpatialRDD import PointRDD
from sedona.core.spatialOperator import RangeQuery, KNNQuery
from sedona.core.geom.envelope import Envelope
from sedona.core.enums import GridType, IndexType, FileDataSplitter
from shapely.geometry import Point
from pyspark.storagelevel import StorageLevel
from pyspark.sql.functions import col, expr, from_unixtime, lit, when
from datetime import datetime
from hilbertcurve.hilbertcurve import HilbertCurve
from sedona.spark import *
import time
import pandas as pd


In [None]:
# Build the Spark session with the Sedona JVM artifacts on the classpath, then register
# Sedona's types and SQL functions on it.
# The artifacts have to line up with the rest of the stack used in this notebook:
#   - sedona-spark-3.5_2.12 is the build for the Spark 3.5.x / Scala 2.12 distribution
#     downloaded above (the previous sedona-spark-3.0 build targets Spark 3.0-3.3);
#   - release 1.7.0 matches the apache-sedona 1.7.0 Python package shown in the install
#     output (the previous jars were 1.6.1);
#   - geotools-wrapper 1.7.0-28.5 is the wrapper published for Sedona 1.7.0.
config = SedonaContext.builder(). \
    config('spark.jars.packages',
           'org.apache.sedona:sedona-spark-3.5_2.12:1.7.0,'
           'org.datasyslab:geotools-wrapper:1.7.0-28.5'). \
    config('spark.jars.repositories', 'https://artifacts.unidata.ucar.edu/repository/unidata-all'). \
    getOrCreate()
sedona = SedonaContext.create(config)

In [None]:
# Location of the gzip-compressed JSON input file
file_path = '/content/2017-07-22_09-02-53.txt.gz'

# Load it as a DataFrame through the Sedona-enabled session's JSON reader
t_data = sedona.read.format('json').load(file_path)


In [None]:
# Tweets that carry no exact 'coordinates' value: preview the bounding box of their
# place (first 5 rows, columns printed at full width).
rows_without_coordinates = t_data.where(col('coordinates').isNull())
rows_without_coordinates.select('place.bounding_box').show(5, truncate=False)

+---------------------------------------------------------------------------------------------------------------+
|bounding_box                                                                                                   |
+---------------------------------------------------------------------------------------------------------------+
|{[[[-2.319934, 53.343623], [-2.319934, 53.570282], [-2.147026, 53.570282], [-2.147026, 53.343623]]], Polygon}  |
|{[[[109.664659, 20.221264], [109.664659, 25.518608], [117.17479, 25.518608], [117.17479, 20.221264]]], Polygon}|
|{[[[139.673228, 35.673404], [139.673228, 35.72991], [139.745133, 35.72991], [139.745133, 35.673404]]], Polygon}|
|{[[[139.716598, 35.67375], [139.716598, 35.67375], [139.716598, 35.67375], [139.716598, 35.67375]]], Polygon}  |
|{[[[72.64293, 15.606794], [72.64293, 22.029028], [80.899558, 22.029028], [80.899558, 15.606794]]], Polygon}    |
+---------------------------------------------------------------------------------------

In [None]:
# Convert the Spark DataFrame 't_data' to a Pandas DataFrame for local processing.
# toPandas() materialises every row of t_data in the driver process.
df = t_data.toPandas()
# Save the Pandas DataFrame to 'twitter.csv' in the current working directory, without the index column.
df.to_csv('twitter.csv', index=None)


In [None]:
def _coordinate_column(axis):
    """Build the column expression for one coordinate axis of a tweet.

    axis is the position inside each coordinate pair: 0 is stored as 'longitude'
    and 1 as 'latitude' below.

    The value is taken from 'coordinates' when that field is present; otherwise it is
    the midpoint of corners 0 and 2 of the first ring of 'place.bounding_box'; it is
    null when neither field is available.
    """
    exact_value = expr(f"coordinates.coordinates[{axis}]")
    bbox_midpoint = (
        expr(f"place.bounding_box.coordinates[0][0][{axis}]")
        + expr(f"place.bounding_box.coordinates[0][2][{axis}]")
    ) / 2
    return (
        when(col("coordinates").isNotNull(), exact_value)
        .when(col("place.bounding_box").isNotNull(), bbox_midpoint)
        .otherwise(None)
    )


# Destination directory of the cleaned CSV
output_path = "/content/Outputs/Null_handled_Twitter_dataset"

data_cleaned = (
    t_data
    # Step 1: longitude / latitude, with the bounding-box fallback
    .withColumn("longitude", _coordinate_column(0))
    .withColumn("latitude", _coordinate_column(1))
    # Step 2: point geometry, only where both coordinates are known
    .withColumn(
        "location",
        when(
            col("longitude").isNotNull() & col("latitude").isNotNull(),
            expr("ST_Point(longitude, latitude)"),
        ).otherwise(None),
    )
    # Step 3: epoch milliseconds -> timestamp column
    .withColumn(
        "timestamp",
        from_unixtime(col("timestamp_ms").cast("long") / 1000).cast("timestamp"),
    )
    # Step 4: drop rows that lack a location or a timestamp
    .filter(col("location").isNotNull() & col("timestamp").isNotNull())
    # Step 5: keep only the three columns used downstream
    .selectExpr("longitude", "latitude", "timestamp")
)

# Step 6: write the result as header-less CSV, replacing any earlier output
data_cleaned.write.mode("overwrite").csv(output_path, header=False)


In [None]:
# --- Settings that are passed to PointRDD below ---------------------------------------
# Directory holding the cleaned, header-less CSV written by the previous step
input_location = "/content/Outputs/Null_handled_Twitter_dataset"
# Index of the first coordinate column (longitude, latitude start at column 0)
offset = 0
# The input files are comma-separated
splitter = FileDataSplitter.CSV
# Keep the remaining columns of each row attached to the point as user data
carry_other_attributes = True

# --- Settings defined here but not passed to PointRDD below ---------------------------
# Storage level (memory only)
level = StorageLevel.MEMORY_ONLY
# Source CRS (EPSG 4326, WGS 84) and target CRS (EPSG 5070, USA Contiguous Albers Equal Area Conic)
s_epsg = "epsg:4326"
t_epsg = "epsg:5070"

# Load the CSV rows into a Sedona PointRDD
point_rdd = PointRDD(
    sparkContext=sedona.sparkContext,
    InputLocation=input_location,
    Offset=offset,
    splitter=splitter,
    carryInputData=carry_other_attributes,
)


In [None]:
# Spatially partition the PointRDD using the chosen grid type.
partitioning_strategy = GridType.QUADTREE  # Partitioning strategy (QUADTREE, KDBTREE, etc.)
point_rdd.spatialPartitioning(partitioning_strategy)  # Last expression of the cell, so its return value is echoed as the cell output


True

In [None]:
# Set to TRUE if a join query is going to be run after building the index, otherwise leave as False.
# The flag is handed to buildIndex as its second argument.
build_on_spatial_partitioned_rdd = False

# Build an RTREE index on the PointRDD to optimize spatial queries
point_rdd.buildIndex(IndexType.RTREE, build_on_spatial_partitioned_rdd)


In [None]:
# Spatio-temporal range query 1: points inside a longitude/latitude window whose timestamp
# (carried as the point's user data) falls inside [t1, t2].

# Bounding box of the query window. The latitude bounds were previously assigned the wrong
# way round (ymin = 50.52, ymax = 40.01); each *min value is now the smaller one.
xmin, ymin = -80.01, 40.01  # Minimum longitude and latitude
xmax, ymax = -70.01, 50.52  # Maximum longitude and latitude

# Time range for filtering results
t1 = datetime.strptime("2017-07-22T09:08:22.000Z", "%Y-%m-%dT%H:%M:%S.%fZ")  # Start of time range
t2 = datetime.strptime("2017-07-22T09:10:29.000Z", "%Y-%m-%dT%H:%M:%S.%fZ")  # End of time range

# Query window built from the bounding box (argument order: xmin, xmax, ymin, ymax)
query_window = Envelope(xmin, xmax, ymin, ymax)

# Use the index built on the PointRDD, and count points lying on the window boundary
index_used = True
consider_boundary = True

# Start timing the query
start_time = time.time()

# Spatial part: range query against the PointRDD
query_result = RangeQuery.SpatialRangeQuery(
    point_rdd,
    query_window,
    consider_boundary,
    index_used
)

# Temporal part: keep points whose user-data timestamp lies in [t1, t2]
filtered_results = query_result.filter(
    lambda geom: t1 <= datetime.strptime(geom.getUserData(), "%Y-%m-%dT%H:%M:%S.%fZ") <= t2
)

# filter() only describes the computation; collect() is what actually runs it on the cluster.
# It therefore has to happen before the timer is stopped, otherwise the reported time covers
# nothing but building the RDD lineage.
matching_points = filtered_results.collect()

# Stop timing and report
end_time = time.time()
print("Query executed in", end_time - start_time, "seconds")

# Print latitude and longitude of every match, rounded to 3 decimal places
print("Latitude         Longitude")
for result in matching_points:
    geometry = result.geom  # Access the geometry object
    latitude = round(geometry.y, 3)
    longitude = round(geometry.x, 3)
    print("{:<16} {}".format(latitude, longitude))




Query executed in 0.042449951171875 seconds
Latitude         Longitude
40.073           -74.724
40.626           -74.244
40.626           -74.244
40.655           -73.949
40.704           -73.709
40.734           -74.185
40.736           -74.172
40.85            -73.849
40.85            -73.849
40.85            -73.849
41.022           -74.679
41.379           -72.868
41.379           -72.868
42.045           -71.114
42.208           -71.687
42.314           -71.089
42.886           -78.868


In [None]:
# Spatio-temporal range query 2: points inside a longitude/latitude window whose timestamp
# (carried as the point's user data) falls inside [t1, t2].

# Bounding box of the query window
xmin, ymin = -100.01, 10.01 # Minimum longitude and latitude
xmax, ymax = -82.00, 38.50 # Maximum longitude and latitude

# Time range for filtering results
t1 = datetime.strptime("2017-07-22T09:08:22.000Z", "%Y-%m-%dT%H:%M:%S.%fZ")  # Start of time range
t2 = datetime.strptime("2017-07-22T09:10:29.000Z", "%Y-%m-%dT%H:%M:%S.%fZ")  # End of time range

# Query window built from the bounding box (argument order: xmin, xmax, ymin, ymax)
query_window = Envelope(xmin, xmax, ymin, ymax)

# Use the index built on the PointRDD, and count points lying on the window boundary
index_used = True
consider_boundary = True

# Start timing the query
start_time = time.time()

# Spatial part: range query against the PointRDD
query_result = RangeQuery.SpatialRangeQuery(
    point_rdd,
    query_window,
    consider_boundary,
    index_used
)

# Temporal part: keep points whose user-data timestamp lies in [t1, t2]
filtered_results = query_result.filter(
    lambda geom: t1 <= datetime.strptime(geom.getUserData(), "%Y-%m-%dT%H:%M:%S.%fZ") <= t2
)

# filter() only describes the computation; collect() is what actually runs it on the cluster.
# It therefore has to happen before the timer is stopped, otherwise the reported time covers
# nothing but building the RDD lineage.
matching_points = filtered_results.collect()

# Stop timing and report
end_time = time.time()
print("Query executed in", end_time - start_time, "seconds")

# Print latitude and longitude of every match, rounded to 3 decimal places
print("Latitude         Longitude")
for result in matching_points:
    geometry = result.geom  # Access the geometry object
    latitude = round(geometry.y, 3)
    longitude = round(geometry.x, 3)
    print("{:<16} {}".format(latitude, longitude))



Query executed in 0.010615348815917969 seconds
Latitude         Longitude
19.032           -98.154
19.119           -98.261
19.433           -99.133
16.922           -96.364
19.184           -96.229
22.283           -97.823
20.941           -89.624
27.462           -82.579
27.699           -83.804
27.699           -83.804
27.699           -83.804
26.112           -97.492
26.177           -98.0
26.3             -97.926
26.315           -98.307
27.737           -97.432
27.798           -97.09
29.111           -97.286
29.288           -94.823
29.38            -94.968
29.548           -95.327
27.699           -83.804
27.71            -82.321
27.997           -82.443
29.763           -95.383
29.418           -98.541
29.418           -98.541
29.418           -98.541
29.418           -98.541
29.418           -98.541
29.418           -98.541
29.838           -95.446
29.838           -95.446
29.838           -95.446
29.902           -93.956
29.983           -90.011
30.426           -89.069
30.4

In [None]:
# Spatio-temporal range query 3: points inside a longitude/latitude window whose timestamp
# (carried as the point's user data) falls inside [t1, t2].

# Bounding box of the query window. The longitude bounds were previously assigned the wrong
# way round (xmin = 99.01, xmax = 72.00); each *min value is now the smaller one.
xmin, ymin = 72.00, 10.01 # Minimum longitude and latitude
xmax, ymax = 99.01, 38.50 # Maximum longitude and latitude

# Time range for filtering results
t1 = datetime.strptime("2017-07-22T09:08:22.000Z", "%Y-%m-%dT%H:%M:%S.%fZ")  # Start of time range
t2 = datetime.strptime("2017-07-22T09:10:29.000Z", "%Y-%m-%dT%H:%M:%S.%fZ")  # End of time range

# Query window built from the bounding box (argument order: xmin, xmax, ymin, ymax)
query_window = Envelope(xmin, xmax, ymin, ymax)

# Use the index built on the PointRDD, and count points lying on the window boundary
index_used = True
consider_boundary = True

# Start timing the query
start_time = time.time()

# Spatial part: range query against the PointRDD
query_result = RangeQuery.SpatialRangeQuery(
    point_rdd,
    query_window,
    consider_boundary,
    index_used
)

# Temporal part: keep points whose user-data timestamp lies in [t1, t2]
filtered_results = query_result.filter(
    lambda geom: t1 <= datetime.strptime(geom.getUserData(), "%Y-%m-%dT%H:%M:%S.%fZ") <= t2
)

# filter() only describes the computation; collect() is what actually runs it on the cluster.
# It therefore has to happen before the timer is stopped, otherwise the reported time covers
# nothing but building the RDD lineage.
matching_points = filtered_results.collect()

# Stop timing and report
end_time = time.time()
print("Query executed in", end_time - start_time, "seconds")

# Print latitude and longitude of every match, rounded to 3 decimal places
print("Latitude         Longitude")
for result in matching_points:
    geometry = result.geom  # Access the geometry object
    latitude = round(geometry.y, 3)
    longitude = round(geometry.x, 3)
    print("{:<16} {}".format(latitude, longitude))



Query executed in 0.012325525283813477 seconds
Latitude         Longitude
10.543           76.137
12.923           77.558
12.939           80.162
12.972           77.595
13.024           80.262
15.276           74.006
15.4             74.005
18.704           98.931
18.508           73.91
18.772           98.999
18.784           98.953
18.792           98.969
19.097           73.036
19.174           72.874
19.174           72.874
19.174           72.874
19.174           72.874
19.174           72.874
19.939           82.584
21.171           72.789
21.171           72.789
22.954           73.335
23.014           72.57
22.613           88.352
22.613           88.352
22.613           88.352
22.613           88.352
22.613           88.352
24.459           73.826
25.373           82.933
25.374           78.732
26.628           73.878
26.825           80.884
26.91            75.748
27.014           80.824
28.331           77.351
28.369           76.975
28.391           84.133
28.391          

# **Milestone 2**

## **2.1 Obtain Hilbert Numbers**

In [None]:
# Initialize parameters for the Hilbert Curve:
p_val = 10  # First HilbertCurve argument; the normalisation below maps each axis onto cells 0 .. 2**p_val - 1
dim = 2  # Second HilbertCurve argument; points are passed to the curve as [x, y] pairs

# Create a Hilbert curve instance with the specified parameters.
hilbert_instance = HilbertCurve(p_val, dim)

# Define the spatial bounds for the coordinate normalization
# (x is longitude, y is latitude in the code that uses these bounds).
x_min, y_min = -180, -90
x_max, y_max = 180, 90

# Function to compute the Hilbert index for a given point
def compute_hilbert_index(pt):
    """Map one point to the text record "<hilbert_index>,<user_data>".

    Reads the module-level p_val, x_min, x_max, y_min, y_max and hilbert_instance.

    Args:
        pt: object exposing geom.x (longitude), geom.y (latitude) and getUserData().

    Returns:
        str: the Hilbert distance of the point's grid cell, a comma, and str() of the
        point's user data.
    """
    max_cell = 2**p_val - 1

    # Scale longitude/latitude from [x_min, x_max] x [y_min, y_max] onto the integer grid.
    x_norm = int((pt.geom.x - x_min) / (x_max - x_min) * max_cell)
    y_norm = int((pt.geom.y - y_min) / (y_max - y_min) * max_cell)

    # Clamp to the grid, the same way process_range_query clamps its query corners, so a
    # coordinate outside the configured bounds lands on the border cell instead of being
    # handed to the curve as an out-of-range cell.
    x_norm = max(0, min(x_norm, max_cell))
    y_norm = max(0, min(y_norm, max_cell))

    # Position of the cell along the Hilbert curve
    hilbert_idx = hilbert_instance.distance_from_point([x_norm, y_norm])

    # Append the user data carried by the point
    return str(hilbert_idx) + "," + str(pt.getUserData())

# Filter the RDD to retain only valid points with non-null geometries and user data.
valid_points_rdd = point_rdd.rawSpatialRDD.filter(
    lambda pt: pt.geom is not None and pt.getUserData() is not None
)

# Map the valid points to compute their Hilbert indices and include associated user data.
hilbert_mapped_data = valid_points_rdd.map(compute_hilbert_index)

# Print the first 5 results of the computed Hilbert indices and associated data.
print("Results:")
print(hilbert_mapped_data.take(5))

# Remove any existing output directory to prevent errors when saving the new data.
!rm -rf {output_dir}

# Specify the directory for saving the mapped Hilbert data.
output_dir = "/content/hilbert_data"

# Save the computed Hilbert indices and associated data as a text file in the specified directory.
hilbert_mapped_data.saveAsTextFile(output_dir)

# Confirm that the data has been successfully saved.
print("Data saved to: " + output_dir)


Results:
['457532,2017-07-22T09:02:53.000Z', '751701,2017-07-22T09:02:53.000Z', '505164,2017-07-22T09:02:53.000Z', '749570,2017-07-22T09:02:52.000Z', '733342,2017-07-22T09:02:53.000Z']
Data saved to: /content/hilbert_data


## **2.2 In Apache Sedona, spatially partition the datasets (using the new 2D data) into the worker nodes, and build an R-Tree index on each data partition.**

In [None]:
from sedona.core.enums import GridType, IndexType

# Spatially partition the datasets into worker nodes.
point_rdd.spatialPartitioning(GridType.QUADTREE)

# Build an R-Tree index on each partition.
point_rdd.buildIndex(IndexType.RTREE, True)

# Specify the directory for saving the partitioned and indexed data
output_dir = "/content/spatially_partitioned_indexed_data"

# Remove the output directory if it already exists to avoid conflicts
!rm -rf {output_dir}

# Save the spatially partitioned and indexed data
point_rdd.rawSpatialRDD.saveAsTextFile(output_dir)

# Print confirmation
print("Spatially partitioned and indexed data saved to: " + output_dir)



Spatially partitioned and indexed data saved to: /content/spatially_partitioned_indexed_data


## **2.3 Run Range query with time filter on the data**

In [None]:
import time
from datetime import datetime

# Function to convert ISO 8601 timestamp to Unix time
def parse_timestamp_to_unix(ts):
    """Convert a "YYYY-MM-DDTHH:MM:SS.fffZ" string to whole Unix seconds.

    The trailing "Z" marks the value as UTC, so the parsed datetime is tagged as UTC
    before conversion. A naive datetime would be interpreted in the machine's local
    timezone by .timestamp(), shifting the result on any host that is not running in UTC.

    Args:
        ts: timestamp string in the format above.

    Returns:
        int: seconds since the Unix epoch, or None when ts is not a string in the
        expected format (including None).
    """
    from datetime import timezone  # local import: the notebook's import cells only bring in `datetime`

    try:
        parsed = datetime.strptime(ts, "%Y-%m-%dT%H:%M:%S.%fZ")
    except (ValueError, TypeError):
        # ValueError: wrong format; TypeError: ts is not a string (e.g. None)
        return None
    return int(parsed.replace(tzinfo=timezone.utc).timestamp())

# Function to process range queries
def _parse_hilbert_row(row):
    """Split one "<hilbert_index>,<timestamp>" record into (index, unix_seconds).

    Returns None when the record has no comma, the index is not an integer, or the
    timestamp cannot be parsed, so callers can skip bad records instead of failing.
    """
    idx_text, separator, ts_text = row.partition(",")
    if not separator:
        return None
    try:
        hilbert_idx = int(idx_text)
    except ValueError:
        return None
    unix_ts = parse_timestamp_to_unix(ts_text)
    if unix_ts is None:
        return None
    return hilbert_idx, unix_ts


def process_range_query(query, hilbert_instance, p_val, x_min, x_max, y_min, y_max, hilbert_mapped_data):
    """Answer a spatio-temporal range query over "<hilbert_index>,<timestamp>" records.

    Filter step: keep records whose Hilbert index lies between the indices of the two
    query corners and whose timestamp lies in the query's time range. Refinement step:
    map every kept index back to its grid cell, convert the cell to coordinates and keep
    only those inside the original rectangle. Counts and timings are printed.

    NOTE(review): the interval spanned by the two corner indices is only a candidate
    filter. Cells inside the rectangle can have an index outside that interval, and the
    refined coordinates are those of the grid cell, not of the original point, so the
    answer is approximate on both counts.

    Args:
        query: ((x1, y1), (x2, y2), (t1, t2)) with x1 <= x2, y1 <= y2 and t1/t2 as
            "YYYY-MM-DDTHH:MM:SS.fffZ" strings.
        hilbert_instance: object providing distance_from_point([u, v]) and
            point_from_distance(index).
        p_val: grid resolution; each axis has cells 0 .. 2**p_val - 1.
        x_min, x_max, y_min, y_max: coordinate bounds mapped onto the grid.
        hilbert_mapped_data: RDD-like object (filter(...).collect()) of record strings.

    Returns:
        dict: {"results": [(x, y, unix_timestamp), ...]}.

    Raises:
        ValueError: if t1 or t2 cannot be parsed.
    """
    (x1, y1), (x2, y2), (t1, t2) = query

    # Convert t1 and t2 to Unix timestamps; fail early with a clear message instead of
    # letting a None bound break every comparison inside the distributed filter.
    t1_unix = parse_timestamp_to_unix(t1)
    t2_unix = parse_timestamp_to_unix(t2)
    if t1_unix is None or t2_unix is None:
        raise ValueError("query time range must use the format YYYY-MM-DDTHH:MM:SS.fffZ")

    max_cell = 2**p_val - 1

    def to_cell(value, lower, upper):
        # Scale a coordinate onto the grid and clamp it to a valid cell number.
        return max(0, min(int((value - lower) / (upper - lower) * max_cell), max_cell))

    # Step 1: Enlarge the query range to whole grid cells
    u1, v1 = to_cell(x1, x_min, x_max), to_cell(y1, y_min, y_max)
    u2, v2 = to_cell(x2, x_min, x_max), to_cell(y2, y_min, y_max)

    # Step 2: Compute Hilbert indices of the two corners. The curve gives no guarantee that
    # the first corner has the smaller index, so order them; otherwise the interval is empty.
    h_lo, h_hi = sorted((
        hilbert_instance.distance_from_point([u1, v1]),
        hilbert_instance.distance_from_point([u2, v2]),
    ))

    def is_candidate(row):
        # Runs on the workers: captures only plain integers, never the RDD or the curve.
        parsed = _parse_hilbert_row(row)
        if parsed is None:
            return False
        hilbert_idx, unix_ts = parsed
        return h_lo <= hilbert_idx <= h_hi and t1_unix <= unix_ts <= t2_unix

    # Retrieve all records inside the Hilbert interval and the time range
    start_time = time.time()
    filtered_data = hilbert_mapped_data.filter(is_candidate).collect()
    filter_end_time = time.time()

    # Refinement Step: Convert to (x, y, t) triples and return only points within the original range
    refined_results = []
    for data in filtered_data:
        hilbert_idx, timestamp = _parse_hilbert_row(data)
        cell_x, cell_y = hilbert_instance.point_from_distance(hilbert_idx)
        x = cell_x / max_cell * (x_max - x_min) + x_min
        y = cell_y / max_cell * (y_max - y_min) + y_min
        if x1 <= x <= x2 and y1 <= y <= y2 and t1_unix <= timestamp <= t2_unix:
            refined_results.append((x, y, timestamp))

    refinement_end_time = time.time()

    # Calculate metrics
    total_points_retrieved = len(filtered_data)
    spurious_points = total_points_retrieved - len(refined_results)
    spurious_fraction = spurious_points / total_points_retrieved if total_points_retrieved > 0 else 0
    print("Spurious Points: ", spurious_points)
    print("Total Points Retrieved: ", total_points_retrieved)
    print("Spurious Fraction: ", spurious_fraction)
    print("Filter Time: ",filter_end_time - start_time)
    print("Refinement Time: ", refinement_end_time - filter_end_time)
    print("Total Time: ", refinement_end_time - start_time)

    return {
        "results": refined_results
        }


# Example query: lower-left corner, upper-right corner, and (start, end) of the time range
query = [
    (-100, 30),
    (-90, 40),
    ("2017-07-22T09:06:00.000Z", "2017-07-22T09:07:00.000Z"),
]

# Run it against the Hilbert-mapped records, using the grid settings defined earlier
result = process_range_query(
    query=query,
    hilbert_instance=hilbert_instance,
    p_val=p_val,
    x_min=x_min,
    x_max=x_max,
    y_min=y_min,
    y_max=y_max,
    hilbert_mapped_data=hilbert_mapped_data,
)

# Show the refined (x, y, unix_timestamp) triples
print("Query Results:")
print(result["results"])



Spurious Points:  41
Total Points Retrieved:  70
Spurious Fraction:  0.5857142857142857
Filter Time:  2.4094042778015137
Refinement Time:  0.0034723281860351562
Total Time:  2.412876605987549
Query Results:
[(-90.26392961876833, 32.111436950146626, 1500714360), (-98.0058651026393, 30.175953079178882, 1500714362), (-92.37536656891496, 32.46334310850439, 1500714363), (-92.37536656891496, 34.750733137829926, 1500714365), (-96.95014662756599, 32.81524926686217, 1500714369), (-97.30205278592376, 32.63929618768327, 1500714369), (-94.83870967741935, 39.67741935483872, 1500714373), (-97.30205278592376, 32.63929618768327, 1500714375), (-96.95014662756599, 32.46334310850439, 1500714376), (-98.0058651026393, 30.175953079178882, 1500714376), (-97.65395894428153, 31.055718475073306, 1500714378), (-96.95014662756599, 32.81524926686217, 1500714379), (-97.30205278592376, 36.686217008797655, 1500714381), (-96.59824046920822, 32.99120234604105, 1500714383), (-90.26392961876833, 32.111436950146626, 15007