#### Supervisor's Advice
- **Higher Dimensions:** I suggest considering higher dimensions for this implementation. For instance, you could use **time** as a third attribute to evaluate the performance of different indices in accelerating query processing.
- **Query Definition:** Please ensure that you formally define your queries in the Assignment 2 report.

### Project Flow

**Dataset Preparation** $\rightarrow$ **Algorithm Implementation** $\rightarrow$ **Comparison Experiments**

In [10]:
import pandas as pd
import os

def build_dataset(file_path: str) -> "pd.DataFrame | None":
    """
    Reads a CSV file and constructs a point dataset with Latitude and Longitude.

    Only the 'decimalLatitude' and 'decimalLongitude' columns are loaded. Rows
    with a missing value in either column are dropped, and the result always
    has the columns 'Latitude' and 'Longitude' (in that order), regardless of
    the order in which they appear in the CSV file. A preview and the final
    shape are printed on success.

    Args:
        file_path: Path to the CSV file.

    Returns:
        The cleaned DataFrame, or None (after printing an error message) if
        the file does not exist, the required columns are missing, or the
        file cannot be read.
    """
    # Check if file exists
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return None

    # Source column name -> output column name.
    # Note: The actual column names in the CSV file start with lowercase.
    column_map = {
        'decimalLatitude': 'Latitude',
        'decimalLongitude': 'Longitude',
    }
    required_columns = list(column_map)

    # Read CSV file with only specific columns to avoid DtypeWarning and save memory
    try:
        df = pd.read_csv(file_path, usecols=required_columns)

        # read_csv keeps the file's column order and ignores the order of
        # `usecols`, so select and rename by name. Assigning new names
        # positionally would swap latitude and longitude for any file that
        # lists the longitude column first.
        dataset = df[required_columns].dropna().rename(columns=column_map)

        print("Dataset constructed successfully.")
        print(dataset.head())
        print(f"\nShape of dataset: {dataset.shape}")
        return dataset

    except ValueError as e:
        print(f"Error reading columns: {e}")
        # Fallback to reading just the header to show available columns
        try:
            df_header = pd.read_csv(file_path, nrows=0)
            print(f"Available columns: {df_header.columns.tolist()}")
        except Exception as header_error:
            # Best-effort diagnostics only: report the failure and carry on.
            print(f"Could not list available columns: {header_error}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

In [11]:
# Locations of the two species-occurrence CSV files
platypus_file_path = 'Dataset/Platypus/Platypus.csv'
legless_file_path = 'Dataset/Legless_Lizards/Legless_Lizards.csv'

# Load each file into a Latitude/Longitude point dataset (Platypus first,
# then Legless Lizard)
platypus_dataset = build_dataset(platypus_file_path)
legless_dataset = build_dataset(legless_file_path)

Dataset constructed successfully.
    Latitude   Longitude
0 -34.283333  119.450000
1 -38.155730  144.293950
2 -38.150160  144.301090
3 -38.082467  144.281096
4 -38.147793  144.311988

Shape of dataset: (29656, 2)
Dataset constructed successfully.
    Latitude   Longitude
0 -33.756980  150.628200
1 -33.601780  150.829319
2 -33.740972  150.741930
3 -34.170950  150.612113
4 -33.762350  150.831433

Shape of dataset: (1454283, 2)
Dataset constructed successfully.
    Latitude   Longitude
0 -33.756980  150.628200
1 -33.601780  150.829319
2 -33.740972  150.741930
3 -34.170950  150.612113
4 -33.762350  150.831433

Shape of dataset: (1454283, 2)


In [17]:
import geopandas as gpd
import fiona

def build_polygon_dataset(gdb_path: str, layer_name: str = None) -> gpd.GeoDataFrame:
    """
    Load one layer of a GDB into a GeoDataFrame.

    When layer_name is omitted (or empty), the first layer listed in the GDB
    is used. On success a short summary (shape, geometry types, CRS and the
    first rows) is printed and the GeoDataFrame is returned. On any failure
    an error message is printed and None is returned.
    """
    # Nothing to do if the GDB path is missing
    if not os.path.exists(gdb_path):
        print(f"Error: GDB not found at {gdb_path}")
        return None

    try:
        # Enumerate the layers so the caller can see what the GDB contains
        available = fiona.listlayers(gdb_path)
        print(f"Layers found in {os.path.basename(gdb_path)}: {available}")

        if not available:
            print("Error: No layers found in the GDB.")
            return None

        # Use the requested layer, falling back to the first one available
        chosen = layer_name or available[0]
        if chosen not in available:
            print(f"Error: Layer '{chosen}' not found in GDB.")
            return None

        print(f"Reading layer: '{chosen}'...")
        polygons = gpd.read_file(gdb_path, layer=chosen)

        # Summarize what was loaded
        print("Dataset constructed successfully.")
        print(f"Shape: {polygons.shape}")
        print(f"Geometry Type: {polygons.geom_type.unique()}")
        print(f"CRS (Coordinate Reference System): {polygons.crs}")
        print(polygons.head())
        return polygons

    except ImportError:
        print("Error: 'geopandas' or 'fiona' library is not installed. Please install them using 'pip install geopandas fiona'.")
        return None
    except Exception as e:
        print(f"An error occurred while reading the GDB: {e}")
        return None

# Protected areas: GDB location, then load its polygons
protected_areas_gdb = 'Dataset/Protected_Areas_of_Queensland/data.gdb'
print("--- Building Protected Areas Dataset ---")
protected_areas_gdf = build_polygon_dataset(protected_areas_gdb)

# Wetlands: GDB location, then load its polygons
wetlands_gdb = 'Dataset/Wetlands_data_QueenLand/data.gdb'
print("\n--- Building Wetlands Dataset ---")
wetlands_gdf = build_polygon_dataset(wetlands_gdb)

--- Building Protected Areas Dataset ---
Layers found in data.gdb: ['Protected_areas']
Reading layer: 'Protected_areas'...
Dataset constructed successfully.
Shape: (2214, 26)
Geometry Type: ['MultiPolygon']
CRS (Coordinate Reference System): EPSG:7844
  lot     plan   lotplans  sysintcode                   estatename  \
0  28    SB653    28SB653  0691ABB001  Abbot Bay Conservation Park   
1   1  AP22467   1AP22467  3336ABE001      Abergowrie State Forest   
2   2  AP22467   2AP22467  3336ABE001      Abergowrie State Forest   
3   5  AP22467   5AP22467  3336ABE001      Abergowrie State Forest   
4  10  AP22467  10AP22467  3336ABE001      Abergowrie State Forest   

      nameabbrev                     namecaps esttype     dcdbtenure  \
0   Abbot Bay CP  ABBOT BAY CONSERVATION PARK      CP  National Park   
1  Abergowrie SF      ABERGOWRIE STATE FOREST      SF   State Forest   
2  Abergowrie SF      ABERGOWRIE STATE FOREST      SF   State Forest   
3  Abergowrie SF      ABERGOWRIE STATE 