diff --git a/common/pom.xml b/common/pom.xml index 000cfcd650f..67898b6be1c 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -124,6 +124,11 @@ org.datasyslab campskeleton + + + org.datasyslab + proj4sedona + src/main/java diff --git a/common/src/main/java/org/apache/sedona/common/Constructors.java b/common/src/main/java/org/apache/sedona/common/Constructors.java index 6542e691a22..d44aede0e85 100644 --- a/common/src/main/java/org/apache/sedona/common/Constructors.java +++ b/common/src/main/java/org/apache/sedona/common/Constructors.java @@ -264,20 +264,24 @@ public static Geometry lineFromText(String geomString) { } public static Geometry polygonFromEnvelope(double minX, double minY, double maxX, double maxY) { + return polygonFromEnvelope(minX, minY, maxX, maxY, GEOMETRY_FACTORY); + } + + public static Geometry polygonFromEnvelope( + double minX, double minY, double maxX, double maxY, GeometryFactory factory) { Coordinate[] coordinates = new Coordinate[5]; coordinates[0] = new Coordinate(minX, minY); coordinates[1] = new Coordinate(minX, maxY); coordinates[2] = new Coordinate(maxX, maxY); coordinates[3] = new Coordinate(maxX, minY); coordinates[4] = coordinates[0]; - return GEOMETRY_FACTORY.createPolygon(coordinates); + return factory.createPolygon(coordinates); } public static Geometry makeEnvelope( double minX, double minY, double maxX, double maxY, int srid) { - Geometry envelope = polygonFromEnvelope(minX, minY, maxX, maxY); - envelope.setSRID(srid); - return envelope; + GeometryFactory geometryFactory = new GeometryFactory(new PrecisionModel(), srid); + return polygonFromEnvelope(minX, minY, maxX, maxY, geometryFactory); } public static Geometry makeEnvelope(double minX, double minY, double maxX, double maxY) { @@ -315,10 +319,6 @@ public static Geometry geomFromMySQL(byte[] binary) throws ParseException { buffer.get(wkb); - Geometry geom = geomFromWKB(wkb); - - geom.setSRID(srid); - - return geom; + return geomFromWKB(wkb, srid); } } diff --git a/common/src/main/java/org/apache/sedona/common/Functions.java b/common/src/main/java/org/apache/sedona/common/Functions.java index 62a42e5384e..22b9a2fbf4d 100644 --- a/common/src/main/java/org/apache/sedona/common/Functions.java +++ b/common/src/main/java/org/apache/sedona/common/Functions.java @@ -270,9 +270,7 @@ public static Geometry expand(Geometry geometry, double deltaX, double deltaY, d newCoords[4] = newCoords[0]; return geometry.getFactory().createPolygon(newCoords); } - Geometry result = Constructors.polygonFromEnvelope(minX, minY, maxX, maxY); - result.setSRID(geometry.getSRID()); - return result; + return Constructors.polygonFromEnvelope(minX, minY, maxX, maxY, geometry.getFactory()); } public static Geometry buffer(Geometry geometry, double radius) { diff --git a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java new file mode 100644 index 00000000000..b5a5c1c43e0 --- /dev/null +++ b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.datasyslab.proj4sedona.core.Proj; +import org.datasyslab.proj4sedona.jts.JTSGeometryTransformer; +import org.datasyslab.proj4sedona.parser.CRSSerializer; +import org.locationtech.jts.geom.Geometry; + +/** + * CRS transformation functions using proj4sedona library. + * + *

This class provides coordinate reference system (CRS) transformation for vector geometries + * using the proj4sedona library, which is a pure Java implementation without LGPL dependencies. + * + *

Supported CRS Input Formats

+ * + * + * + *

NAD Grid Support

+ * + *

NAD grid files can be specified in PROJ strings using the +nadgrids parameter: + * + *

+ * + *

Grid files with @ prefix are optional - if not found, the transformation continues without the + * grid. Without @ prefix, the grid is mandatory and an error is thrown if not found. + * + * @since 1.9.0 + */ +public class FunctionsProj4 { + + /** Pattern to match EPSG codes (e.g., "EPSG:4326", "epsg:2154") */ + private static final Pattern EPSG_PATTERN = + Pattern.compile("^EPSG:(\\d+)$", Pattern.CASE_INSENSITIVE); + + /** + * Transform a geometry from the source CRS specified by the geometry's SRID to the target CRS. + * + * @param geometry The geometry to transform + * @param targetCRS Target CRS definition (EPSG code, WKT, PROJ string, or PROJJSON) + * @return The transformed geometry with SRID set to the target CRS EPSG code (if identifiable) + * @throws IllegalArgumentException if source CRS cannot be determined from geometry SRID + */ + public static Geometry transform(Geometry geometry, String targetCRS) { + return transform(geometry, null, targetCRS); + } + + /** + * Transform a geometry from one CRS to another with lenient parameter. + * + *

Note: The {@code lenient} parameter is accepted for API compatibility with GeoTools + * but is ignored. proj4sedona always performs strict transformation. + * + * @param geometry The geometry to transform + * @param sourceCRS Source CRS definition (EPSG code, WKT, PROJ string, or PROJJSON), or null to + * use geometry's SRID + * @param targetCRS Target CRS definition (EPSG code, WKT, PROJ string, or PROJJSON) + * @param lenient Ignored parameter, kept for API compatibility + * @return The transformed geometry with SRID set to the target CRS EPSG code (if identifiable) + * @throws IllegalArgumentException if source CRS is null and geometry has no SRID + */ + public static Geometry transform( + Geometry geometry, String sourceCRS, String targetCRS, boolean lenient) { + // lenient parameter is ignored - proj4sedona doesn't support it + return transform(geometry, sourceCRS, targetCRS); + } + + /** + * Transform a geometry from one CRS to another. + * + * @param geometry The geometry to transform + * @param sourceCRS Source CRS definition (EPSG code, WKT, PROJ string, or PROJJSON), or null to + * use geometry's SRID + * @param targetCRS Target CRS definition (EPSG code, WKT, PROJ string, or PROJJSON) + * @return The transformed geometry with SRID set to the target CRS EPSG code (if identifiable) + * @throws IllegalArgumentException if source CRS is null and geometry has no SRID + */ + public static Geometry transform(Geometry geometry, String sourceCRS, String targetCRS) { + if (geometry == null) { + return null; + } + + // Determine source CRS + String effectiveSourceCRS = sourceCRS; + if (effectiveSourceCRS == null || effectiveSourceCRS.isEmpty()) { + int srid = geometry.getSRID(); + if (srid != 0) { + effectiveSourceCRS = "EPSG:" + srid; + } else { + throw new IllegalArgumentException( + "Source CRS must be specified. No SRID found on geometry."); + } + } + + // Check if source and target are the same EPSG code + Integer sourceSRID = extractEpsgCode(effectiveSourceCRS); + Integer targetSRID = extractEpsgCode(targetCRS); + + if (sourceSRID != null && sourceSRID.equals(targetSRID)) { + // Same CRS, just update SRID if needed + if (geometry.getSRID() != targetSRID) { + Geometry result = geometry.copy(); + result = Functions.setSRID(result, targetSRID); + result.setUserData(geometry.getUserData()); + return result; + } + return geometry; + } + + // Perform transformation using cached projections to avoid per-row overhead. + // JTSGeometryTransformer.cached() uses Proj4.cachedConverter() internally, + // which caches parsed Proj objects for repeated transformations. + JTSGeometryTransformer transformer = + JTSGeometryTransformer.cached(effectiveSourceCRS, targetCRS); + Geometry transformed = transformer.transform(geometry); + + // Set SRID on result + if (targetSRID == null) { + try { + Proj targetProj = new Proj(targetCRS); + String epsgCode = CRSSerializer.toEpsgCode(targetProj); + if (epsgCode != null) { + Integer epsg = extractEpsgCode(epsgCode); + if (epsg != null) { + targetSRID = epsg; + } + } + } catch (Exception e) { + // Could not identify EPSG code, leave SRID as 0 + } + } + + if (targetSRID == null) { + targetSRID = 0; + } + transformed = Functions.setSRID(transformed, targetSRID); + + // Preserve user data + transformed.setUserData(geometry.getUserData()); + + return transformed; + } + + /** + * Extract EPSG code from a CRS string if it's in EPSG format. + * + * @param crs CRS string (may be EPSG code, WKT, PROJ string, etc.) + * @return EPSG code as integer, or null if not an EPSG code format + */ + private static Integer extractEpsgCode(String crs) { + if (crs == null || crs.isEmpty()) { + return null; + } + + Matcher matcher = EPSG_PATTERN.matcher(crs.trim()); + if (matcher.matches()) { + try { + return Integer.parseInt(matcher.group(1)); + } catch (NumberFormatException e) { + return null; + } + } + return null; + } +} diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsProj4PerformanceTest.java b/common/src/test/java/org/apache/sedona/common/FunctionsProj4PerformanceTest.java new file mode 100644 index 00000000000..c9c3951fa46 --- /dev/null +++ b/common/src/test/java/org/apache/sedona/common/FunctionsProj4PerformanceTest.java @@ -0,0 +1,349 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common; + +import static org.junit.Assert.*; +import static org.junit.Assume.assumeTrue; + +import java.nio.file.Files; +import java.nio.file.Path; +import org.datasyslab.proj4sedona.Proj4; +import org.datasyslab.proj4sedona.defs.Defs; +import org.datasyslab.proj4sedona.grid.NadgridRegistry; +import org.junit.Test; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.geom.GeometryFactory; +import org.locationtech.jts.geom.Point; + +/** + * Performance tests for Proj4sedona CRS transformation. + * + *

These tests measure: + * + *

    + *
  1. Proj4sedona vs GeoTools performance comparison + *
  2. Cache effects in Proj4sedona: + *
      + *
    • 2.1 Built-in EPSG codes + *
    • 2.2 EPSG codes with remote fetching from spatialreference.org + *
    • 2.3 PROJ and WKT strings + *
    • 2.4 Grid files (local and remote) + *
    + *
+ * + *

Each test uses the pattern: 1 cold call (cache miss) + N warm calls (cache hits) + */ +public class FunctionsProj4PerformanceTest extends TestBase { + + private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory(); + private static final int WARM_ITERATIONS = 10; + + // Test coordinates + private static final double SF_LON = -122.4194; + private static final double SF_LAT = 37.7749; + + // Remote grid file URL (OSTN15 from GitHub) + private static final String REMOTE_GRID_URL = + "https://raw.githubusercontent.com/jiayuasu/grid-files/main/us_os/OSTN15-NTv2/OSTN15_NTv2_ETRStoOSGB.gsb"; + + // ==================== Helper Methods ==================== + + private Point createTestPoint(double lon, double lat) { + return GEOMETRY_FACTORY.createPoint(new Coordinate(lon, lat)); + } + + private void printHeader(String title) { + System.out.println(); + System.out.println("=".repeat(70)); + System.out.println(title); + System.out.println("=".repeat(70)); + } + + private void printResult(String label, double coldMs, double warmAvgUs, int cacheEntries) { + double speedup = (coldMs * 1000) / warmAvgUs; + System.out.printf("Cold (1 call): %10.2f ms%n", coldMs); + System.out.printf("Warm (%d calls): %10.2f μs avg%n", WARM_ITERATIONS, warmAvgUs); + System.out.printf("Cache speedup: %10.0fx%n", speedup); + if (cacheEntries >= 0) { + System.out.printf("Proj cache entries: %10d%n", cacheEntries); + } + } + + // ==================== 1. Proj4sedona vs GeoTools ==================== + + @Test + public void testProj4VsGeoToolsEpsgPerformance() throws Exception { + printHeader("1. Proj4sedona vs GeoTools (EPSG:4326 -> EPSG:3857)"); + + Point point = createTestPoint(SF_LON, SF_LAT); + + // ===== Proj4sedona ===== + System.out.println("\nProj4sedona:"); + Proj4.clearCache(); + + // Cold call + long coldStart = System.nanoTime(); + Geometry proj4ColdResult = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + double proj4ColdMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + } + double proj4WarmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double proj4WarmAvgUs = (proj4WarmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("Proj4sedona", proj4ColdMs, proj4WarmAvgUs, Proj4.getCacheSize()); + assertNotNull(proj4ColdResult); + assertEquals(3857, proj4ColdResult.getSRID()); + + // ===== GeoTools ===== + System.out.println("\nGeoTools:"); + + // Cold call + coldStart = System.nanoTime(); + Geometry gtColdResult = FunctionsGeoTools.transform(point, "EPSG:4326", "EPSG:3857"); + double gtColdMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsGeoTools.transform(point, "EPSG:4326", "EPSG:3857"); + } + double gtWarmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double gtWarmAvgUs = (gtWarmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("GeoTools", gtColdMs, gtWarmAvgUs, -1); + assertNotNull(gtColdResult); + + // ===== Comparison ===== + double warmSpeedup = gtWarmAvgUs / proj4WarmAvgUs; + System.out.printf( + "%nComparison: Proj4sedona is %.1fx faster than GeoTools (warm)%n", warmSpeedup); + + // Verify both produce similar results + assertEquals( + proj4ColdResult.getCoordinate().x, + gtColdResult.getCoordinate().x, + 1.0); // 1 meter tolerance + assertEquals(proj4ColdResult.getCoordinate().y, gtColdResult.getCoordinate().y, 1.0); + } + + // ==================== 2.1 Cache Effect: Built-in EPSG ==================== + + @Test + public void testCacheEffectBuiltInEpsgCode() { + printHeader("2.1 Cache Effect: Built-in EPSG (EPSG:4326 -> EPSG:3857)"); + + Point point = createTestPoint(SF_LON, SF_LAT); + Proj4.clearCache(); + + // Cold call + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("Built-in EPSG", coldMs, warmAvgUs, Proj4.getCacheSize()); + assertNotNull(coldResult); + assertEquals(3857, coldResult.getSRID()); + } + + // ==================== 2.2 Cache Effect: Remote Fetch EPSG ==================== + + @Test + public void testCacheEffectRemoteFetchEpsgCode() { + printHeader("2.2 Cache Effect: Remote Fetch EPSG (EPSG:2154 - French Lambert)"); + + // EPSG:2154 (RGF93 / Lambert-93) is NOT in the built-in list + // It requires fetching from spatialreference.org + + Point point = createTestPoint(2.3522, 48.8566); // Paris coordinates + Proj4.clearCache(); + Defs.reset(); // Clear fetched definitions + + try { + // Cold call (network fetch) + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:2154"); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, "EPSG:4326", "EPSG:2154"); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("Remote Fetch EPSG", coldMs, warmAvgUs, Proj4.getCacheSize()); + System.out.printf("Note: Cold time includes network fetch from spatialreference.org%n"); + assertNotNull(coldResult); + assertEquals(2154, coldResult.getSRID()); + } catch (Exception e) { + System.out.println("Skipped: Network fetch failed - " + e.getMessage()); + // Don't fail the test if network is unavailable + } + } + + // ==================== 2.3 Cache Effect: PROJ String ==================== + + @Test + public void testCacheEffectProjString() { + printHeader("2.3 Cache Effect: PROJ String"); + + Point point = createTestPoint(SF_LON, SF_LAT); + String sourceCRS = "+proj=longlat +datum=WGS84 +no_defs"; + String targetCRS = + "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +no_defs"; + + Proj4.clearCache(); + + // Cold call + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, sourceCRS, targetCRS); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, sourceCRS, targetCRS); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("PROJ String", coldMs, warmAvgUs, Proj4.getCacheSize()); + assertNotNull(coldResult); + // Web Mercator coordinates + assertEquals(-13627665.27, coldResult.getCoordinate().x, 1.0); + assertEquals(4547675.35, coldResult.getCoordinate().y, 1.0); + } + + @Test + public void testCacheEffectWktString() { + printHeader("2.3 Cache Effect: WKT String"); + + Point point = createTestPoint(120, 60); + String sourceWkt = + "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433]]"; + String targetWkt = + "PROJCS[\"WGS 84 / UTM zone 51N\",GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433]],PROJECTION[\"Transverse_Mercator\"],PARAMETER[\"latitude_of_origin\",0],PARAMETER[\"central_meridian\",123],PARAMETER[\"scale_factor\",0.9996],PARAMETER[\"false_easting\",500000],PARAMETER[\"false_northing\",0],UNIT[\"metre\",1]]"; + + Proj4.clearCache(); + + // Cold call + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, sourceWkt, targetWkt); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, sourceWkt, targetWkt); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("WKT String", coldMs, warmAvgUs, Proj4.getCacheSize()); + assertNotNull(coldResult); + } + + // ==================== 2.4 Cache Effect: Grid Files ==================== + + @Test + public void testCacheEffectGridFileLocal() { + printHeader("2.4 Cache Effect: Grid File (local)"); + + Path gridPath = Path.of("src/test/resources/grids/ca_nrc_ntv2_0.tif").toAbsolutePath(); + assumeTrue("Grid file not found: " + gridPath, Files.exists(gridPath)); + + // Toronto coordinates for Canadian grid + Point point = createTestPoint(-79.3832, 43.6532); + String sourceCRS = "+proj=longlat +datum=NAD27 +nadgrids=" + gridPath + " +no_defs"; + String targetCRS = "EPSG:4326"; // WGS84 + + Proj4.clearCache(); + NadgridRegistry.clear(); + + // Cold call (loads grid file) + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, sourceCRS, targetCRS); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, sourceCRS, targetCRS); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("Grid File (local)", coldMs, warmAvgUs, Proj4.getCacheSize()); + System.out.printf("Grid cache entries: %10d%n", NadgridRegistry.size()); + System.out.printf("Note: Cold time includes loading grid file from disk%n"); + assertNotNull(coldResult); + assertTrue(NadgridRegistry.size() > 0); + } + + @Test + public void testCacheEffectGridFileRemote() { + printHeader("2.4 Cache Effect: Grid File (remote)"); + + // Use OSTN15 grid file from GitHub + Point point = createTestPoint(-0.1276, 51.5074); // London coordinates + String sourceCRS = "+proj=longlat +ellps=GRS80 +nadgrids=" + REMOTE_GRID_URL + " +no_defs"; + String targetCRS = "+proj=longlat +ellps=airy +no_defs"; + + Proj4.clearCache(); + NadgridRegistry.clear(); + + try { + // Cold call (downloads grid file) + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, sourceCRS, targetCRS); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, sourceCRS, targetCRS); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("Grid File (remote)", coldMs, warmAvgUs, Proj4.getCacheSize()); + System.out.printf("Grid cache entries: %10d%n", NadgridRegistry.size()); + System.out.printf("Note: Cold time includes downloading grid file (~15MB)%n"); + assertNotNull(coldResult); + assertTrue(NadgridRegistry.size() > 0); + } catch (Exception e) { + System.out.println("Skipped: Remote grid download failed - " + e.getMessage()); + // Don't fail the test if network is unavailable + } + } +} diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java new file mode 100644 index 00000000000..903bf2f9d30 --- /dev/null +++ b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java @@ -0,0 +1,576 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common; + +import static org.junit.Assert.*; +import static org.junit.Assume.assumeTrue; + +import java.nio.file.Files; +import java.nio.file.Path; +import org.junit.Test; +import org.locationtech.jts.geom.*; +import org.locationtech.jts.io.WKTReader; + +/** + * Comprehensive tests for FunctionsProj4 CRS transformation. + * + *

Tests cover: + * + *

+ */ +public class FunctionsProj4Test extends TestBase { + + private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory(); + private static final WKTReader WKT_READER = new WKTReader(); + private static final double FP_TOLERANCE = 1e-6; + + // ==================== EPSG Code Tests ==================== + + @Test + public void testTransformEpsgToEpsg() { + // WGS84 to Web Mercator + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + point.setSRID(4326); + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertEquals(3857, result.getSRID()); + // San Francisco in Web Mercator coordinates + assertEquals(-13627665.27, result.getCoordinate().x, 1.0); + assertEquals(4547675.35, result.getCoordinate().y, 1.0); + } + + @Test + public void testTransformEpsgUsingGeometrySRID() { + // Test using geometry's SRID as source CRS + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + point.setSRID(4326); + + Geometry result = FunctionsProj4.transform(point, "EPSG:3857"); + + assertNotNull(result); + assertEquals(3857, result.getSRID()); + assertEquals(-13627665.27, result.getCoordinate().x, 1.0); + } + + @Test + public void testTransformLosAngelesCoordinates() { + // Test with coordinates near Los Angeles (-117.99, 32.01) + // This is the coordinate used in Flink tests + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-117.99, 32.01)); + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertEquals(3857, result.getSRID()); + // Expected values from Web Mercator formula: + // x = lon * 20037508.34 / 180 = -13134586.72 + // y = ln(tan((90 + lat) * pi / 360)) * 20037508.34 / pi = 3764623.35 + assertEquals(-13134586.72, result.getCoordinate().x, 1.0); + assertEquals(3764623.35, result.getCoordinate().y, 1.0); + } + + @Test + public void testTransformUtmZone() { + // WGS84 to UTM Zone 10N (San Francisco) + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:32610"); + + assertNotNull(result); + assertEquals(32610, result.getSRID()); + // UTM coordinates for San Francisco + assertTrue(result.getCoordinate().x > 540000 && result.getCoordinate().x < 560000); + assertTrue(result.getCoordinate().y > 4170000 && result.getCoordinate().y < 4190000); + } + + // ==================== PROJ String Tests ==================== + + @Test + public void testTransformProjString() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + String sourceCRS = "+proj=longlat +datum=WGS84 +no_defs"; + String targetCRS = + "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +no_defs"; + + Geometry result = FunctionsProj4.transform(point, sourceCRS, targetCRS); + + assertNotNull(result); + assertEquals(-13627665.27, result.getCoordinate().x, 1.0); + assertEquals(4547675.35, result.getCoordinate().y, 1.0); + } + + @Test + public void testTransformProjStringToEpsg() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + String sourceCRS = "+proj=longlat +datum=WGS84 +no_defs"; + + Geometry result = FunctionsProj4.transform(point, sourceCRS, "EPSG:3857"); + + assertNotNull(result); + assertEquals(3857, result.getSRID()); + assertEquals(-13627665.27, result.getCoordinate().x, 1.0); + } + + @Test + public void testTransformProjStringUtm() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + String sourceCRS = "+proj=longlat +datum=WGS84 +no_defs"; + String targetCRS = "+proj=utm +zone=10 +datum=WGS84 +units=m +no_defs"; + + Geometry result = FunctionsProj4.transform(point, sourceCRS, targetCRS); + + assertNotNull(result); + assertTrue(result.getCoordinate().x > 540000 && result.getCoordinate().x < 560000); + } + + // ==================== WKT1 Tests ==================== + + @Test + public void testTransformWkt1() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(120, 60)); + + String sourceWkt = + "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433]]"; + String targetWkt = + "PROJCS[\"WGS 84 / UTM zone 51N\",GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433]],PROJECTION[\"Transverse_Mercator\"],PARAMETER[\"latitude_of_origin\",0],PARAMETER[\"central_meridian\",123],PARAMETER[\"scale_factor\",0.9996],PARAMETER[\"false_easting\",500000],PARAMETER[\"false_northing\",0],UNIT[\"metre\",1]]"; + + Geometry result = FunctionsProj4.transform(point, sourceWkt, targetWkt); + + assertNotNull(result); + // Should be projected coordinates + assertTrue(Math.abs(result.getCoordinate().x) > 100000); + assertTrue(Math.abs(result.getCoordinate().y) > 1000000); + } + + // ==================== WKT2 Tests ==================== + + @Test + public void testTransformWkt2() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + String wkt2 = + "GEOGCRS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",ELLIPSOID[\"WGS 84\",6378137,298.257223563]],CS[ellipsoidal,2],AXIS[\"latitude\",north],AXIS[\"longitude\",east],UNIT[\"degree\",0.0174532925199433]]"; + + Geometry result = FunctionsProj4.transform(point, wkt2, "EPSG:3857"); + + assertNotNull(result); + assertEquals(3857, result.getSRID()); + assertEquals(-13627665.27, result.getCoordinate().x, 1.0); + } + + // ==================== PROJJSON Tests ==================== + + @Test + public void testTransformProjJson() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + String projJson = + "{" + + "\"type\": \"GeographicCRS\"," + + "\"name\": \"WGS 84\"," + + "\"datum\": {" + + "\"type\": \"GeodeticReferenceFrame\"," + + "\"name\": \"World Geodetic System 1984\"," + + "\"ellipsoid\": {" + + "\"name\": \"WGS 84\"," + + "\"semi_major_axis\": 6378137," + + "\"inverse_flattening\": 298.257223563" + + "}" + + "}" + + "}"; + + Geometry result = FunctionsProj4.transform(point, projJson, "EPSG:3857"); + + assertNotNull(result); + assertEquals(3857, result.getSRID()); + assertEquals(-13627665.27, result.getCoordinate().x, 1.0); + } + + @Test + public void testTransformProjJsonProjected() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + String projJson = + "{" + + "\"type\": \"ProjectedCRS\"," + + "\"name\": \"WGS 84 / UTM zone 10N\"," + + "\"base_crs\": {" + + "\"type\": \"GeographicCRS\"," + + "\"datum\": {" + + "\"ellipsoid\": {" + + "\"semi_major_axis\": 6378137," + + "\"inverse_flattening\": 298.257223563" + + "}" + + "}" + + "}," + + "\"conversion\": {" + + "\"method\": {\"name\": \"Transverse Mercator\"}," + + "\"parameters\": [" + + "{\"name\": \"Latitude of natural origin\", \"value\": 0, \"unit\": {\"conversion_factor\": 0.0174532925199433}}," + + "{\"name\": \"Longitude of natural origin\", \"value\": -123, \"unit\": {\"conversion_factor\": 0.0174532925199433}}," + + "{\"name\": \"Scale factor at natural origin\", \"value\": 0.9996}," + + "{\"name\": \"False easting\", \"value\": 500000}," + + "{\"name\": \"False northing\", \"value\": 0}" + + "]" + + "}" + + "}"; + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", projJson); + + assertNotNull(result); + // UTM coordinates + assertTrue(result.getCoordinate().x > 500000 && result.getCoordinate().x < 600000); + assertTrue(result.getCoordinate().y > 4000000 && result.getCoordinate().y < 5000000); + } + + // ==================== NAD Grid Tests (Local Files) ==================== + + @Test + public void testNadgridLocalFileAbsolute() { + Path gridPath = Path.of("src/test/resources/grids/ca_nrc_ntv2_0.tif").toAbsolutePath(); + assumeTrue("Grid file not found", Files.exists(gridPath)); + + // Toronto coordinates in NAD27 + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-79.3832, 43.6532)); + + String sourceCRS = "+proj=longlat +datum=NAD27 +nadgrids=" + gridPath + " +no_defs"; + String targetCRS = "EPSG:4326"; // WGS84 + + Geometry result = FunctionsProj4.transform(point, sourceCRS, targetCRS); + + assertNotNull(result); + // Grid shift should be small but non-zero + assertNotEquals(point.getCoordinate().x, result.getCoordinate().x, 1e-6); + assertNotEquals(point.getCoordinate().y, result.getCoordinate().y, 1e-6); + // But should be close (< 0.001 degrees) + assertEquals(point.getCoordinate().x, result.getCoordinate().x, 0.001); + assertEquals(point.getCoordinate().y, result.getCoordinate().y, 0.001); + } + + @Test + public void testNadgridLocalFileRelative() { + String relativePath = "./src/test/resources/grids/us_noaa_conus.tif"; + assumeTrue("Grid file not found", Files.exists(Path.of(relativePath))); + + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-77.0, 38.9)); + + String sourceCRS = "+proj=longlat +datum=NAD83 +nadgrids=" + relativePath + " +no_defs"; + + // Should not throw - grid file is found + Geometry result = FunctionsProj4.transform(point, sourceCRS, "EPSG:4326"); + assertNotNull(result); + } + + @Test + public void testNadgridLocalFileOptionalNotFound() { + // Optional grid (with @) should NOT throw when file doesn't exist + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-79.0, 43.0)); + + String sourceCRS = "+proj=longlat +datum=NAD27 +nadgrids=@/nonexistent/path/grid.gsb +no_defs"; + String targetCRS = "EPSG:4326"; + + // Should not throw - optional grid just gets skipped + Geometry result = FunctionsProj4.transform(point, sourceCRS, targetCRS); + assertNotNull(result); + } + + @Test + public void testNadgridLocalFileMandatoryNotFound() { + // Mandatory grid (without @) SHOULD throw when file doesn't exist + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-79.0, 43.0)); + + String sourceCRS = "+proj=longlat +datum=NAD27 +nadgrids=/nonexistent/path/grid.gsb +no_defs"; + String targetCRS = "EPSG:4326"; + + try { + FunctionsProj4.transform(point, sourceCRS, targetCRS); + fail("Expected exception for mandatory grid not found"); + } catch (RuntimeException e) { + // Expected + assertTrue(e.getMessage().contains("grid") || e.getCause() != null); + } + } + + // ==================== Geometry Type Tests ==================== + + @Test + public void testTransformLineString() throws Exception { + LineString line = + (LineString) WKT_READER.read("LINESTRING(-122.4 37.7, -122.5 37.8, -122.6 37.9)"); + + Geometry result = FunctionsProj4.transform(line, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertTrue(result instanceof LineString); + assertEquals(3, result.getNumPoints()); + assertEquals(3857, result.getSRID()); + } + + @Test + public void testTransformPolygon() throws Exception { + Polygon polygon = + (Polygon) + WKT_READER.read( + "POLYGON((-122.5 37.7, -122.3 37.7, -122.3 37.9, -122.5 37.9, -122.5 37.7))"); + + Geometry result = FunctionsProj4.transform(polygon, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertTrue(result instanceof Polygon); + assertEquals(3857, result.getSRID()); + } + + @Test + public void testTransformPolygonWithHole() throws Exception { + Polygon polygon = + (Polygon) + WKT_READER.read( + "POLYGON((-122.5 37.7, -122.3 37.7, -122.3 37.9, -122.5 37.9, -122.5 37.7), " + + "(-122.45 37.75, -122.35 37.75, -122.35 37.85, -122.45 37.85, -122.45 37.75))"); + + Geometry result = FunctionsProj4.transform(polygon, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertTrue(result instanceof Polygon); + assertEquals(1, ((Polygon) result).getNumInteriorRing()); + } + + @Test + public void testTransformMultiPoint() throws Exception { + MultiPoint multiPoint = + (MultiPoint) WKT_READER.read("MULTIPOINT((-122.4 37.7), (-122.5 37.8), (-122.6 37.9))"); + + Geometry result = FunctionsProj4.transform(multiPoint, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertTrue(result instanceof MultiPoint); + assertEquals(3, result.getNumGeometries()); + assertEquals(3857, result.getSRID()); + } + + @Test + public void testTransformMultiLineString() throws Exception { + MultiLineString multiLine = + (MultiLineString) + WKT_READER.read( + "MULTILINESTRING((-122.4 37.7, -122.5 37.8), (-122.6 37.9, -122.7 38.0))"); + + Geometry result = FunctionsProj4.transform(multiLine, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertTrue(result instanceof MultiLineString); + assertEquals(2, result.getNumGeometries()); + } + + @Test + public void testTransformMultiPolygon() throws Exception { + MultiPolygon multiPolygon = + (MultiPolygon) + WKT_READER.read( + "MULTIPOLYGON(((-122.5 37.7, -122.4 37.7, -122.4 37.8, -122.5 37.8, -122.5 37.7)), " + + "((-122.3 37.6, -122.2 37.6, -122.2 37.7, -122.3 37.7, -122.3 37.6)))"); + + Geometry result = FunctionsProj4.transform(multiPolygon, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertTrue(result instanceof MultiPolygon); + assertEquals(2, result.getNumGeometries()); + } + + @Test + public void testTransformGeometryCollection() throws Exception { + GeometryCollection collection = + (GeometryCollection) + WKT_READER.read( + "GEOMETRYCOLLECTION(POINT(-122.4 37.7), LINESTRING(-122.5 37.8, -122.6 37.9))"); + + Geometry result = FunctionsProj4.transform(collection, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertTrue(result instanceof GeometryCollection); + assertEquals(2, result.getNumGeometries()); + } + + @Test + public void testTransformPoint3D() { + // Test 3D coordinate preservation + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749, 100.0)); + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertFalse(Double.isNaN(result.getCoordinate().getZ())); + } + + // ==================== Edge Cases ==================== + + @Test + public void testTransformNullGeometry() { + Geometry result = FunctionsProj4.transform(null, "EPSG:4326", "EPSG:3857"); + assertNull(result); + } + + @Test + public void testTransformEmptyGeometry() { + Point emptyPoint = GEOMETRY_FACTORY.createPoint(); + + Geometry result = FunctionsProj4.transform(emptyPoint, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertTrue(result.isEmpty()); + } + + @Test + public void testTransformSameCRS() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + point.setSRID(4326); + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:4326"); + + assertNotNull(result); + assertEquals(4326, result.getSRID()); + // Coordinates should be unchanged + assertEquals(point.getCoordinate().x, result.getCoordinate().x, FP_TOLERANCE); + assertEquals(point.getCoordinate().y, result.getCoordinate().y, FP_TOLERANCE); + } + + @Test + public void testTransformSameCRSUpdatesSRID() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + point.setSRID(0); // No SRID set + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:4326"); + + assertNotNull(result); + assertEquals(4326, result.getSRID()); + } + + @Test(expected = IllegalArgumentException.class) + public void testTransformNoSourceCRS() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + // No SRID set and no source CRS provided + FunctionsProj4.transform(point, "EPSG:3857"); + } + + @Test(expected = Exception.class) + public void testTransformInvalidCRS() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + FunctionsProj4.transform(point, "EPSG:4326", "INVALID:CRS"); + } + + // ==================== UserData Preservation ==================== + + @Test + public void testTransformPreservesUserData() { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + String userData = "test-user-data"; + point.setUserData(userData); + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + + assertNotNull(result); + assertEquals(userData, result.getUserData()); + } + + // ==================== 4-arg (lenient parameter) Tests ==================== + + @Test + public void testTransform4ArgLenientTrue() { + // Test 4-arg version with lenient=true + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857", true); + + assertNotNull(result); + assertEquals(3857, result.getSRID()); + // Same result as 3-arg version + assertEquals(-13627665.27, result.getCoordinate().x, 1.0); + assertEquals(4547675.35, result.getCoordinate().y, 1.0); + } + + @Test + public void testTransform4ArgLenientFalse() { + // Test 4-arg version with lenient=false + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857", false); + + assertNotNull(result); + assertEquals(3857, result.getSRID()); + // Same result as 3-arg version + assertEquals(-13627665.27, result.getCoordinate().x, 1.0); + assertEquals(4547675.35, result.getCoordinate().y, 1.0); + } + + @Test + public void testTransform4ArgLenientIgnored() { + // Verify lenient parameter is ignored - both true and false produce identical results + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + Geometry resultLenientTrue = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857", true); + Geometry resultLenientFalse = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857", false); + Geometry result3Arg = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + + // All three should produce identical coordinates + assertEquals(resultLenientTrue.getCoordinate().x, resultLenientFalse.getCoordinate().x, 1e-9); + assertEquals(resultLenientTrue.getCoordinate().y, resultLenientFalse.getCoordinate().y, 1e-9); + assertEquals(resultLenientTrue.getCoordinate().x, result3Arg.getCoordinate().x, 1e-9); + assertEquals(resultLenientTrue.getCoordinate().y, result3Arg.getCoordinate().y, 1e-9); + } + + @Test + public void testTransform4ArgWithProjString() { + // Test 4-arg version with PROJ string + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + String projString = + "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +no_defs"; + + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", projString, true); + + assertNotNull(result); + assertEquals(-13627665.27, result.getCoordinate().x, 1.0); + assertEquals(4547675.35, result.getCoordinate().y, 1.0); + } + + // ==================== Round-trip Tests ==================== + + @Test + public void testRoundTrip() { + Point original = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749)); + + // Transform to Web Mercator and back + Geometry projected = FunctionsProj4.transform(original, "EPSG:4326", "EPSG:3857"); + Geometry backToWgs84 = FunctionsProj4.transform(projected, "EPSG:3857", "EPSG:4326"); + + assertNotNull(backToWgs84); + assertEquals(original.getCoordinate().x, backToWgs84.getCoordinate().x, 1e-9); + assertEquals(original.getCoordinate().y, backToWgs84.getCoordinate().y, 1e-9); + } +} diff --git a/common/src/test/resources/grids/ca_nrc_ntv2_0.tif b/common/src/test/resources/grids/ca_nrc_ntv2_0.tif new file mode 100644 index 00000000000..6395bf20175 Binary files /dev/null and b/common/src/test/resources/grids/ca_nrc_ntv2_0.tif differ diff --git a/common/src/test/resources/grids/us_noaa_conus.tif b/common/src/test/resources/grids/us_noaa_conus.tif new file mode 100644 index 00000000000..88c4d00b616 Binary files /dev/null and b/common/src/test/resources/grids/us_noaa_conus.tif differ diff --git a/docs/api/flink/Function.md b/docs/api/flink/Function.md index a19ff515995..58871582a1c 100644 --- a/docs/api/flink/Function.md +++ b/docs/api/flink/Function.md @@ -4275,60 +4275,17 @@ MULTIPOLYGON (((-2 -3, -3 -3, -3 3, -2 3, -2 -3)), ((3 -3, 3 3, 4 3, 4 -3, 3 -3) Introduction: -Transform the Spatial Reference System / Coordinate Reference System of A, from SourceCRS to TargetCRS. For SourceCRS and TargetCRS, WKT format is also available since v1.3.1. +Transform the Spatial Reference System / Coordinate Reference System of A, from SourceCRS to TargetCRS. -**Lon/Lat Order in the input geometry** +Since `v1.9.0`, Sedona supports multiple CRS formats including EPSG codes, WKT1, WKT2, PROJ strings, and PROJJSON. Grid files for high-accuracy datum transformations are also supported. -If the input geometry is in lat/lon order, it might throw an error such as `too close to pole`, `latitude or longitude exceeded limits`, or give unexpected results. -You need to make sure that the input geometry is in lon/lat order. If the input geometry is in lat/lon order, you can use ==ST_FlipCoordinates== to swap X and Y. - -**Lon/Lat Order in the source and target CRS** - -Sedona will force the source and target CRS to be in lon/lat order. If the source CRS or target CRS is in lat/lon order, it will be swapped to lon/lat order. - -**CRS code** - -The CRS code is the code of the CRS in the official EPSG database (https://epsg.org/) in the format of `EPSG:XXXX`. A community tool [EPSG.io](https://epsg.io/) can help you quick identify a CRS code. For example, the code of WGS84 is `EPSG:4326`. - -**WKT format** - -You can also use OGC WKT v1 format to specify the source CRS and target CRS. An example OGC WKT v1 CRS of `EPGS:3857` is as follows: - -``` -PROJCS["WGS 84 / Pseudo-Mercator", - GEOGCS["WGS 84", - DATUM["WGS_1984", - SPHEROID["WGS 84",6378137,298.257223563, - AUTHORITY["EPSG","7030"]], - AUTHORITY["EPSG","6326"]], - PRIMEM["Greenwich",0, - AUTHORITY["EPSG","8901"]], - UNIT["degree",0.0174532925199433, - AUTHORITY["EPSG","9122"]], - AUTHORITY["EPSG","4326"]], - PROJECTION["Mercator_1SP"], - PARAMETER["central_meridian",0], - PARAMETER["scale_factor",1], - PARAMETER["false_easting",0], - PARAMETER["false_northing",0], - UNIT["metre",1, - AUTHORITY["EPSG","9001"]], - AXIS["Easting",EAST], - AXIS["Northing",NORTH], - EXTENSION["PROJ4","+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs"], - AUTHORITY["EPSG","3857"]] -``` - -!!!note - By default, this function uses lon/lat order since `v1.5.0`. Before, it used lat/lon order. - -!!!note - By default, ==ST_Transform== follows the `lenient` mode which tries to fix issues by itself. You can append a boolean value at the end to enable the `strict` mode. In `strict` mode, ==ST_Transform== will throw an error if it finds any issue. +!!!tip + For comprehensive details on supported CRS formats, grid file usage, and examples, see the Spark SQL [CRS Transformation](../sql/CRS-Transformation.md) documentation. Format: ``` -ST_Transform (A: Geometry, SourceCRS: String, TargetCRS: String, [Optional] lenientMode: Boolean) +ST_Transform (A: Geometry, SourceCRS: String, TargetCRS: String) ``` Since: `v1.2.0` @@ -4339,10 +4296,6 @@ Example: SELECT ST_AsText(ST_Transform(ST_GeomFromText('POLYGON((170 50,170 72,-130 72,-130 50,170 50))'),'EPSG:4326', 'EPSG:32649')) ``` -```sql -SELECT ST_AsText(ST_Transform(ST_GeomFromText('POLYGON((170 50,170 72,-130 72,-130 50,170 50))'),'EPSG:4326', 'EPSG:32649', false)) -``` - Output: ``` diff --git a/docs/api/snowflake/vector-data/Function.md b/docs/api/snowflake/vector-data/Function.md index 65376b6ce04..fdb09919fba 100644 --- a/docs/api/snowflake/vector-data/Function.md +++ b/docs/api/snowflake/vector-data/Function.md @@ -3388,33 +3388,24 @@ MULTIPOLYGON (((-2 -3, -3 -3, -3 3, -2 3, -2 -3)), ((3 -3, 3 3, 4 3, 4 -3, 3 -3) Introduction: Transform the Spatial Reference System / Coordinate Reference System of A, from SourceCRS to TargetCRS. -For SourceCRS and TargetCRS, WKT format is also available. -!!!note - By default, this function uses lat/lon order. You can use ==ST_FlipCoordinates== to swap X and Y. +Since `v1.9.0`, Sedona supports multiple CRS formats including EPSG codes, WKT1, WKT2, PROJ strings, and PROJJSON. Grid files for high-accuracy datum transformations are also supported. + +!!!tip + For comprehensive details on supported CRS formats, grid file usage, and examples, see the Spark SQL [CRS Transformation](../../sql/CRS-Transformation.md) documentation. !!!note - If ==ST_Transform== throws an Exception called "Bursa wolf parameters required", you need to disable the error notification in ST_Transform. You can append a boolean value at the end. + By default, this function uses lat/lon order. You can use ==ST_FlipCoordinates== to swap X and Y. -Format: `ST_Transform (A:geometry, SourceCRS:string, TargetCRS:string ,[Optional] DisableError)` +Format: `ST_Transform (A:geometry, SourceCRS:string, TargetCRS:string)` -SQL example (simple): +SQL example: ```sql SELECT ST_Transform(polygondf.countyshape, 'epsg:4326','epsg:3857') FROM polygondf ``` -SQL example (with optional parameters): - -```sql -SELECT ST_Transform(polygondf.countyshape, 'epsg:4326','epsg:3857', false) -FROM polygondf -``` - -!!!note - The detailed EPSG information can be searched on [EPSG.io](https://epsg.io/). - ## ST_Translate Introduction: Returns the input geometry with its X, Y and Z coordinates (if present in the geometry) translated by deltaX, deltaY and deltaZ (if specified) diff --git a/docs/api/sql/CRS-Transformation.md b/docs/api/sql/CRS-Transformation.md new file mode 100644 index 00000000000..a61533aab5c --- /dev/null +++ b/docs/api/sql/CRS-Transformation.md @@ -0,0 +1,285 @@ + + +# CRS Transformation + +Sedona provides coordinate reference system (CRS) transformation through the `ST_Transform` function. Since v1.9.0, Sedona uses the proj4sedona library, a pure Java implementation that supports multiple CRS input formats and grid-based transformations. + +## Supported CRS Formats + +Sedona supports the following formats for specifying source and target coordinate reference systems: + +### Authority Code + +The most common way to specify a CRS is using an authority code in the format `AUTHORITY:CODE`. Sedona uses [spatialreference.org](https://spatialreference.org/projjson_index.json) as an open-source CRS database, which supports multiple authorities: + +| Authority | Description | Example | +|-----------|-------------|---------| +| EPSG | European Petroleum Survey Group | `EPSG:4326`, `EPSG:3857` | +| ESRI | Esri coordinate systems | `ESRI:102008`, `ESRI:54012` | +| IAU | International Astronomical Union (planetary CRS) | `IAU:30100` | +| SR-ORG | User-contributed definitions | `SR-ORG:6864` | + +```sql +-- Transform from WGS84 (EPSG:4326) to Web Mercator (EPSG:3857) +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + 'EPSG:3857' +) AS transformed_point +``` + +Output: + +``` +POINT (-13627665.271218014 4548257.702387721) +``` + +```sql +-- Transform using ESRI authority code (North America Albers Equal Area Conic) +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + 'ESRI:102008' +) AS transformed_point +``` + +```sql +-- Transform from WGS84 to UTM Zone 10N (EPSG:32610) +SELECT ST_Transform( + ST_GeomFromText('POLYGON((-122.5 37.5, -122.5 38.0, -122.0 38.0, -122.0 37.5, -122.5 37.5))'), + 'EPSG:4326', + 'EPSG:32610' +) AS transformed_polygon +``` + +You can browse available CRS codes at [spatialreference.org](https://spatialreference.org/projjson_index.json) or [EPSG.io](https://epsg.io/). + +### WKT1 (OGC Well-Known Text) + +WKT1 is the OGC Well-Known Text format for CRS definitions. It starts with `PROJCS[...]` for projected CRS or `GEOGCS[...]` for geographic CRS. + +```sql +-- Transform using WKT1 format for target CRS +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + 'PROJCS["WGS 84 / Pseudo-Mercator", + GEOGCS["WGS 84", + DATUM["WGS_1984", + SPHEROID["WGS 84",6378137,298.257223563]], + PRIMEM["Greenwich",0], + UNIT["degree",0.0174532925199433]], + PROJECTION["Mercator_1SP"], + PARAMETER["central_meridian",0], + PARAMETER["scale_factor",1], + PARAMETER["false_easting",0], + PARAMETER["false_northing",0], + UNIT["metre",1], + AUTHORITY["EPSG","3857"]]' +) AS transformed_point +``` + +### WKT2 (ISO 19162:2019) + +WKT2 is the modern ISO 19162:2019 standard format. It starts with `PROJCRS[...]` for projected CRS or `GEOGCRS[...]` for geographic CRS. + +```sql +-- Transform using WKT2 format for target CRS +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + 'PROJCRS["WGS 84 / UTM zone 10N", + BASEGEOGCRS["WGS 84", + DATUM["World Geodetic System 1984", + ELLIPSOID["WGS 84",6378137,298.257223563]]], + CONVERSION["UTM zone 10N", + METHOD["Transverse Mercator"], + PARAMETER["Latitude of natural origin",0], + PARAMETER["Longitude of natural origin",-123], + PARAMETER["Scale factor at natural origin",0.9996], + PARAMETER["False easting",500000], + PARAMETER["False northing",0]], + CS[Cartesian,2], + AXIS["easting",east], + AXIS["northing",north], + UNIT["metre",1], + ID["EPSG",32610]]' +) AS transformed_point +``` + +### PROJ String + +PROJ strings provide a compact way to define CRS using projection parameters. They start with `+proj=`. + +```sql +-- Transform using PROJ string for UTM Zone 10N +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + '+proj=longlat +datum=WGS84 +no_defs', + '+proj=utm +zone=10 +datum=WGS84 +units=m +no_defs' +) AS transformed_point +``` + +```sql +-- Transform using PROJ string for Lambert Conformal Conic +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + '+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs' +) AS transformed_point +``` + +### PROJJSON + +PROJJSON is a JSON representation of CRS, useful when working with JSON-based workflows. + +```sql +-- Transform using PROJJSON for target CRS +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + '{ + "type": "ProjectedCRS", + "name": "WGS 84 / UTM zone 10N", + "base_crs": { + "name": "WGS 84", + "datum": { + "type": "GeodeticReferenceFrame", + "name": "World Geodetic System 1984", + "ellipsoid": { + "name": "WGS 84", + "semi_major_axis": 6378137, + "inverse_flattening": 298.257223563 + } + }, + "coordinate_system": { + "subtype": "ellipsoidal", + "axis": [ + {"name": "Longitude", "abbreviation": "lon", "direction": "east", "unit": "degree"}, + {"name": "Latitude", "abbreviation": "lat", "direction": "north", "unit": "degree"} + ] + } + }, + "conversion": { + "name": "UTM zone 10N", + "method": {"name": "Transverse Mercator"}, + "parameters": [ + {"name": "Latitude of natural origin", "value": 0, "unit": "degree"}, + {"name": "Longitude of natural origin", "value": -123, "unit": "degree"}, + {"name": "Scale factor at natural origin", "value": 0.9996}, + {"name": "False easting", "value": 500000, "unit": "metre"}, + {"name": "False northing", "value": 0, "unit": "metre"} + ] + }, + "coordinate_system": { + "subtype": "Cartesian", + "axis": [ + {"name": "Easting", "abbreviation": "E", "direction": "east", "unit": "metre"}, + {"name": "Northing", "abbreviation": "N", "direction": "north", "unit": "metre"} + ] + }, + "id": {"authority": "EPSG", "code": 32610} + }' +) AS transformed_point +``` + +## Grid File Support + +Grid files enable high-accuracy datum transformations, such as NAD27 to NAD83 or OSGB36 to ETRS89. Sedona supports loading grid files from multiple sources. + +### Grid File Sources + +Grid files can be specified using the `+nadgrids` parameter in PROJ strings: + +| Source | Format | Example | +|--------|--------|---------| +| Local file | Absolute path | `+nadgrids=/path/to/grid.gsb` | +| PROJ CDN | `@` prefix | `+nadgrids=@us_noaa_conus.tif` | +| HTTPS URL | Full URL | `+nadgrids=https://cdn.proj.org/us_noaa_conus.tif` | + +When using the `@` prefix, grid files are automatically fetched from [PROJ CDN](https://cdn.proj.org/). + +### Optional vs Mandatory Grids + +- **`@` prefix (optional)**: The transformation continues without the grid if it's unavailable. Use this when the grid improves accuracy but isn't required. +- **No prefix (mandatory)**: An error is thrown if the grid file cannot be found. + +### SQL Examples with Grid Files + +```sql +-- Transform NAD27 to NAD83 using PROJ CDN grid (optional) +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + '+proj=longlat +datum=NAD27 +no_defs +nadgrids=@us_noaa_conus.tif', + 'EPSG:4269' +) AS transformed_point +``` + +```sql +-- Transform using mandatory grid file (error if not found) +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + '+proj=longlat +datum=NAD27 +no_defs +nadgrids=us_noaa_conus.tif', + 'EPSG:4269' +) AS transformed_point +``` + +```sql +-- Transform OSGB36 to ETRS89 using UK grid +SELECT ST_Transform( + ST_GeomFromText('POINT(-0.1276 51.5074)'), + '+proj=longlat +datum=OSGB36 +nadgrids=@uk_os_OSTN15_NTv2_OSGBtoETRS.gsb +no_defs', + 'EPSG:4258' +) AS transformed_point +``` + +## Coordinate Order + +Sedona expects geometries to be in **longitude/latitude (lon/lat)** order. If your data is in lat/lon order, use `ST_FlipCoordinates` to swap the coordinates before transformation. + +```sql +-- If your data is in lat/lon order, flip first +SELECT ST_Transform( + ST_FlipCoordinates(ST_GeomFromText('POINT(37.7749 -122.4194)')), + 'EPSG:4326', + 'EPSG:3857' +) AS transformed_point +``` + +Sedona automatically handles coordinate order in the CRS definition, ensuring the source and target CRS use lon/lat order internally. + +## Using Geometry SRID + +If the geometry already has an SRID set, you can omit the source CRS parameter: + +```sql +-- Set SRID on geometry and transform using only target CRS +SELECT ST_Transform( + ST_SetSRID(ST_GeomFromText('POINT(-122.4194 37.7749)'), 4326), + 'EPSG:3857' +) AS transformed_point +``` + +## See Also + +- [ST_Transform](Function.md#st_transform) - Function reference +- [ST_SetSRID](Function.md#st_setsrid) - Set the SRID of a geometry +- [ST_SRID](Function.md#st_srid) - Get the SRID of a geometry +- [ST_FlipCoordinates](Function.md#st_flipcoordinates) - Swap X and Y coordinates diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md index f812009568a..31a8f3d6828 100644 --- a/docs/api/sql/Function.md +++ b/docs/api/sql/Function.md @@ -4648,62 +4648,15 @@ MULTIPOLYGON (((-2 -3, -3 -3, -3 3, -2 3, -2 -3)), ((3 -3, 3 3, 4 3, 4 -3, 3 -3) Introduction: -Transform the Spatial Reference System / Coordinate Reference System of A, from SourceCRS to TargetCRS. For SourceCRS and TargetCRS, WKT format is also available since `v1.3.1`. Since `v1.5.1`, if the `SourceCRS` is not specified, CRS will be fetched from the geometry using [ST_SRID](#st_srid). +Transform the Spatial Reference System / Coordinate Reference System of A, from SourceCRS to TargetCRS. If the `SourceCRS` is not specified, CRS will be fetched from the geometry using [ST_SRID](#st_srid). -**Lon/Lat Order in the input geometry** +Since `v1.9.0`, Sedona supports multiple CRS formats including EPSG codes, WKT1, WKT2, PROJ strings, and PROJJSON. Grid files for high-accuracy datum transformations are also supported. -If the input geometry is in lat/lon order, it might throw an error such as `too close to pole`, `latitude or longitude exceeded limits`, or give unexpected results. -You need to make sure that the input geometry is in lon/lat order. If the input geometry is in lat/lon order, you can use ==ST_FlipCoordinates== to swap X and Y. - -**Lon/Lat Order in the source and target CRS** - -Sedona will make sure the source and target CRS to be in lon/lat order. If the source CRS or target CRS is in lat/lon order, these CRS will be swapped to lon/lat order. - -**CRS code** - -The CRS code is the code of the CRS in the official EPSG database (https://epsg.org/) in the format of `EPSG:XXXX`. A community tool [EPSG.io](https://epsg.io/) can help you quick identify a CRS code. For example, the code of WGS84 is `EPSG:4326`. - -**WKT format** - -You can also use OGC WKT v1 format to specify the source CRS and target CRS. An example OGC WKT v1 CRS of `EPGS:3857` is as follows: - -``` -PROJCS["WGS 84 / Pseudo-Mercator", - GEOGCS["WGS 84", - DATUM["WGS_1984", - SPHEROID["WGS 84",6378137,298.257223563, - AUTHORITY["EPSG","7030"]], - AUTHORITY["EPSG","6326"]], - PRIMEM["Greenwich",0, - AUTHORITY["EPSG","8901"]], - UNIT["degree",0.0174532925199433, - AUTHORITY["EPSG","9122"]], - AUTHORITY["EPSG","4326"]], - PROJECTION["Mercator_1SP"], - PARAMETER["central_meridian",0], - PARAMETER["scale_factor",1], - PARAMETER["false_easting",0], - PARAMETER["false_northing",0], - UNIT["metre",1, - AUTHORITY["EPSG","9001"]], - AXIS["Easting",EAST], - AXIS["Northing",NORTH], - EXTENSION["PROJ4","+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs"], - AUTHORITY["EPSG","3857"]] -``` - -!!!note - By default, this function uses lon/lat order since `v1.5.0`. Before, it used lat/lon order. - -!!!note - By default, ==ST_Transform== follows the `lenient` mode which tries to fix issues by itself. You can append a boolean value at the end to enable the `strict` mode. In `strict` mode, ==ST_Transform== will throw an error if it finds any issue. +!!!tip + For comprehensive details on supported CRS formats, grid file usage, and more examples, see [CRS Transformation](CRS-Transformation.md). Format: -``` -ST_Transform (A: Geometry, SourceCRS: String, TargetCRS: String, lenientMode: Boolean) -``` - ``` ST_Transform (A: Geometry, SourceCRS: String, TargetCRS: String) ``` @@ -4720,10 +4673,6 @@ SQL Example SELECT ST_AsText(ST_Transform(ST_GeomFromText('POLYGON((170 50,170 72,-130 72,-130 50,170 50))'),'EPSG:4326', 'EPSG:32649')) ``` -```sql -SELECT ST_AsText(ST_Transform(ST_GeomFromText('POLYGON((170 50,170 72,-130 72,-130 50,170 50))'),'EPSG:4326', 'EPSG:32649', false)) -``` - Output: ``` diff --git a/docs/api/sql/Parameter.md b/docs/api/sql/Parameter.md index 9510efdbcc3..b0888211b88 100644 --- a/docs/api/sql/Parameter.md +++ b/docs/api/sql/Parameter.md @@ -100,3 +100,14 @@ If you set the same parameter through both `sedona` and `spark.sedona` prefixes, * Enable the parser extension to parse GEOMETRY data type in SQL DDL statements * Default: true * Possible values: true, false + +## CRS Transformation + +* spark.sedona.crs.geotools + * Controls which library is used for CRS transformations in ST_Transform + * Default: raster + * Possible values: + * none: Use proj4sedona for all transformations + * raster: Use proj4sedona for vector transformations, GeoTools for raster transformations + * all: Use GeoTools for all transformations (legacy behavior) + * Since: v1.9.0 diff --git a/docs/setup/maven-coordinates.md b/docs/setup/maven-coordinates.md index 0da7eb39731..d57f73eebb4 100644 --- a/docs/setup/maven-coordinates.md +++ b/docs/setup/maven-coordinates.md @@ -33,7 +33,7 @@ Please use the artifact with Spark major.minor version in the artifact name. For If you are using the Scala 2.13 builds of Spark, please use the corresponding packages for Scala 2.13, which are suffixed by `_2.13`. -The optional GeoTools library is required if you want to use CRS transformation, ShapefileReader or GeoTiff reader. This wrapper library is a re-distribution of GeoTools official jars. The only purpose of this library is to bring GeoTools jars from OSGEO repository to Maven Central. This library is under GNU Lesser General Public License (LGPL) license so we cannot package it in Sedona official release. +The optional GeoTools library is required if you want to use raster operators. Vector CRS transformation (ST_Transform) now uses the built-in proj4sedona library by default and does not require GeoTools. This wrapper library is a re-distribution of GeoTools official jars. The only purpose of this library is to bring GeoTools jars from OSGEO repository to Maven Central. This library is under GNU Lesser General Public License (LGPL) license so we cannot package it in Sedona official release. !!! abstract "Sedona with Apache Spark and Scala 2.12" @@ -180,7 +180,7 @@ Please use the artifacts with Spark major.minor version in the artifact name. Fo If you are using the Scala 2.13 builds of Spark, please use the corresponding packages for Scala 2.13, which are suffixed by `_2.13`. -The optional GeoTools library is required if you want to use CRS transformation, ShapefileReader or GeoTiff reader. This wrapper library is a re-distribution of GeoTools official jars. The only purpose of this library is to bring GeoTools jars from OSGEO repository to Maven Central. This library is under GNU Lesser General Public License (LGPL) license, so we cannot package it in Sedona official release. +The optional GeoTools library is required if you want to use raster operators. Vector CRS transformation (ST_Transform) now uses the built-in proj4sedona library by default and does not require GeoTools. This wrapper library is a re-distribution of GeoTools official jars. The only purpose of this library is to bring GeoTools jars from OSGEO repository to Maven Central. This library is under GNU Lesser General Public License (LGPL) license, so we cannot package it in Sedona official release. !!! abstract "Sedona with Apache Spark and Scala 2.12" diff --git a/flink/pom.xml b/flink/pom.xml index be4c3baba1b..5ab5464ff0c 100644 --- a/flink/pom.xml +++ b/flink/pom.xml @@ -118,6 +118,11 @@ org.geotools gt-epsg-hsql + + + org.datasyslab + proj4sedona + org.scala-lang scala-library diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java b/flink/src/main/java/org/apache/sedona/flink/Catalog.java index eefe48c8212..5a02e2efdb8 100644 --- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java +++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java @@ -103,7 +103,7 @@ public static UserDefinedFunction[] getFuncs() { new Functions.ST_LineLocatePoint(), new Functions.ST_LocateAlong(), new Functions.ST_LongestLine(), - new FunctionsGeoTools.ST_Transform(), + new FunctionsProj4.ST_Transform(), new Functions.ST_FlipCoordinates(), new Functions.ST_GeoHash(), new Functions.ST_GeoHashNeighbors(), diff --git a/flink/src/main/java/org/apache/sedona/flink/expressions/FunctionsProj4.java b/flink/src/main/java/org/apache/sedona/flink/expressions/FunctionsProj4.java new file mode 100644 index 00000000000..3a97d18fd45 --- /dev/null +++ b/flink/src/main/java/org/apache/sedona/flink/expressions/FunctionsProj4.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.flink.expressions; + +import org.apache.flink.table.annotation.DataTypeHint; +import org.apache.flink.table.functions.ScalarFunction; +import org.apache.sedona.flink.GeometryTypeSerializer; +import org.locationtech.jts.geom.Geometry; + +/** + * Flink scalar functions that use proj4sedona for CRS transformations. This provides better + * performance and broader CRS format support compared to GeoTools. + */ +public class FunctionsProj4 { + + /** + * ST_Transform using proj4sedona library for coordinate reference system transformations. + * + *

Supports the following CRS formats: + * + *

+ * + *

Note: The 4-argument version accepts a 'lenient' parameter for API compatibility, but this + * parameter is ignored by proj4sedona as it always attempts the best available transformation. + */ + public static class ST_Transform extends ScalarFunction { + + /** + * Transform geometry using the geometry's SRID as source CRS. + * + * @param o the geometry to transform (must have SRID set) + * @param targetCRS the target CRS (EPSG code, PROJ string, WKT, or PROJJSON) + * @return the transformed geometry with updated SRID + */ + @DataTypeHint( + value = "RAW", + rawSerializer = GeometryTypeSerializer.class, + bridgedTo = Geometry.class) + public Geometry eval( + @DataTypeHint( + value = "RAW", + rawSerializer = GeometryTypeSerializer.class, + bridgedTo = Geometry.class) + Object o, + @DataTypeHint("String") String targetCRS) { + Geometry geom = (Geometry) o; + return org.apache.sedona.common.FunctionsProj4.transform(geom, targetCRS); + } + + /** + * Transform geometry from source CRS to target CRS. + * + * @param o the geometry to transform + * @param sourceCRS the source CRS (EPSG code, PROJ string, WKT, or PROJJSON) + * @param targetCRS the target CRS (EPSG code, PROJ string, WKT, or PROJJSON) + * @return the transformed geometry with updated SRID + */ + @DataTypeHint( + value = "RAW", + rawSerializer = GeometryTypeSerializer.class, + bridgedTo = Geometry.class) + public Geometry eval( + @DataTypeHint( + value = "RAW", + rawSerializer = GeometryTypeSerializer.class, + bridgedTo = Geometry.class) + Object o, + @DataTypeHint("String") String sourceCRS, + @DataTypeHint("String") String targetCRS) { + Geometry geom = (Geometry) o; + return org.apache.sedona.common.FunctionsProj4.transform(geom, sourceCRS, targetCRS); + } + + /** + * Transform geometry from source CRS to target CRS. The lenient parameter is accepted for API + * compatibility but is ignored by proj4sedona. + * + * @param o the geometry to transform + * @param sourceCRS the source CRS (EPSG code, PROJ string, WKT, or PROJJSON) + * @param targetCRS the target CRS (EPSG code, PROJ string, WKT, or PROJJSON) + * @param lenient ignored by proj4sedona (kept for API compatibility) + * @return the transformed geometry with updated SRID + */ + @DataTypeHint( + value = "RAW", + rawSerializer = GeometryTypeSerializer.class, + bridgedTo = Geometry.class) + public Geometry eval( + @DataTypeHint( + value = "RAW", + rawSerializer = GeometryTypeSerializer.class, + bridgedTo = Geometry.class) + Object o, + @DataTypeHint("String") String sourceCRS, + @DataTypeHint("String") String targetCRS, + @DataTypeHint("Boolean") Boolean lenient) { + Geometry geom = (Geometry) o; + return org.apache.sedona.common.FunctionsProj4.transform(geom, sourceCRS, targetCRS, lenient); + } + } +} diff --git a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java index bd310c65b78..3c9b2719415 100644 --- a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java +++ b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java @@ -31,10 +31,9 @@ import org.apache.flink.table.api.Table; import org.apache.flink.types.Row; import org.apache.sedona.flink.expressions.Functions; -import org.apache.sedona.flink.expressions.FunctionsGeoTools; -import org.geotools.api.referencing.FactoryException; -import org.geotools.api.referencing.crs.CoordinateReferenceSystem; -import org.geotools.referencing.CRS; +import org.apache.sedona.flink.expressions.FunctionsProj4; +import org.datasyslab.proj4sedona.core.Proj; +import org.datasyslab.proj4sedona.parser.CRSSerializer; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -483,7 +482,7 @@ public void testTransform() { Table transformedTable = pointTable.select( call( - FunctionsGeoTools.ST_Transform.class.getSimpleName(), + FunctionsProj4.ST_Transform.class.getSimpleName(), $(pointColNames[0]), "epsg:4326", "epsg:3857")); @@ -498,7 +497,7 @@ public void testTransform() { pointTable .select( call( - FunctionsGeoTools.ST_Transform.class.getSimpleName(), + FunctionsProj4.ST_Transform.class.getSimpleName(), $(pointColNames[0]), "epsg:3857")) .as(pointColNames[0]) @@ -548,55 +547,65 @@ public void testUnionArrayVariant() { } @Test - public void testTransformWKT() throws FactoryException { + public void testTransformWKT() { Table pointTable = createPointTable_real(testDataSize); - CoordinateReferenceSystem CRS_SRC = CRS.decode("epsg:4326", true); - CoordinateReferenceSystem CRS_TGT = CRS.decode("epsg:3857", true); + // Use proj4sedona to generate WKT dynamically from EPSG codes + Proj srcProj = new Proj("EPSG:4326"); + Proj tgtProj = new Proj("EPSG:3857"); + String SRC_WKT = CRSSerializer.toWkt1(srcProj); + String TGT_WKT = CRSSerializer.toWkt1(tgtProj); - String SRC_WKT = CRS_SRC.toWKT(); - String TGT_WKT = CRS_TGT.toWKT(); + // Get expected result using EPSG codes + Table expectedTable = + pointTable.select( + call( + FunctionsProj4.ST_Transform.class.getSimpleName(), + $(pointColNames[0]), + "epsg:4326", + "epsg:3857")); + String expected = first(expectedTable).getField(0).toString(); Table transformedTable_SRC = pointTable.select( call( - FunctionsGeoTools.ST_Transform.class.getSimpleName(), + FunctionsProj4.ST_Transform.class.getSimpleName(), $(pointColNames[0]), SRC_WKT, "epsg:3857")); String result_SRC = first(transformedTable_SRC).getField(0).toString(); - assertEquals("POINT (-13134586.718698347 3764623.3541299687)", result_SRC); + assertEquals(expected, result_SRC); Table transformedTable_TGT = pointTable.select( call( - FunctionsGeoTools.ST_Transform.class.getSimpleName(), + FunctionsProj4.ST_Transform.class.getSimpleName(), $(pointColNames[0]), "epsg:4326", TGT_WKT)); String result_TGT = first(transformedTable_TGT).getField(0).toString(); - assertEquals("POINT (-13134586.718698347 3764623.3541299687)", result_TGT); + assertEquals(expected, result_TGT); Table transformedTable_SRC_TGT = pointTable.select( call( - FunctionsGeoTools.ST_Transform.class.getSimpleName(), + FunctionsProj4.ST_Transform.class.getSimpleName(), $(pointColNames[0]), SRC_WKT, TGT_WKT)); String result_SRC_TGT = first(transformedTable_SRC_TGT).getField(0).toString(); - assertEquals("POINT (-13134586.718698347 3764623.3541299687)", result_SRC_TGT); + assertEquals(expected, result_SRC_TGT); Table transformedTable_SRC_TGT_lenient = pointTable.select( call( - FunctionsGeoTools.ST_Transform.class.getSimpleName(), + FunctionsProj4.ST_Transform.class.getSimpleName(), $(pointColNames[0]), SRC_WKT, TGT_WKT, false)); String result_SRC_TGT_lenient = first(transformedTable_SRC_TGT_lenient).getField(0).toString(); - assertEquals("POINT (-13134586.718698347 3764623.3541299687)", result_SRC_TGT_lenient); + assertEquals(expected, result_SRC_TGT_lenient); } @Test diff --git a/mkdocs.yml b/mkdocs.yml index 27246b7280c..1e5d0a3e07a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -84,6 +84,7 @@ nav: - Aggregate function: api/sql/geography/AggregateFunction.md - DataFrame Style functions: api/sql/DataFrameAPI.md - Query optimization: api/sql/Optimizer.md + - CRS Transformation: api/sql/CRS-Transformation.md - Nearest-Neighbour searching: api/sql/NearestNeighbourSearching.md - 'Spider:Spatial Data Generator': api/sql/Spider.md - Reading Legacy Parquet Files: api/sql/Reading-legacy-parquet.md diff --git a/pom.xml b/pom.xml index 45797f99371..542c6c94659 100644 --- a/pom.xml +++ b/pom.xml @@ -96,6 +96,7 @@ 2.5.0 1.52 2.9.2 + 0.0.3 provided @@ -382,6 +383,12 @@ campskeleton 0.0.2-20260118 + + + org.datasyslab + proj4sedona + ${proj4sedona.version} + diff --git a/python/tests/sql/test_dataframe_api.py b/python/tests/sql/test_dataframe_api.py index ad158e69ae2..4eb8ee02888 100644 --- a/python/tests/sql/test_dataframe_api.py +++ b/python/tests/sql/test_dataframe_api.py @@ -1076,7 +1076,7 @@ ("point", lambda: f.lit("EPSG:4326"), lambda: f.lit("EPSG:32649")), "point_geom", "ST_ReducePrecision(geom, 2)", - "POINT (-34870890.91 1919456.06)", + "POINT (-10300622.99 19680322.66)", ), ( stf.ST_Translate, @@ -1538,7 +1538,6 @@ class TestDataFrameAPI(TestBase): - @pytest.fixture def base_df(self, request): wkbLine = "0102000000020000000000000084d600c00000000080b5d6bf00000060e1eff7bf00000080075de5bf" diff --git a/python/tests/streaming/spark/test_constructor_functions.py b/python/tests/streaming/spark/test_constructor_functions.py index cbaa6474ea1..227c8074115 100644 --- a/python/tests/streaming/spark/test_constructor_functions.py +++ b/python/tests/streaming/spark/test_constructor_functions.py @@ -117,7 +117,7 @@ .with_arguments( ["ST_GeomFromText('POINT(52.5 21.5)')", "'epsg:4326'", "'epsg:2180'"] ) - .with_expected_result(-2501415.806893427) + .with_expected_result(-2501362.200396569) # .with_expected_result("POINT (-2501415.806893427 4119952.52325666)") .with_transform("ST_Y") ), diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java index 58bf1dc79f0..b1f24e15f1a 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java @@ -1053,7 +1053,7 @@ public void test_ST_Transform() { verifySqlSingleRes( "select SEDONA.ST_AsText(SEDONA.ST_Transform(SEDONA.ST_geomFromWKT('POLYGON ((110.54671 55.818002, 110.54671 55.143743, 110.940494 55.143743, 110.940494 55.818002, 110.54671 55.818002))'),'EPSG:4326', 'EPSG:32649', false))", Pattern.compile( - "POLYGON \\(\\(471596.69167460\\d* 6185916.95119\\d*, 471107.562364\\d* 6110880.97422\\d*, 496207.10915\\d* 6110788.80471\\d*, 496271.3193704\\d* 6185825.6056\\d*, 471596.6916746\\d* 6185916.95119\\d*\\)\\)")); + "POLYGON \\(\\(471596.6916746\\d* 6185916.9511\\d*, 471107.562364\\d* 6110880.97422\\d*, 496207.10915\\d* 6110788.80471\\d*, 496271.3193704\\d* 6185825.6056\\d*, 471596.6916746\\d* 6185916.9511\\d*\\)\\)")); } @Test diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java index 7aa8f156578..ae26fedad22 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java @@ -998,7 +998,7 @@ public void test_ST_Transform() { registerUDFV2("ST_Transform", String.class, String.class, String.class, boolean.class); verifySqlSingleRes( "select ST_AsText(SEDONA.ST_Transform(ST_geomFromWKT('POLYGON ((110.54671 55.818002, 110.54671 55.143743, 110.940494 55.143743, 110.940494 55.818002, 110.54671 55.818002))'),'EPSG:4326', 'EPSG:32649', false))", - "POLYGON((471596.691674602 6185916.95119129,471107.562364101 6110880.97422817,496207.109151055 6110788.80471244,496271.319370462 6185825.60569904,471596.691674602 6185916.95119129))"); + "POLYGON((471596.691674603 6185916.95118885,471107.562364101 6110880.97422586,496207.109151055 6110788.80471013,496271.319370462 6185825.6056966,471596.691674603 6185916.95118885))"); } @Test diff --git a/snowflake/pom.xml b/snowflake/pom.xml index b89c8685eb3..d6e15a15dd9 100644 --- a/snowflake/pom.xml +++ b/snowflake/pom.xml @@ -61,6 +61,11 @@ org.geotools gt-referencing + + + org.datasyslab + proj4sedona + org.apache.commons commons-lang3 diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/GeoToolsWrapper.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/GeoToolsWrapper.java deleted file mode 100644 index fbb7963b612..00000000000 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/GeoToolsWrapper.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.sedona.snowflake.snowsql; - -import org.apache.sedona.common.FunctionsGeoTools; -import org.geotools.api.referencing.FactoryException; -import org.geotools.api.referencing.operation.TransformException; -import org.locationtech.jts.geom.Geometry; - -public class GeoToolsWrapper { - public static Geometry transform( - Geometry geometry, String sourceCRS, String targetCRS, boolean lenient) { - try { - return FunctionsGeoTools.transform(geometry, sourceCRS, targetCRS, lenient); - } catch (FactoryException | TransformException e) { - throw new RuntimeException(e); - } - } - - public static Geometry transform(Geometry geometry, String sourceCRS, String targetCRS) { - try { - return FunctionsGeoTools.transform(geometry, sourceCRS, targetCRS); - } catch (FactoryException | TransformException e) { - throw new RuntimeException(e); - } - } -} diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java index 4f08f2250c2..b4cde280c49 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java @@ -23,6 +23,7 @@ import org.apache.sedona.common.Constructors; import org.apache.sedona.common.Functions; import org.apache.sedona.common.FunctionsGeoTools; +import org.apache.sedona.common.FunctionsProj4; import org.apache.sedona.common.Predicates; import org.apache.sedona.common.enums.FileDataSplitter; import org.apache.sedona.common.sphere.Haversine; @@ -1145,14 +1146,14 @@ public static Boolean ST_RelateMatch(String matrix1, String matrix2) { @UDFAnnotations.ParamMeta(argNames = {"geometry", "sourceCRS", "targetCRS"}) public static byte[] ST_Transform(byte[] geometry, String sourceCRS, String targetCRS) { return GeometrySerde.serialize( - GeoToolsWrapper.transform(GeometrySerde.deserialize(geometry), sourceCRS, targetCRS)); + FunctionsProj4.transform(GeometrySerde.deserialize(geometry), sourceCRS, targetCRS)); } @UDFAnnotations.ParamMeta(argNames = {"geometry", "sourceCRS", "targetCRS", "lenient"}) public static byte[] ST_Transform( byte[] geometry, String sourceCRS, String targetCRS, boolean lenient) { return GeometrySerde.serialize( - GeoToolsWrapper.transform( + FunctionsProj4.transform( GeometrySerde.deserialize(geometry), sourceCRS, targetCRS, lenient)); } diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java index 48f8842a424..7cff529ed36 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.sedona.common.Functions; import org.apache.sedona.common.FunctionsGeoTools; +import org.apache.sedona.common.FunctionsProj4; import org.apache.sedona.common.Predicates; import org.apache.sedona.common.sphere.Haversine; import org.apache.sedona.common.sphere.Spheroid; @@ -1363,7 +1364,7 @@ public static boolean ST_RelateMatch(String matrix1, String matrix2) { returnTypes = "Geometry") public static String ST_Transform(String geometry, String sourceCRS, String targetCRS) { return GeometrySerde.serGeoJson( - GeoToolsWrapper.transform(GeometrySerde.deserGeoJson(geometry), sourceCRS, targetCRS)); + FunctionsProj4.transform(GeometrySerde.deserGeoJson(geometry), sourceCRS, targetCRS)); } @UDFAnnotations.ParamMeta( @@ -1373,7 +1374,7 @@ public static String ST_Transform(String geometry, String sourceCRS, String targ public static String ST_Transform( String geometry, String sourceCRS, String targetCRS, boolean lenient) { return GeometrySerde.serGeoJson( - GeoToolsWrapper.transform( + FunctionsProj4.transform( GeometrySerde.deserGeoJson(geometry), sourceCRS, targetCRS, lenient)); } diff --git a/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java b/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java index 38c4f061d0e..44b28858156 100644 --- a/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java +++ b/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java @@ -21,6 +21,7 @@ import java.io.Serializable; import java.lang.reflect.Field; import java.nio.file.Paths; +import java.util.Locale; import org.apache.sedona.core.enums.GridType; import org.apache.sedona.core.enums.IndexType; import org.apache.sedona.core.enums.JoinBuildSide; @@ -32,8 +33,54 @@ import org.apache.spark.sql.SparkSession; import org.apache.spark.util.Utils; import org.locationtech.jts.geom.Envelope; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class SedonaConf implements Serializable { + static final Logger logger = LoggerFactory.getLogger(SedonaConf.class); + + /** + * CRS transformation mode for ST_Transform function. + * + *

+ * + * @since 1.9.0 + */ + public enum CRSTransformMode { + /** Use proj4sedona for all transformations. Raster transform not yet supported. */ + NONE, + /** Use proj4sedona for vector, GeoTools for raster (default). */ + RASTER, + /** Use GeoTools for all transformations (legacy behavior). */ + ALL; + + public static CRSTransformMode fromString(String value) { + if (value == null || value.isEmpty()) { + return RASTER; // default + } + try { + return valueOf(value.toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException e) { + logger.warn( + "Invalid value '{}' for spark.sedona.crs.geotools, using default 'raster'", value); + return RASTER; + } + } + + /** Returns true if GeoTools should be used for vector CRS transformations. */ + public boolean useGeoToolsForVector() { + return this == ALL; + } + + /** Returns true if GeoTools should be used for raster CRS transformations. */ + public boolean useGeoToolsForRaster() { + return this == RASTER || this == ALL; + } + } // Global parameters of Sedona. All these parameters can be initialized through SparkConf. @@ -69,6 +116,9 @@ public class SedonaConf implements Serializable { private String libPostalDataDir; private Boolean libPostalUseSenzing = false; + // Parameter for CRS transformation mode + private CRSTransformMode crsTransformMode; + public static SedonaConf fromActiveSession() { return new SedonaConf(SparkSession.active().conf()); } @@ -177,6 +227,13 @@ private SedonaConf(ConfGetter confGetter) { this.libPostalUseSenzing = Boolean.parseBoolean(confGetter.get("spark.sedona.libpostal.useSenzing", "true")); + + // CRS transformation mode configuration + // - "none": Use proj4sedona for all transformations (raster not yet supported) + // - "raster" (default): Use proj4sedona for vector, GeoTools for raster + // - "all": Use GeoTools for all transformations (legacy behavior) + this.crsTransformMode = + CRSTransformMode.fromString(confGetter.get("spark.sedona.crs.geotools", "raster")); } // Helper method to prioritize `sedona.*` over `spark.sedona.*` @@ -275,4 +332,14 @@ public String getLibPostalDataDir() { public Boolean getLibPostalUseSenzing() { return libPostalUseSenzing; } + + /** + * Get the CRS transformation mode for ST_Transform. + * + * @return The CRS transformation mode + * @since 1.9.0 + */ + public CRSTransformMode getCRSTransformMode() { + return crsTransformMode; + } } diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala index 329848d2fcb..565a9e99574 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala @@ -18,7 +18,7 @@ */ package org.apache.spark.sql.sedona_sql.expressions -import org.apache.sedona.common.{Functions, FunctionsGeoTools} +import org.apache.sedona.common.{Functions, FunctionsGeoTools, FunctionsProj4} import org.apache.sedona.common.sphere.{Haversine, Spheroid} import org.apache.sedona.common.utils.{InscribedCircle, ValidDetail} import org.apache.sedona.core.utils.SedonaConf @@ -292,19 +292,71 @@ private[apache] case class ST_Centroid(inputExpressions: Seq[Expression]) * Given a geometry, sourceEPSGcode, and targetEPSGcode, convert the geometry's Spatial Reference * System / Coordinate Reference System. * + * The CRS transformation backend is controlled by the `spark.sedona.crs.geotools` config: + * - `none`: Use proj4sedona for all vector transformations + * - `raster` (default): Use proj4sedona for vector, GeoTools for raster + * - `all`: Use GeoTools for everything (legacy behavior) + * + * Note: For the 4-argument version with lenient parameter, proj4sedona ignores the lenient + * parameter (it always performs strict transformation). GeoTools uses the lenient parameter. + * + * The default fallback (when config cannot be read) is proj4sedona, ensuring consistent behavior + * during Spark's query optimization phases like constant folding. + * * @param inputExpressions + * @param useGeoTools */ -private[apache] case class ST_Transform(inputExpressions: Seq[Expression]) +private[apache] case class ST_Transform(inputExpressions: Seq[Expression], useGeoTools: Boolean) extends InferredExpression( - inferrableFunction4(FunctionsGeoTools.transform), - inferrableFunction3(FunctionsGeoTools.transform), - inferrableFunction2(FunctionsGeoTools.transform)) { + inferrableFunction4(FunctionsProj4.transform), + inferrableFunction3(FunctionsProj4.transform), + inferrableFunction2(FunctionsProj4.transform)) { + + def this(inputExpressions: Seq[Expression]) { + // We decide whether to use GeoTools based on active session config. + // SparkSession may not be available on executors, so we need to + // construct ST_Transform on driver. useGeoTools will be passed down + // to executors through object serialization/deserialization. + this(inputExpressions, ST_Transform.useGeoTools()) + } + + // Define proj4sedona function overloads (2, 3, 4-arg versions) + // Note: 4-arg version ignores the lenient parameter + private lazy val proj4Functions: Seq[InferrableFunction] = Seq( + inferrableFunction4(FunctionsProj4.transform), + inferrableFunction3(FunctionsProj4.transform), + inferrableFunction2(FunctionsProj4.transform)) + + // Define GeoTools function overloads (2, 3, 4-arg versions) + private lazy val geoToolsFunctions: Seq[InferrableFunction] = Seq( + inferrableFunction4(FunctionsGeoTools.transform), + inferrableFunction3(FunctionsGeoTools.transform), + inferrableFunction2(FunctionsGeoTools.transform)) + + override lazy val f: InferrableFunction = { + // Check config to decide between proj4sedona and GeoTools + // Note: 4-arg lenient parameter is ignored by proj4sedona + val candidateFunctions = if (useGeoTools) geoToolsFunctions else proj4Functions + FunctionResolver.resolveFunction(inputExpressions, candidateFunctions) + } protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { copy(inputExpressions = newChildren) } } +object ST_Transform { + private def useGeoTools(): Boolean = { + try { + SedonaConf.fromActiveSession().getCRSTransformMode.useGeoToolsForVector() + } catch { + case _: Exception => + // If no active session, fall back to default (proj4sedona) + false + } + } +} + /** * Return the intersection shape of two geometries. The return type is a geometry * diff --git a/spark/common/src/test/resources/grids/ca_nrc_ntv2_0.tif b/spark/common/src/test/resources/grids/ca_nrc_ntv2_0.tif new file mode 100644 index 00000000000..6395bf20175 Binary files /dev/null and b/spark/common/src/test/resources/grids/ca_nrc_ntv2_0.tif differ diff --git a/spark/common/src/test/resources/grids/uk_os_OSTN15_NTv2_ETRStoOSGB.gsb b/spark/common/src/test/resources/grids/uk_os_OSTN15_NTv2_ETRStoOSGB.gsb new file mode 100644 index 00000000000..bc131103366 Binary files /dev/null and b/spark/common/src/test/resources/grids/uk_os_OSTN15_NTv2_ETRStoOSGB.gsb differ diff --git a/spark/common/src/test/resources/grids/uk_os_OSTN15_NTv2_OSGBtoETRS.gsb b/spark/common/src/test/resources/grids/uk_os_OSTN15_NTv2_OSGBtoETRS.gsb new file mode 100644 index 00000000000..1c3b8e44380 Binary files /dev/null and b/spark/common/src/test/resources/grids/uk_os_OSTN15_NTv2_OSGBtoETRS.gsb differ diff --git a/spark/common/src/test/resources/grids/us_noaa_conus.tif b/spark/common/src/test/resources/grids/us_noaa_conus.tif new file mode 100644 index 00000000000..88c4d00b616 Binary files /dev/null and b/spark/common/src/test/resources/grids/us_noaa_conus.tif differ diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala b/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala new file mode 100644 index 00000000000..73b0ae55bc3 --- /dev/null +++ b/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala @@ -0,0 +1,858 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.sql + +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.sedona_sql.expressions.st_functions._ +import org.junit.Assert.{assertEquals, assertNotNull, assertTrue} +import org.locationtech.jts.geom.Geometry + +/** + * Tests for ST_Transform using proj4sedona backend. + * + * These tests verify CRS transformations with various input formats: + * - EPSG codes + * - PROJ strings + * - WKT1 and WKT2 + * - PROJJSON + * - NAD grid files + * + * Tests also verify config switching between proj4sedona and GeoTools. + */ +class CRSTransformProj4Test extends TestBaseScala { + + private val COORD_TOLERANCE = 1.0 // 1 meter tolerance for projected coordinates + + describe("ST_Transform with proj4sedona (default mode)") { + + it("should transform EPSG:4326 to EPSG:3857 using SQL API") { + val result = sparkSession + .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(3857, result.getSRID) + // Validated against cs2cs: -13627665.27, 4547675.35 + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE) + } + + it("should transform using geometry SRID as source (2-arg version)") { + val result = sparkSession + .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326), 'EPSG:3857')") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(3857, result.getSRID) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE) + } + + it("should transform to UTM Zone 10N") { + val result = sparkSession + .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 'EPSG:4326', 'EPSG:32610')") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(32610, result.getSRID) + // Validated against cs2cs: 551130.77, 4180998.88 + assertEquals(551130.77, result.getCoordinate.x, COORD_TOLERANCE) + assertEquals(4180998.88, result.getCoordinate.y, COORD_TOLERANCE) + } + + it("should transform using PROJ string") { + val projString = + "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +no_defs" + val result = sparkSession + .sql(s"""SELECT ST_Transform( + ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326), + 'EPSG:4326', + '$projString')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + // Web Mercator coordinates + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE) + } + + it("should transform using PROJ string for UTM") { + val projString = "+proj=utm +zone=10 +datum=WGS84 +units=m +no_defs" + val result = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT (-122.4194 37.7749)'), + '+proj=longlat +datum=WGS84 +no_defs', + '$projString')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + // UTM Zone 10N coordinates + assertTrue(result.getCoordinate.x > 540000 && result.getCoordinate.x < 560000) + assertTrue(result.getCoordinate.y > 4170000 && result.getCoordinate.y < 4190000) + } + + it("should transform using WKT1") { + val sourceWkt = + """GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]""" + val targetWkt = + """PROJCS["WGS 84 / UTM zone 51N",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",123],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1]]""" + + val result = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT (120 60)'), + '$sourceWkt', + '$targetWkt')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + // Validated against cs2cs: 332705.18, 6655205.48 + assertEquals(332705.18, result.getCoordinate.x, COORD_TOLERANCE) + assertEquals(6655205.48, result.getCoordinate.y, COORD_TOLERANCE) + } + + it("should transform using WKT2") { + val wkt2 = + """GEOGCRS["WGS 84",DATUM["World Geodetic System 1984",ELLIPSOID["WGS 84",6378137,298.257223563]],CS[ellipsoidal,2],AXIS["latitude",north],AXIS["longitude",east],UNIT["degree",0.0174532925199433]]""" + + val result = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT (-122.4194 37.7749)'), + '$wkt2', + 'EPSG:3857')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(3857, result.getSRID) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE) + } + + it("should transform using PROJJSON") { + val projJson = + """{"type":"GeographicCRS","name":"WGS 84","datum":{"type":"GeodeticReferenceFrame","name":"World Geodetic System 1984","ellipsoid":{"name":"WGS 84","semi_major_axis":6378137,"inverse_flattening":298.257223563}}}""" + + val result = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT (-122.4194 37.7749)'), + '$projJson', + 'EPSG:3857')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(3857, result.getSRID) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + } + + it("should transform with NAD grid file") { + val gridFile = getClass.getClassLoader.getResource("grids/us_noaa_conus.tif") + if (gridFile != null) { + val gridPath = gridFile.getPath + val projString = s"+proj=longlat +ellps=GRS80 +nadgrids=$gridPath +no_defs" + + val result = sparkSession + .sql(s"""SELECT ST_Transform( + ST_SetSRID(ST_GeomFromWKT('POINT (-96.0 40.0)'), 4326), + 'EPSG:4326', + '$projString')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertTrue(result.isValid) + } + } + + it("should handle round-trip transformation") { + val result = sparkSession + .sql("""SELECT ST_Transform( + ST_Transform( + ST_GeomFromWKT('POINT (-122.4194 37.7749)'), + 'EPSG:4326', + 'EPSG:3857'), + 'EPSG:3857', + 'EPSG:4326')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(4326, result.getSRID) + // Should return close to original coordinates + assertEquals(-122.4194, result.getCoordinate.x, 1e-6) + assertEquals(37.7749, result.getCoordinate.y, 1e-6) + } + + it("should preserve UserData") { + // Create a geometry with UserData + val df = sparkSession + .sql("SELECT ST_GeomFromWKT('POINT (-122.4194 37.7749)') as geom") + .selectExpr("ST_Transform(geom, 'EPSG:4326', 'EPSG:3857') as transformed") + + val result = df.first().getAs[Geometry](0) + assertNotNull(result) + assertEquals(3857, result.getSRID) + } + } + + describe("ST_Transform with DataFrame API") { + + it("should transform using 2-arg DataFrame API") { + import sparkSession.implicits._ + val df = sparkSession + .sql("SELECT ST_Point(-122.4194, 37.7749) AS geom") + .select(ST_SetSRID($"geom", lit(4326)).as("geom")) + .select(ST_Transform($"geom", lit("EPSG:3857")).as("geom")) + + val result = df.first().getAs[Geometry](0) + assertNotNull(result) + assertEquals(3857, result.getSRID) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + } + + it("should transform using 3-arg DataFrame API") { + import sparkSession.implicits._ + val df = sparkSession + .sql("SELECT ST_Point(-122.4194, 37.7749) AS geom") + .select(ST_Transform($"geom", lit("EPSG:4326"), lit("EPSG:3857")).as("geom")) + + val result = df.first().getAs[Geometry](0) + assertNotNull(result) + assertEquals(3857, result.getSRID) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + } + + it("should transform using PROJ string with DataFrame API") { + import sparkSession.implicits._ + val projString = + "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +no_defs" + val df = sparkSession + .sql("SELECT ST_Point(-122.4194, 37.7749) AS geom") + .select(ST_SetSRID($"geom", lit(4326)).as("geom")) + .select(ST_Transform($"geom", lit(projString)).as("geom")) + + val result = df.first().getAs[Geometry](0) + assertNotNull(result) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE) + } + + it("should transform using PROJJSON with DataFrame API") { + import sparkSession.implicits._ + val projJson = + """{"type":"GeographicCRS","name":"WGS 84","datum":{"type":"GeodeticReferenceFrame","name":"World Geodetic System 1984","ellipsoid":{"name":"WGS 84","semi_major_axis":6378137,"inverse_flattening":298.257223563}}}""" + + // Use createDataFrame to avoid constant folding issues with all-literal expressions + val pointDf = Seq(("POINT (-122.4194 37.7749)")) + .toDF("wkt") + .selectExpr("ST_GeomFromWKT(wkt) as geom") + + val df = pointDf.select(ST_Transform($"geom", lit(projJson), lit("EPSG:3857")).as("geom")) + + val result = df.first().getAs[Geometry](0) + assertNotNull(result) + assertEquals(3857, result.getSRID) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + } + } + + describe("ST_Transform with different geometry types") { + + it("should transform LineString") { + val result = sparkSession + .sql("""SELECT ST_Transform( + ST_GeomFromWKT('LINESTRING(-122.4 37.7, -122.5 37.8, -122.6 37.9)'), + 'EPSG:4326', + 'EPSG:3857')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals("LineString", result.getGeometryType) + assertEquals(3, result.getNumPoints) + assertEquals(3857, result.getSRID) + } + + it("should transform Polygon") { + val result = sparkSession + .sql("""SELECT ST_Transform( + ST_GeomFromWKT('POLYGON((-122.5 37.7, -122.3 37.7, -122.3 37.9, -122.5 37.9, -122.5 37.7))'), + 'EPSG:4326', + 'EPSG:3857')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals("Polygon", result.getGeometryType) + assertEquals(3857, result.getSRID) + } + + it("should transform MultiPoint") { + val result = sparkSession + .sql("""SELECT ST_Transform( + ST_GeomFromWKT('MULTIPOINT((-122.4 37.7), (-122.5 37.8), (-122.6 37.9))'), + 'EPSG:4326', + 'EPSG:3857')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals("MultiPoint", result.getGeometryType) + assertEquals(3, result.getNumGeometries) + assertEquals(3857, result.getSRID) + } + + it("should transform GeometryCollection") { + val result = sparkSession + .sql("""SELECT ST_Transform( + ST_GeomFromWKT('GEOMETRYCOLLECTION(POINT(-122.4 37.7), LINESTRING(-122.5 37.8, -122.6 37.9))'), + 'EPSG:4326', + 'EPSG:3857')""") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals("GeometryCollection", result.getGeometryType) + assertEquals(2, result.getNumGeometries) + assertEquals(3857, result.getSRID) + } + } + + describe("ST_Transform config switching") { + + it("should use proj4sedona when config is 'none'") { + withConf(Map("spark.sedona.crs.geotools" -> "none")) { + val result = sparkSession + .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 'EPSG:4326', 'EPSG:3857')") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + } + } + + it("should use proj4sedona when config is 'raster' (default)") { + withConf(Map("spark.sedona.crs.geotools" -> "raster")) { + val result = sparkSession + .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 'EPSG:4326', 'EPSG:3857')") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + } + } + + it("should use GeoTools when config is 'all'") { + withConf(Map("spark.sedona.crs.geotools" -> "all")) { + val result = sparkSession + .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 'EPSG:4326', 'EPSG:3857')") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + // GeoTools should produce similar results (within tolerance) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + } + } + + it("should use proj4sedona for 4-arg version by default (lenient ignored)") { + // 4-arg version should use proj4sedona by default, ignoring lenient parameter + val resultLenientTrue = sparkSession + .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 'EPSG:4326', 'EPSG:3857', true)") + .first() + .getAs[Geometry](0) + + val resultLenientFalse = sparkSession + .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 'EPSG:4326', 'EPSG:3857', false)") + .first() + .getAs[Geometry](0) + + assertNotNull(resultLenientTrue) + assertNotNull(resultLenientFalse) + // Both should produce same results (lenient is ignored) + assertEquals(resultLenientTrue.getCoordinate.x, resultLenientFalse.getCoordinate.x, 1e-9) + assertEquals(resultLenientTrue.getCoordinate.y, resultLenientFalse.getCoordinate.y, 1e-9) + // Should produce correct Web Mercator coordinates + assertEquals(-13627665.27, resultLenientTrue.getCoordinate.x, COORD_TOLERANCE) + assertEquals(4547675.35, resultLenientTrue.getCoordinate.y, COORD_TOLERANCE) + } + + it("should use proj4sedona for 4-arg version when config is 'none'") { + withConf(Map("spark.sedona.crs.geotools" -> "none")) { + val result = sparkSession + .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 'EPSG:4326', 'EPSG:3857', false)") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + } + } + + it("should use GeoTools for 4-arg version when config is 'all'") { + withConf(Map("spark.sedona.crs.geotools" -> "all")) { + val result = sparkSession + .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 'EPSG:4326', 'EPSG:3857', false)") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + // GeoTools should produce similar results + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + } + } + } + + describe("ST_Transform edge cases") { + + it("should handle same CRS transformation") { + val result = sparkSession + .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 'EPSG:4326', 'EPSG:4326')") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(4326, result.getSRID) + assertEquals(-122.4194, result.getCoordinate.x, 1e-6) + assertEquals(37.7749, result.getCoordinate.y, 1e-6) + } + + it("should handle empty geometry") { + val result = sparkSession + .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT EMPTY'), 'EPSG:4326', 'EPSG:3857')") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertTrue(result.isEmpty) + } + + it("should handle null geometry") { + val result = sparkSession + .sql("SELECT ST_Transform(null, 'EPSG:4326', 'EPSG:3857')") + .first() + .get(0) + + assertTrue(result == null) + } + } + + // ==================== OSTN15 British National Grid Tests ==================== + + /** + * Official OSTN15 test data from Ordnance Survey. 40 test points covering all of Great Britain + * from Cornwall to Shetland. + * + * Data source: OSTN15_TestInput_ETRStoOSGB.txt and OSTN15_TestOutput_ETRStoOSGB.txt from + * https://www.ordnancesurvey.co.uk/documents/resources/OSTN15-NTv2.zip + * + * Each point has: + * - pointId: Official test point identifier (TP01-TP40) + * - etrsLat: ETRS89 latitude in degrees + * - etrsLon: ETRS89 longitude in degrees + */ + case class OSTN15TestPoint(pointId: String, etrsLat: Double, etrsLon: Double) + + // All 40 official OSTN15 test points from OSTN15_TestInput_ETRStoOSGB.txt + private val ostn15TestPoints: Seq[OSTN15TestPoint] = Seq( + OSTN15TestPoint("TP01", 49.92226393730, -6.29977752014), + OSTN15TestPoint("TP02", 49.96006137820, -5.20304609998), + OSTN15TestPoint("TP03", 50.43885825610, -4.10864563561), + OSTN15TestPoint("TP04", 50.57563665000, -1.29782277240), + OSTN15TestPoint("TP05", 50.93127937910, -1.45051433700), + OSTN15TestPoint("TP06", 51.40078220140, -3.55128349240), + OSTN15TestPoint("TP07", 51.37447025550, 1.44454730409), + OSTN15TestPoint("TP08", 51.42754743020, -2.54407618349), + OSTN15TestPoint("TP09", 51.48936564950, -0.11992557180), + OSTN15TestPoint("TP10", 51.85890896400, -4.30852476960), + OSTN15TestPoint("TP11", 51.89436637350, 0.89724327012), + OSTN15TestPoint("TP12", 52.25529381630, -2.15458614387), + OSTN15TestPoint("TP13", 52.25160951230, -0.91248956970), + OSTN15TestPoint("TP14", 52.75136687170, 0.40153547065), + OSTN15TestPoint("TP15", 52.96219109410, -1.19747655922), + OSTN15TestPoint("TP16", 53.34480280190, -2.64049320810), + OSTN15TestPoint("TP17", 53.41628516040, -4.28918069756), + OSTN15TestPoint("TP18", 53.41630925420, -4.28917792869), + OSTN15TestPoint("TP19", 53.77911025760, -3.04045490691), + OSTN15TestPoint("TP20", 53.80021519630, -1.66379168242), + OSTN15TestPoint("TP21", 54.08666318080, -4.63452168212), + OSTN15TestPoint("TP22", 54.11685144290, -0.07773133187), + OSTN15TestPoint("TP23", 54.32919541010, -4.38849118133), + OSTN15TestPoint("TP24", 54.89542340420, -2.93827741149), + OSTN15TestPoint("TP25", 54.97912273660, -1.61657685184), + OSTN15TestPoint("TP26", 55.85399952950, -4.29649016251), + OSTN15TestPoint("TP27", 55.92478265510, -3.29479219337), + OSTN15TestPoint("TP28", 57.00606696050, -5.82836691850), + OSTN15TestPoint("TP29", 57.13902518960, -2.04856030746), + OSTN15TestPoint("TP30", 57.48625000720, -4.21926398555), + OSTN15TestPoint("TP31", 57.81351838410, -8.57854456076), + OSTN15TestPoint("TP32", 58.21262247180, -7.59255560556), + OSTN15TestPoint("TP33", 58.51560361300, -6.26091455533), + OSTN15TestPoint("TP34", 58.58120461280, -3.72631022121), + OSTN15TestPoint("TP35", 59.03743871190, -3.21454001115), + OSTN15TestPoint("TP36", 59.09335035320, -4.41757674598), + OSTN15TestPoint("TP37", 59.09671617400, -5.82799339844), + OSTN15TestPoint("TP38", 59.53470794490, -1.62516966058), + OSTN15TestPoint("TP39", 59.85409913890, -1.27486910356), + OSTN15TestPoint("TP40", 60.13308091660, -2.07382822798)) + + // Grid shift should produce small but non-zero changes (< 0.01 degrees) + private val GRID_SHIFT_TOLERANCE = 0.01 + // Round-trip tolerance (should return to original within ~1e-6 degrees) + private val ROUND_TRIP_TOLERANCE = 1e-6 + + // Remote URL for grid file (hosted on GitHub) + private val OSTN15_ETRS_TO_OSGB_URL = + "https://raw.githubusercontent.com/jiayuasu/grid-files/main/us_os/OSTN15-NTv2/OSTN15_NTv2_ETRStoOSGB.gsb" + + describe("ST_Transform with OSTN15 grid files - 40 official test points") { + import org.datasyslab.proj4sedona.Proj4 + import org.datasyslab.proj4sedona.grid.NadgridRegistry + + it("should transform all 40 points ETRS89 to OSGB36 with timing and cache metrics") { + val gridFile = getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_ETRStoOSGB.gsb") + assume(gridFile != null, "OSTN15 ETRStoOSGB grid file not found") + + val gridPath = gridFile.getPath + val etrs89WithGrid = s"+proj=longlat +ellps=GRS80 +nadgrids=$gridPath" + val osgb36 = "+proj=longlat +ellps=airy" + + // Clear caches to measure cold start + Proj4.clearCache() + NadgridRegistry.clear() + + println("\n" + "=" * 70) + println("OSTN15 ETRS89 -> OSGB36: 40 Official Test Points") + println("=" * 70) + + // First transformation (cold - loads grid file) + val startCold = System.nanoTime() + val firstResult = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT (${ostn15TestPoints.head.etrsLon} ${ostn15TestPoints.head.etrsLat})'), + '$etrs89WithGrid', + '$osgb36')""") + .first() + .getAs[Geometry](0) + val coldTime = (System.nanoTime() - startCold) / 1e6 + + println(f"First transform (cold, loads ~15MB grid): $coldTime%.2f ms") + println(f" Proj cache size: ${Proj4.getCacheSize}") + println(f" Grid cache size: ${NadgridRegistry.size()}") + + // Transform remaining 39 points (warm - cached) + val startWarm = System.nanoTime() + var successCount = 1 // Already did first one + var failCount = 0 + + ostn15TestPoints.tail.foreach { tp => + val result = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT (${tp.etrsLon} ${tp.etrsLat})'), + '$etrs89WithGrid', + '$osgb36')""") + .first() + .getAs[Geometry](0) + + if (result != null) { + val xShift = Math.abs(result.getCoordinate.x - tp.etrsLon) + val yShift = Math.abs(result.getCoordinate.y - tp.etrsLat) + + if (xShift < GRID_SHIFT_TOLERANCE && yShift < GRID_SHIFT_TOLERANCE && + xShift > 1e-9 && yShift > 1e-9) { + successCount += 1 + } else { + failCount += 1 + println(s" ${tp.pointId}: Unexpected shift - X: $xShift, Y: $yShift") + } + } else { + failCount += 1 + println(s" ${tp.pointId}: Result is null") + } + } + val warmTime = (System.nanoTime() - startWarm) / 1e6 + val avgWarmTime = warmTime / 39 + + println(f"\nRemaining 39 transforms (warm, cached): $warmTime%.2f ms total") + println(f" Average per transform: $avgWarmTime%.2f ms") + println(f" Cache speedup: ${coldTime / avgWarmTime}%.1fx") + println(f"\nResults: $successCount/40 passed, $failCount failed") + + // Verify first result + assertNotNull("First result should not be null", firstResult) + val xShift = Math.abs(firstResult.getCoordinate.x - ostn15TestPoints.head.etrsLon) + val yShift = Math.abs(firstResult.getCoordinate.y - ostn15TestPoints.head.etrsLat) + assertTrue( + s"X shift ($xShift) should be < $GRID_SHIFT_TOLERANCE", + xShift < GRID_SHIFT_TOLERANCE) + assertTrue( + s"Y shift ($yShift) should be < $GRID_SHIFT_TOLERANCE", + yShift < GRID_SHIFT_TOLERANCE) + assertTrue(s"X shift ($xShift) should be non-zero", xShift > 1e-9) + assertTrue(s"Y shift ($yShift) should be non-zero", yShift > 1e-9) + + assertEquals("All 40 points should transform successfully", 40, successCount) + } + + it("should transform all 40 points OSGB36 to ETRS89 with timing and cache metrics") { + val gridFile = getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_OSGBtoETRS.gsb") + assume(gridFile != null, "OSTN15 OSGBtoETRS grid file not found") + + val gridPath = gridFile.getPath + val osgb36WithGrid = s"+proj=longlat +ellps=airy +nadgrids=$gridPath" + val etrs89 = "+proj=longlat +ellps=GRS80" + + // First, get OSGB36 coordinates by transforming ETRS89 -> OSGB36 + val etrsToOsgbFile = + getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_ETRStoOSGB.gsb") + assume(etrsToOsgbFile != null, "OSTN15 ETRStoOSGB grid file not found") + val etrs89WithGrid = s"+proj=longlat +ellps=GRS80 +nadgrids=${etrsToOsgbFile.getPath}" + val osgb36 = "+proj=longlat +ellps=airy" + + // Clear caches + Proj4.clearCache() + NadgridRegistry.clear() + + println("\n" + "=" * 70) + println("OSTN15 OSGB36 -> ETRS89: 40 Official Test Points") + println("=" * 70) + + // Pre-compute OSGB36 coordinates (this also warms up ETRS->OSGB cache) + val osgb36Coords = ostn15TestPoints.map { tp => + val osgbResult = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT (${tp.etrsLon} ${tp.etrsLat})'), + '$etrs89WithGrid', + '$osgb36')""") + .first() + .getAs[Geometry](0) + ( + tp.pointId, + osgbResult.getCoordinate.x, + osgbResult.getCoordinate.y, + tp.etrsLon, + tp.etrsLat) + } + + // Clear only the reverse direction cache to measure cold start for OSGB->ETRS + // (Grid files are still cached in NadgridRegistry) + println(s"Grid cache size before OSGB->ETRS: ${NadgridRegistry.size()}") + + // First transformation (semi-cold - grid file cached but new CRS pair) + val startFirst = System.nanoTime() + val (_, firstOsgbLon, firstOsgbLat, _, _) = osgb36Coords.head + val firstResult = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT ($firstOsgbLon $firstOsgbLat)'), + '$osgb36WithGrid', + '$etrs89')""") + .first() + .getAs[Geometry](0) + val firstTime = (System.nanoTime() - startFirst) / 1e6 + + println(f"First transform (grid cached, new CRS): $firstTime%.2f ms") + + // Transform remaining 39 points + val startRest = System.nanoTime() + var successCount = 1 + var failCount = 0 + + osgb36Coords.tail.foreach { case (pointId, osgbLon, osgbLat, origEtrsLon, origEtrsLat) => + val result = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT ($osgbLon $osgbLat)'), + '$osgb36WithGrid', + '$etrs89')""") + .first() + .getAs[Geometry](0) + + if (result != null) { + val xShift = Math.abs(result.getCoordinate.x - osgbLon) + val yShift = Math.abs(result.getCoordinate.y - osgbLat) + + if (xShift < GRID_SHIFT_TOLERANCE && yShift < GRID_SHIFT_TOLERANCE && + xShift > 1e-9 && yShift > 1e-9) { + successCount += 1 + } else { + failCount += 1 + println(s" $pointId: Unexpected shift - X: $xShift, Y: $yShift") + } + } else { + failCount += 1 + println(s" $pointId: Result is null") + } + } + val restTime = (System.nanoTime() - startRest) / 1e6 + val avgTime = restTime / 39 + + println(f"\nRemaining 39 transforms (fully cached): $restTime%.2f ms total") + println(f" Average per transform: $avgTime%.2f ms") + println(f" Speedup vs first: ${firstTime / avgTime}%.1fx") + println(f"\nResults: $successCount/40 passed, $failCount failed") + + assertNotNull("First result should not be null", firstResult) + assertEquals("All 40 points should transform successfully", 40, successCount) + } + + it("should round-trip all 40 points ETRS89 -> OSGB36 -> ETRS89") { + val etrsToOsgbFile = + getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_ETRStoOSGB.gsb") + val osgbToEtrsFile = + getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_OSGBtoETRS.gsb") + assume(etrsToOsgbFile != null && osgbToEtrsFile != null, "OSTN15 grid files not found") + + val etrs89WithGrid = s"+proj=longlat +ellps=GRS80 +nadgrids=${etrsToOsgbFile.getPath}" + val osgb36 = "+proj=longlat +ellps=airy" + val osgb36WithGrid = s"+proj=longlat +ellps=airy +nadgrids=${osgbToEtrsFile.getPath}" + val etrs89 = "+proj=longlat +ellps=GRS80" + + println("\n" + "=" * 70) + println("OSTN15 Round-trip: ETRS89 -> OSGB36 -> ETRS89 (40 points)") + println("=" * 70) + + val startTotal = System.nanoTime() + var successCount = 0 + var maxError = 0.0 + var worstPoint = "" + + ostn15TestPoints.foreach { tp => + val result = sparkSession + .sql(s"""SELECT ST_Transform( + ST_Transform( + ST_GeomFromWKT('POINT (${tp.etrsLon} ${tp.etrsLat})'), + '$etrs89WithGrid', + '$osgb36'), + '$osgb36WithGrid', + '$etrs89')""") + .first() + .getAs[Geometry](0) + + if (result != null) { + val xError = Math.abs(result.getCoordinate.x - tp.etrsLon) + val yError = Math.abs(result.getCoordinate.y - tp.etrsLat) + val totalError = Math.sqrt(xError * xError + yError * yError) + + if (xError < ROUND_TRIP_TOLERANCE && yError < ROUND_TRIP_TOLERANCE) { + successCount += 1 + } else { + println(f" ${tp.pointId}: Round-trip error - X: $xError%.9f, Y: $yError%.9f") + } + + if (totalError > maxError) { + maxError = totalError + worstPoint = tp.pointId + } + } + } + val totalTime = (System.nanoTime() - startTotal) / 1e6 + + println(f"\nTotal time for 40 round-trips: $totalTime%.2f ms") + println(f" Average per round-trip: ${totalTime / 40}%.2f ms") + println(f" Max error: $maxError%.9f degrees (at $worstPoint)") + println(f"\nResults: $successCount/40 passed within tolerance ($ROUND_TRIP_TOLERANCE deg)") + + assertEquals("All 40 points should round-trip successfully", 40, successCount) + } + + it("should transform all 40 points using remote grid file with download timing") { + // Clear caches to force fresh download + Proj4.clearCache() + NadgridRegistry.clear() + + val etrs89WithGrid = s"+proj=longlat +ellps=GRS80 +nadgrids=$OSTN15_ETRS_TO_OSGB_URL" + val osgb36 = "+proj=longlat +ellps=airy" + + println("\n" + "=" * 70) + println("OSTN15 Remote Grid File: Download + Transform 40 Points") + println(s"URL: $OSTN15_ETRS_TO_OSGB_URL") + println("=" * 70) + + // First transformation (downloads ~15MB grid file) + val startDownload = System.nanoTime() + val firstResult = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT (${ostn15TestPoints.head.etrsLon} ${ostn15TestPoints.head.etrsLat})'), + '$etrs89WithGrid', + '$osgb36')""") + .first() + .getAs[Geometry](0) + val downloadTime = (System.nanoTime() - startDownload) / 1e6 + + println(f"First transform (downloads ~15MB grid): $downloadTime%.2f ms") + println(f" Proj cache size: ${Proj4.getCacheSize}") + println(f" Grid cache size: ${NadgridRegistry.size()}") + + // Verify first result + assertNotNull("First result should not be null", firstResult) + val xShift = Math.abs(firstResult.getCoordinate.x - ostn15TestPoints.head.etrsLon) + val yShift = Math.abs(firstResult.getCoordinate.y - ostn15TestPoints.head.etrsLat) + assertTrue( + s"X shift ($xShift) should be < $GRID_SHIFT_TOLERANCE", + xShift < GRID_SHIFT_TOLERANCE) + assertTrue( + s"Y shift ($yShift) should be < $GRID_SHIFT_TOLERANCE", + yShift < GRID_SHIFT_TOLERANCE) + + // Transform remaining 39 points (cached) + val startCached = System.nanoTime() + var successCount = 1 + + ostn15TestPoints.tail.foreach { tp => + val result = sparkSession + .sql(s"""SELECT ST_Transform( + ST_GeomFromWKT('POINT (${tp.etrsLon} ${tp.etrsLat})'), + '$etrs89WithGrid', + '$osgb36')""") + .first() + .getAs[Geometry](0) + + if (result != null) { + val xShiftPt = Math.abs(result.getCoordinate.x - tp.etrsLon) + val yShiftPt = Math.abs(result.getCoordinate.y - tp.etrsLat) + if (xShiftPt < GRID_SHIFT_TOLERANCE && yShiftPt < GRID_SHIFT_TOLERANCE && + xShiftPt > 1e-9 && yShiftPt > 1e-9) { + successCount += 1 + } else { + println(s" ${tp.pointId}: Unexpected shift - X: $xShiftPt, Y: $yShiftPt") + } + } else { + println(s" ${tp.pointId}: Result is null") + } + } + val cachedTime = (System.nanoTime() - startCached) / 1e6 + val avgCached = cachedTime / 39 + + println(f"\nRemaining 39 transforms (cached): $cachedTime%.2f ms total") + println(f" Average per transform: $avgCached%.2f ms") + println(f" Download overhead: ${downloadTime - avgCached}%.2f ms") + println(f" Download speedup vs cached: ${downloadTime / avgCached}%.1fx") + println(f"\nResults: $successCount/40 passed") + + assertEquals("All 40 points should transform successfully", 40, successCount) + } + } +} diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala index 725f4cc8a39..8cdd05da154 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala @@ -734,7 +734,7 @@ class dataFrameAPITestScala extends TestBaseScala { .select(ST_Transform($"geom", lit("EPSG:4326"), lit("EPSG:32649")).as("geom")) .selectExpr("ST_ReducePrecision(geom, 2)") val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry].toText() - val expectedResult = "POINT (-33741810.95 1823994.03)" + val expectedResult = "POINT (-10625664.38 19664344.48)" assertEquals(expectedResult, actualResult) pointDf = sparkSession @@ -744,7 +744,7 @@ class dataFrameAPITestScala extends TestBaseScala { .select(ST_Transform($"geom", lit("EPSG:32649")).as("geom")) .selectExpr("ST_ReducePrecision(geom, 2)") val actual = df.take(1)(0).get(0).asInstanceOf[Geometry].toText() - val expected = "POINT (1560393.11 10364606.84)" + val expected = "POINT (1559798.3 10364755.11)" assertEquals(expected, actual) } diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala index 6c280e10fc4..601a7ccbd99 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala @@ -423,11 +423,11 @@ class functionTestScala val polygon = "POLYGON ((120 60, 121 61, 122 62, 123 63, 124 64, 120 60))" val geometryFactory = new GeometryFactory val coords = new Array[Coordinate](6) - coords(0) = new Coordinate(1000961.4042164611, 6685590.893548286) - coords(1) = new Coordinate(1039394.2790044537, 6804110.988854166) - coords(2) = new Coordinate(1074157.4382441062, 6923060.447921266) - coords(3) = new Coordinate(1105199.4259604653, 7042351.1239674715) - coords(4) = new Coordinate(1132473.1932022288, 7161889.652860963) + coords(0) = new Coordinate(1000961.4045206103, 6685590.89353171) + coords(1) = new Coordinate(1039394.2797213441, 6804110.988844556) + coords(2) = new Coordinate(1074157.4397193217, 6923060.447944083) + coords(3) = new Coordinate(1105199.4286994014, 7042351.124075022) + coords(4) = new Coordinate(1132473.1978762923, 7161889.653145648) coords(5) = coords(0) val polygonExpected = geometryFactory.createPolygon(coords) val EPSG_TGT_CRS = CRS.decode("EPSG:32649", true)