diff --git a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java
index 4c0d401c74b9..4a8e025d7e05 100644
--- a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java
+++ b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java
@@ -57,4 +57,90 @@ public static String tableLocation(TableIdentifier tableIdentifier, boolean useU
       return tableIdentifier.name();
     }
   }
+
+  /**
+   * Returns true if the location starts with a URI scheme (e.g. {@code s3:}, {@code hdfs:},
+   * {@code file:}) as defined by RFC 3986 section 3.1: an ASCII letter followed by any number of
+   * ASCII letters, digits, {@code +}, {@code -} or {@code .}, terminated by a colon.
+   *
+   * <p>Relative locations that merely contain a colon, such as {@code /data/k=a:b/f.parquet} or
+   * {@code 2024-01-01T00:00:00/f.parquet}, do not have a scheme.
+   *
+   * @param location a location string
+   * @return true if the location begins with a valid scheme followed by a colon
+   */
+  private static boolean hasScheme(String location) {
+    // a scheme must start with a letter; this also rejects the common relative form "/..."
+    if (location.isEmpty() || !isAsciiLetter(location.charAt(0))) {
+      return false;
+    }
+
+    for (int i = 1; i < location.length(); i += 1) {
+      char ch = location.charAt(i);
+      if (ch == ':') {
+        return true;
+      }
+
+      if (!isAsciiLetter(ch) && !isAsciiDigit(ch) && ch != '+' && ch != '-' && ch != '.') {
+        return false;
+      }
+    }
+
+    // no colon found, so there is no scheme
+    return false;
+  }
+
+  private static boolean isAsciiLetter(char ch) {
+    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
+  }
+
+  private static boolean isAsciiDigit(char ch) {
+    return ch >= '0' && ch <= '9';
+  }
+
+  /**
+   * Resolves a location against a table location. If the location has a URI scheme, it is returned
+   * as-is. Otherwise, the location is appended to the table location without any additional
+   * separator.
+   *
+   * @param tableLocation the table's base location
+   * @param location an absolute location, or a location relative to the table location
+   * @return the absolute location
+   */
+  public static String resolveLocation(String tableLocation, String location) {
+    if (hasScheme(location)) {
+      return location;
+    }
+
+    return tableLocation + location;
+  }
+
+  /**
+   * Relativizes a location against a table location. If the location is equal to the table
+   * location or is under it, the table location prefix is removed and the remaining relative
+   * portion is returned. Otherwise, the location is returned as-is.
+   *
+   * <p>The prefix must end at a path separator, so {@code s3://bucket/table2/file} is not treated
+   * as being under {@code s3://bucket/table}. A remainder that would itself be detected as having
+   * a URI scheme is not returned either, because {@link #resolveLocation(String, String)} would
+   * not prepend the table location to it.
+   *
+   * @param tableLocation the table's base location
+   * @param location the location to relativize
+   * @return the portion of the location after the table location, or the location unchanged
+   */
+  public static String relativizeLocation(String tableLocation, String location) {
+    if (!location.startsWith(tableLocation)) {
+      return location;
+    }
+
+    String relative = location.substring(tableLocation.length());
+    boolean atPathBoundary =
+        relative.isEmpty() || relative.charAt(0) == '/' || tableLocation.endsWith("/");
+    if (!atPathBoundary || hasScheme(relative)) {
+      return location;
+    }
+
+    return relative;
+  }
 }
diff --git a/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java b/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java
index 9a7b2768d995..ecaceeac80b8 100644
--- a/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java
+++ b/core/src/test/java/org/apache/iceberg/util/TestLocationUtil.java
@@ -84,4 +84,187 @@ void testStripTrailingSlashForRootPathWithTrailingSlashes() {
         .as("Should be root path")
         .isEqualTo(rootPath);
   }
+
+  @Test
+  void testResolveRelativeLocations() {
+    String tableLocation = "s3://bucket/table";
+
+    assertThat(LocationUtil.resolveLocation(tableLocation, "/metadata/file.parquet"))
+        .isEqualTo("s3://bucket/table/metadata/file.parquet");
+
+    assertThat(LocationUtil.resolveLocation(tableLocation, "/data/00000-0.parquet"))
+        .isEqualTo("s3://bucket/table/data/00000-0.parquet");
+  }
+
+  @Test
+  void testResolveLocationsWithColonsInSegments() {
+    String tableLocation = "s3://bucket/table";
+
+    assertThat(
+            LocationUtil.resolveLocation(tableLocation, "/data/partition=key:value/file.parquet"))
+        .isEqualTo("s3://bucket/table/data/partition=key:value/file.parquet");
+
+    assertThat(LocationUtil.resolveLocation(tableLocation, "/metadata/snap-123:456.avro"))
+        .isEqualTo("s3://bucket/table/metadata/snap-123:456.avro");
+  }
+
+  @Test
+  void testResolveAbsoluteLocationsUnchanged() {
+    String tableLocation = "s3://bucket/table";
+
+    assertThat(LocationUtil.resolveLocation(tableLocation, "s3://other-bucket/path/file.parquet"))
+        .isEqualTo("s3://other-bucket/path/file.parquet");
+
+    assertThat(LocationUtil.resolveLocation(tableLocation, "hdfs://namenode/path/file.parquet"))
+        .isEqualTo("hdfs://namenode/path/file.parquet");
+  }
+
+  @Test
+  void testRelativize() {
+    String tableLocation = "s3://bucket/table";
+
+    assertThat(
+            LocationUtil.relativizeLocation(
+                tableLocation, "s3://bucket/table/metadata/file.parquet"))
+        .isEqualTo("/metadata/file.parquet");
+
+    assertThat(
+            LocationUtil.relativizeLocation(
+                tableLocation, "s3://bucket/table/data/00000-0.parquet"))
+        .isEqualTo("/data/00000-0.parquet");
+  }
+
+  @Test
+  void testRelativizeLocationNotUnderTableLocation() {
+    String tableLocation = "s3://bucket/table";
+
+    // different bucket
+    assertThat(
+            LocationUtil.relativizeLocation(tableLocation, "s3://other-bucket/path/file.parquet"))
+        .isEqualTo("s3://other-bucket/path/file.parquet");
+
+    // same bucket, different path
+    assertThat(
+            LocationUtil.relativizeLocation(
+                tableLocation, "s3://bucket/other-table/data/file.parquet"))
+        .isEqualTo("s3://bucket/other-table/data/file.parquet");
+  }
+
+  @Test
+  void testRelativizeLocationEqualToTableLocation() {
+    String tableLocation = "s3://bucket/table";
+
+    assertThat(LocationUtil.relativizeLocation(tableLocation, "s3://bucket/table")).isEqualTo("");
+  }
+
+  @Test
+  void testRelativizeMismatchedFileSchemeNotRelativized() {
+    // mixed file: URI variants are NOT relativized; using one consistent URI form is the
+    // caller's responsibility
+    assertThat(
+            LocationUtil.relativizeLocation(
+                "file:/tmp/table", "file:///tmp/table/metadata/file.parquet"))
+        .isEqualTo("file:///tmp/table/metadata/file.parquet");
+
+    assertThat(
+            LocationUtil.relativizeLocation(
+                "file:///tmp/table", "file:/tmp/table/metadata/file.parquet"))
+        .isEqualTo("file:/tmp/table/metadata/file.parquet");
+  }
+
+  @Test
+  void testResolveAbsoluteLocationWithNonAlphanumericScheme() {
+    String tableLocation = "s3://bucket/table";
+
+    assertThat(LocationUtil.resolveLocation(tableLocation, "git+ssh://host/repo"))
+        .isEqualTo("git+ssh://host/repo");
+  }
+
+  @Test
+  void testResolveEmptyLocationReturnsTableLocation() {
+    String tableLocation = "s3://bucket/table";
+    assertThat(LocationUtil.resolveLocation(tableLocation, "")).isEqualTo(tableLocation);
+  }
+
+  @Test
+  void testRelativizeResolveRoundTrip() {
+    String tableLocation = "s3://bucket/table";
+    String absoluteLocation = "s3://bucket/table/metadata/root-manifest.parquet";
+
+    String relativized = LocationUtil.relativizeLocation(tableLocation, absoluteLocation);
+    assertThat(relativized).isEqualTo("/metadata/root-manifest.parquet");
+
+    String resolved = LocationUtil.resolveLocation(tableLocation, relativized);
+    assertThat(resolved).isEqualTo(absoluteLocation);
+  }
+
+  @Test
+  void testRelativizeResolveRoundTripWithFileScheme() {
+    String tableLocation = "file:///tmp/warehouse/table";
+    String absoluteLocation = "file:///tmp/warehouse/table/metadata/root-manifest.parquet";
+
+    String relativized = LocationUtil.relativizeLocation(tableLocation, absoluteLocation);
+    assertThat(relativized).isEqualTo("/metadata/root-manifest.parquet");
+
+    String resolved = LocationUtil.resolveLocation(tableLocation, relativized);
+    assertThat(resolved).isEqualTo(absoluteLocation);
+  }
+
+  @Test
+  void testRelativizeResolveRoundTripWithHDFS() {
+    String tableLocation = "hdfs://namenode/warehouse/table";
+    String absoluteLocation = "hdfs://namenode/warehouse/table/data/00000-0.parquet";
+
+    String relativized = LocationUtil.relativizeLocation(tableLocation, absoluteLocation);
+    assertThat(relativized).isEqualTo("/data/00000-0.parquet");
+
+    String resolved = LocationUtil.resolveLocation(tableLocation, relativized);
+    assertThat(resolved).isEqualTo(absoluteLocation);
+  }
+
+  @Test
+  void testResolveRelativeLocationWithInvalidScheme() {
+    String tableLocation = "s3://bucket/table/";
+
+    // a scheme must start with an ASCII letter (RFC 3986 section 3.1)
+    assertThat(LocationUtil.resolveLocation(tableLocation, "2024-01-01T00:00:00/file.parquet"))
+        .isEqualTo("s3://bucket/table/2024-01-01T00:00:00/file.parquet");
+
+    assertThat(LocationUtil.resolveLocation(tableLocation, "+x:file.parquet"))
+        .isEqualTo("s3://bucket/table/+x:file.parquet");
+  }
+
+  @Test
+  void testRelativizeLocationSharingPrefixWithTableLocation() {
+    String tableLocation = "s3://bucket/table";
+
+    // sibling path that only shares a string prefix with the table location
+    assertThat(
+            LocationUtil.relativizeLocation(tableLocation, "s3://bucket/table2/data/file.parquet"))
+        .isEqualTo("s3://bucket/table2/data/file.parquet");
+  }
+
+  @Test
+  void testRelativizeResolveRoundTripWithTrailingSlash() {
+    String tableLocation = "s3://bucket/table/";
+    String absoluteLocation = "s3://bucket/table/metadata/snap-1.avro";
+
+    String relativized = LocationUtil.relativizeLocation(tableLocation, absoluteLocation);
+    assertThat(relativized).isEqualTo("metadata/snap-1.avro");
+
+    String resolved = LocationUtil.resolveLocation(tableLocation, relativized);
+    assertThat(resolved).isEqualTo(absoluteLocation);
+  }
+
+  @Test
+  void testRelativizeKeepsLocationWhenRemainderLooksAbsolute() {
+    String tableLocation = "s3://bucket/table/";
+    String absoluteLocation = "s3://bucket/table/a:b.avro";
+
+    // "a:b.avro" would be treated as having a scheme by resolveLocation
+    String relativized = LocationUtil.relativizeLocation(tableLocation, absoluteLocation);
+    assertThat(relativized).isEqualTo(absoluteLocation);
+    assertThat(LocationUtil.resolveLocation(tableLocation, relativized))
+        .isEqualTo(absoluteLocation);
+  }
 }