Skip to content
Permalink
Browse files
Fix GCS based ingestion if bucket name contains underscores (#12445)
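GCP allows bucket names to contain underscores. When a location in such a bucket
is mapped to `java.net.URI`, `URI.getHost()` returns null, because an underscore is not
a legal character in a URI hostname and the authority is therefore not parsed as a
server-based host. `URI.getHost()` is used as the bucket name in `CloudObjectLocation`,
so the null bucket name leads to an NPE.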

This commit uses `URI.getAuthority()` as the bucket name if `URI.getHost()` is null.
  • Loading branch information
tejaswini-imply committed Apr 21, 2022
1 parent 5099f5a commit 177e1856cdaf3e5aa7a5754d129fc1945b9f7c72
Showing 2 changed files with 22 additions and 8 deletions.
@@ -71,7 +71,7 @@ public CloudObjectLocation(@JsonProperty("bucket") String bucket, @JsonProperty(

/**
 * Builds a location from a cloud object URI such as {@code gs://bucket/path/to/object}.
 *
 * The bucket name is normally taken from {@link URI#getHost()}. That method returns null when the
 * authority is not a syntactically valid hostname (for example a bucket name containing an
 * underscore), so the raw {@link URI#getAuthority()} is used as the bucket name in that case.
 *
 * @param uri URI whose host (or, failing that, authority) names the bucket and whose path names the object
 */
public CloudObjectLocation(URI uri)
{
  // Fall back to the authority so that bucket names which are not valid hostnames do not yield a null bucket.
  this(uri.getHost() != null ? uri.getHost() : uri.getAuthority(), uri.getPath());
}

/**
@@ -115,13 +115,27 @@ public void testBucketName()
}

@Test
public void testInvalidBucketName()
public void testBucketNameWithoutUnderscores()
{
expectedException.expect(NullPointerException.class);
expectedException.expectMessage("bucket name cannot be null. Please verify if bucket name adheres to naming rules");
// Underscore(_) character is not valid for bucket names
CloudObjectLocation invalidBucket1 = new CloudObjectLocation("test_bucket", "path/to/path");
CloudObjectLocation invalidBucket2 = new CloudObjectLocation(invalidBucket1.toUri(SCHEME));
Assert.assertEquals("test_bucket", new CloudObjectLocation(invalidBucket2.toUri(SCHEME)));
CloudObjectLocation gsValidBucket = new CloudObjectLocation(URI.create("gs://1test.bucket-value/path/to/path"));
Assert.assertEquals("1test.bucket-value", gsValidBucket.getBucket());
Assert.assertEquals("path/to/path", gsValidBucket.getPath());

CloudObjectLocation s3ValidBucket = new CloudObjectLocation(URI.create("s3://2test.bucket-value/path/to/path"));
Assert.assertEquals("2test.bucket-value", s3ValidBucket.getBucket());
Assert.assertEquals("path/to/path", s3ValidBucket.getPath());
}

@Test
public void testBucketNameWithUnderscores()
{
  // GCP permits the underscore(_) character in bucket names, so a URI whose authority
  // contains one must still resolve to the expected bucket and path.
  final URI gsUri = URI.create("gs://test_bucket/path/to/path");
  final CloudObjectLocation gsLocation = new CloudObjectLocation(gsUri);
  Assert.assertEquals("test_bucket", gsLocation.getBucket());
  Assert.assertEquals("path/to/path", gsLocation.getPath());

  // Same check for the s3 scheme.
  final URI s3Uri = URI.create("s3://test_bucket/path/to/path");
  final CloudObjectLocation s3Location = new CloudObjectLocation(s3Uri);
  Assert.assertEquals("test_bucket", s3Location.getBucket());
  Assert.assertEquals("path/to/path", s3Location.getPath());
}
}

0 comments on commit 177e185

Please sign in to comment.