Skip to content

Commit

Permalink
fix handling of bucket names with dots in them
Browse files Browse the repository at this point in the history
Signed-off-by: Fredrik Adelöw <freben@gmail.com>
  • Loading branch information
freben committed Mar 11, 2022
1 parent fa2bd08 commit b66f701
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 45 deletions.
5 changes: 5 additions & 0 deletions .changeset/wicked-tools-return.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@backstage/backend-common': patch
---

Fix handling of bucket names with dots, in `AwsS3UrlReader`
82 changes: 79 additions & 3 deletions packages/backend-common/src/reading/AwsS3UrlReader.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import { ConfigReader } from '@backstage/config';
import { JsonObject } from '@backstage/types';
import { getVoidLogger } from '../logging';
import { DefaultReadTreeResponseFactory } from './tree';
import { AwsS3UrlReader } from './AwsS3UrlReader';
import { AwsS3UrlReader, parseUrl } from './AwsS3UrlReader';
import {
AwsS3Integration,
readAwsS3IntegrationConfig,
Expand All @@ -33,6 +33,82 @@ const treeResponseFactory = DefaultReadTreeResponseFactory.create({
config: new ConfigReader({}),
});

describe('parseUrl', () => {
it('supports all aws formats', () => {
expect(
parseUrl('https://s3.us-west-2.amazonaws.com/my.bucket-3/a/puppy.jpg', {
host: 'amazonaws.com',
}),
).toEqual({
path: 'a/puppy.jpg',
bucket: 'my.bucket-3',
region: 'us-west-2',
});
expect(
parseUrl('https://s3-us-west-2.amazonaws.com/my.bucket-3/a/puppy.jpg', {
host: 'amazonaws.com',
}),
).toEqual({
path: 'a/puppy.jpg',
bucket: 'my.bucket-3',
region: 'us-west-2',
});
expect(
parseUrl('https://my.bucket-3.s3.us-west-2.amazonaws.com/a/puppy.jpg', {
host: 'amazonaws.com',
}),
).toEqual({
path: 'a/puppy.jpg',
bucket: 'my.bucket-3',
region: 'us-west-2',
});
expect(
parseUrl(
'https://ignored.s3.us-west-2.amazonaws.com/my.bucket-3/a/puppy.jpg',
{
host: 'amazonaws.com',
s3ForcePathStyle: true,
},
),
).toEqual({
path: 'a/puppy.jpg',
bucket: 'my.bucket-3',
region: 'us-west-2',
});
});

it('supports all non-aws formats', () => {
expect(
parseUrl('https://my-host.com/my.bucket-3/a/puppy.jpg', {
host: 'my-host.com',
}),
).toEqual({
path: 'a/puppy.jpg',
bucket: 'my.bucket-3',
region: '',
});
expect(
parseUrl('https://my.bucket-3.my-host.com/a/puppy.jpg', {
host: 'my-host.com',
}),
).toEqual({
path: 'a/puppy.jpg',
bucket: 'my.bucket-3',
region: '',
});
expect(
parseUrl('https://ignored.my-host.com/my.bucket-3/a/puppy.jpg', {
host: 'my-host.com',
s3ForcePathStyle: true,
}),
).toEqual({
path: 'a/puppy.jpg',
bucket: 'my.bucket-3',
region: '',
});
});
});

describe('AwsS3UrlReader', () => {
const createReader = (config: JsonObject): UrlReaderPredicateTuple[] => {
return AwsS3UrlReader.factory({
Expand Down Expand Up @@ -178,7 +254,7 @@ describe('AwsS3UrlReader', () => {
),
).rejects.toThrow(
Error(
`Could not retrieve file from S3; caused by Error: invalid AWS S3 URL, cannot parse region from host in https://test-bucket.s3.us-east-2.NOTamazonaws.com/file.yaml`,
`Could not retrieve file from S3; caused by Error: Invalid AWS S3 URL https://test-bucket.s3.us-east-2.NOTamazonaws.com/file.yaml`,
),
);
});
Expand Down Expand Up @@ -234,7 +310,7 @@ describe('AwsS3UrlReader', () => {
),
).rejects.toThrow(
Error(
`Could not retrieve file from S3; caused by Error: invalid AWS S3 URL, cannot parse region from host in https://test-bucket.s3.us-east-2.NOTamazonaws.com/file.yaml`,
`Could not retrieve file from S3; caused by Error: Invalid AWS S3 URL https://test-bucket.s3.us-east-2.NOTamazonaws.com/file.yaml`,
),
);
});
Expand Down
102 changes: 60 additions & 42 deletions packages/backend-common/src/reading/AwsS3UrlReader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,64 +35,82 @@ import {
import { ForwardedError, NotModifiedError } from '@backstage/errors';
import { ListObjectsV2Output, ObjectList } from 'aws-sdk/clients/s3';

const parseURL = (
/**
* Path style URLs: https://s3.(region).amazonaws.com/(bucket)/(key)
* The region can also be on the old form: https://s3-(region).amazonaws.com/(bucket)/(key)
* Virtual hosted style URLs: https://(bucket).s3.(region).amazonaws.com/(key)
* See https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access
*/
export function parseUrl(
url: string,
config: AwsS3IntegrationConfig,
): { path: string; bucket: string; region: string } => {
let { host, pathname } = new URL(url);
): { path: string; bucket: string; region: string } {
const parsedUrl = new URL(url);

/**
* Removes the leading '/' from the pathname to be processed
* as a parameter by AWS S3 SDK getObject method.
*/
pathname = pathname.substr(1);

let bucket;
let region;
const pathname = parsedUrl.pathname.substring(1);
const host = parsedUrl.host;

/**
* Path style URLs: https://s3.Region.amazonaws.com/bucket-name/key-name
* Virtual hosted style URLs: https://bucket-name.s3.Region.amazonaws.com/key-name
* See https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access
*/
if (config.s3ForcePathStyle) {
if (pathname.indexOf('/') < 0) {
throw new Error(
`invalid path-style AWS S3 URL, ${url} does not contain bucket in the path`,
);
// Treat Amazon hosted separately because it has special region logic
if (config.host === 'amazonaws.com') {
const match = host.match(
/^(?:([a-z0-9.-]+)\.)?s3[.-]([a-z0-9-]+)\.amazonaws\.com$/,
);
if (!match) {
throw new Error(`Invalid AWS S3 URL ${url}`);
}
[bucket] = pathname.split('/');
pathname = pathname.substr(bucket.length + 1);
} else {
if (host.indexOf('.') < 0) {
throw new Error(
`invalid virtual hosted-style AWS S3 URL, ${url} does not contain bucket prefix in the host`,
);

const [, hostBucket, hostRegion] = match;

if (config.s3ForcePathStyle || !hostBucket) {
const slashIndex = pathname.indexOf('/');
if (slashIndex < 0) {
throw new Error(
`Invalid path-style AWS S3 URL ${url}, does not contain bucket in the path`,
);
}

return {
path: pathname.substring(slashIndex + 1),
bucket: pathname.substring(0, slashIndex),
region: hostRegion,
};
}
[bucket] = host.split('.');
host = host.substr(bucket.length + 1);

return {
path: pathname,
bucket: hostBucket,
region: hostRegion,
};
}

// Only extract region from *.amazonaws.com hosts
if (config.host === 'amazonaws.com') {
// At this point bucket prefix is removed from host for virtual hosted URLs
const match = host.match(/^s3\.([a-z\d-]+)\.amazonaws\.com$/);
if (!match) {
const usePathStyle =
config.s3ForcePathStyle || host.length === config.host.length;

if (usePathStyle) {
const slashIndex = pathname.indexOf('/');
if (slashIndex < 0) {
throw new Error(
`invalid AWS S3 URL, cannot parse region from host in ${url}`,
`Invalid path-style AWS S3 URL ${url}, does not contain bucket in the path`,
);
}
region = match[1];
} else {
region = '';

return {
path: pathname.substring(slashIndex + 1),
bucket: pathname.substring(0, slashIndex),
region: '',
};
}

return {
path: pathname,
bucket: bucket,
region: region,
bucket: host.substring(0, host.length - config.host.length - 1),
region: '',
};
};
}

/**
* Implements a {@link UrlReader} for AWS S3 buckets.
Expand All @@ -104,11 +122,11 @@ export class AwsS3UrlReader implements UrlReader {
const integrations = ScmIntegrations.fromConfig(config);

return integrations.awsS3.list().map(integration => {
const creds = AwsS3UrlReader.buildCredentials(integration);
const credentials = AwsS3UrlReader.buildCredentials(integration);

const s3 = new S3({
apiVersion: '2006-03-01',
credentials: creds,
credentials,
endpoint: integration.config.endpoint,
s3ForcePathStyle: integration.config.s3ForcePathStyle,
});
Expand Down Expand Up @@ -176,7 +194,7 @@ export class AwsS3UrlReader implements UrlReader {
options?: ReadUrlOptions,
): Promise<ReadUrlResponse> {
try {
const { path, bucket, region } = parseURL(url, this.integration.config);
const { path, bucket, region } = parseUrl(url, this.integration.config);
aws.config.update({ region: region });

let params;
Expand Down Expand Up @@ -216,7 +234,7 @@ export class AwsS3UrlReader implements UrlReader {
options?: ReadTreeOptions,
): Promise<ReadTreeResponse> {
try {
const { path, bucket, region } = parseURL(url, this.integration.config);
const { path, bucket, region } = parseUrl(url, this.integration.config);
const allObjects: ObjectList = [];
const responses = [];
let continuationToken: string | undefined;
Expand Down

0 comments on commit b66f701

Please sign in to comment.