Skip to content

Commit

Permalink
Added a slightly changed version from the awscli find bucket function (
Browse files Browse the repository at this point in the history
…#648)

* Added a slightly changed version from the awscli find bucket function

* lint

Co-authored-by: Martin Durant <martin.durant@alumni.utoronto.ca>
  • Loading branch information
saineju and martindurant committed Sep 26, 2022
1 parent ad2c9b8 commit 227a905
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 1 deletion.
31 changes: 30 additions & 1 deletion s3fs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import socket
from typing import Tuple, Optional
import weakref
import re

from urllib3.exceptions import IncompleteRead

Expand Down Expand Up @@ -363,6 +364,34 @@ def _get_kwargs_from_urls(urlpath):
out["version_aware"] = True
return out

def _find_bucket_key(self, s3_path):
"""
This is a helper function that given an s3 path such that the path is of
the form: bucket/key
It will return the bucket and the key represented by the s3 path
"""

_S3_ACCESSPOINT_TO_BUCKET_KEY_REGEX = re.compile(
r"^(?P<bucket>arn:(aws).*:s3:[a-z\-0-9]*:[0-9]{12}:accesspoint[:/][^/]+)/?"
r"(?P<key>.*)$"
)
_S3_OUTPOST_TO_BUCKET_KEY_REGEX = re.compile(
r"^(?P<bucket>arn:(aws).*:s3-outposts:[a-z\-0-9]+:[0-9]{12}:outpost[/:]"
r"[a-zA-Z0-9\-]{1,63}[/:](bucket|accesspoint)[/:][a-zA-Z0-9\-]{1,63})[/:]?(?P<key>.*)$"
)
match = _S3_ACCESSPOINT_TO_BUCKET_KEY_REGEX.match(s3_path)
if match:
return match.group("bucket"), match.group("key")
match = _S3_OUTPOST_TO_BUCKET_KEY_REGEX.match(s3_path)
if match:
return match.group("bucket"), match.group("key")
s3_components = s3_path.split("/", 1)
bucket = s3_components[0]
s3_key = ""
if len(s3_components) > 1:
s3_key = s3_components[1]
return bucket, s3_key

def split_path(self, path) -> Tuple[str, str, Optional[str]]:
"""
Normalise S3 path string into bucket and key.
Expand All @@ -385,7 +414,7 @@ def split_path(self, path) -> Tuple[str, str, Optional[str]]:
if "/" not in path:
return path, "", None
else:
bucket, keypart = path.split("/", 1)
bucket, keypart = self._find_bucket_key(path)
key, _, version_id = keypart.partition("?versionId=")
return (
bucket,
Expand Down
14 changes: 14 additions & 0 deletions s3fs/tests/test_s3fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2392,3 +2392,17 @@ def test_list_del_multipart(s3):
f.close() # may error
except Exception:
pass


def test_split_path(s3):
    """split_path must keep plain and ARN-style bucket names in one piece."""
    expected_key = "my/test/path"
    # One plain bucket name plus access point and Outposts ARNs, which
    # contain "/" inside the bucket part itself.
    expected_buckets = (
        "my-test-bucket",
        "arn:aws:s3:region:123456789012:accesspoint/my-access-point-name",
        "arn:aws:s3-outposts:region:123456789012:outpost/outpost-id/bucket/my-test-bucket",
        "arn:aws:s3-outposts:region:123456789012:outpost/outpost-id/accesspoint/my-accesspoint-name",
    )
    for expected_bucket in expected_buckets:
        url = f"s3://{expected_bucket}/{expected_key}"
        bucket, key, _ = s3.split_path(url)
        assert bucket == expected_bucket
        assert key == expected_key

0 comments on commit 227a905

Please sign in to comment.