diff --git a/awswrangler/s3/_list.py b/awswrangler/s3/_list.py index 6749315e1..8147dba20 100644 --- a/awswrangler/s3/_list.py +++ b/awswrangler/s3/_list.py @@ -1,6 +1,7 @@ """Amazon S3 List Module (PRIVATE).""" import datetime +import fnmatch import logging from typing import Any, Dict, List, Optional, Sequence, Union @@ -64,10 +65,12 @@ def _list_objects( last_modified_begin: Optional[datetime.datetime] = None, last_modified_end: Optional[datetime.datetime] = None, boto3_session: Optional[boto3.Session] = None, + wildcard_character: str = "*", ) -> List[str]: + wildcard_prefix: str = path.split(wildcard_character)[0] bucket: str prefix: str - bucket, prefix = _utils.parse_path(path=path) + bucket, prefix = _utils.parse_path(path=wildcard_prefix) _suffix: Union[List[str], None] = [suffix] if isinstance(suffix, str) else suffix _ignore_suffix: Union[List[str], None] = [ignore_suffix] if isinstance(ignore_suffix, str) else ignore_suffix client_s3: boto3.client = _utils.client(service_name="s3", session=boto3_session) @@ -102,6 +105,9 @@ def _list_objects( key = pfx["Prefix"] paths.append(f"s3://{bucket}/{key}") + if wildcard_character in path: + paths = fnmatch.filter(paths, path) + return paths if _ignore_suffix is None else [p for p in paths if p.endswith(tuple(_ignore_suffix)) is False]