Skip to content

Commit

Permalink
✨ Source S3: Add handling for NoSuchBucket error (#31383)
Browse files Browse the repository at this point in the history
  • Loading branch information
tolik0 committed Oct 24, 2023
1 parent 2014cd8 commit 053d08e
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 18 deletions.
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-s3/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2
dockerImageTag: 4.1.1
dockerImageTag: 4.1.2
dockerRepository: airbyte/source-s3
documentationUrl: https://docs.airbyte.com/integrations/sources/s3
githubIssueLabel: source-s3
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-s3/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from setuptools import find_packages, setup

MAIN_REQUIREMENTS = [
"airbyte-cdk>=0.51.35",
"airbyte-cdk>=0.52.0",
"pyarrow==12.0.1",
"smart-open[s3]==5.1.0",
"wcmatch==8.4",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
import boto3.session
import pytz
import smart_open
from airbyte_cdk.sources.file_based.exceptions import ErrorListingFiles, FileBasedSourceError
from airbyte_cdk.models import FailureType
from airbyte_cdk.sources.file_based.exceptions import CustomFileBasedException, ErrorListingFiles, FileBasedSourceError
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
from botocore.client import BaseClient
from botocore.client import Config as ClientConfig
from botocore.exceptions import ClientError
from source_s3.v4.config import Config
from source_s3.v4.zip_reader import DecompressedStream, RemoteFileInsideArchive, ZipContentReader, ZipFileHandler

Expand Down Expand Up @@ -68,25 +70,30 @@ def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: lo
total_n_keys = 0

try:
if prefixes:
for prefix in prefixes:
for remote_file in self._page(s3, globs, self.config.bucket, prefix, seen, logger):
total_n_keys += 1
yield remote_file
else:
for remote_file in self._page(s3, globs, self.config.bucket, None, seen, logger):
for current_prefix in prefixes if prefixes else [None]:
for remote_file in self._page(s3, globs, self.config.bucket, current_prefix, seen, logger):
total_n_keys += 1
yield remote_file

logger.info(f"Finished listing objects from S3. Found {total_n_keys} objects total ({len(seen)} unique objects).")
except ClientError as exc:
if exc.response["Error"]["Code"] == "NoSuchBucket":
raise CustomFileBasedException(
f"The bucket {self.config.bucket} does not exist.", failure_type=FailureType.config_error, exception=exc
)
self._raise_error_listing_files(globs, exc)
except Exception as exc:
raise ErrorListingFiles(
FileBasedSourceError.ERROR_LISTING_FILES,
source="s3",
bucket=self.config.bucket,
globs=globs,
endpoint=self.config.endpoint,
) from exc
self._raise_error_listing_files(globs, exc)

def _raise_error_listing_files(self, globs: List[str], exc: Optional[Exception] = None):
    """Raise ErrorListingFiles for a failed listing of the configured bucket.

    The raised error is built from FileBasedSourceError.ERROR_LISTING_FILES and
    carries the source name "s3", the bucket and endpoint read from
    ``self.config``, and the ``globs`` that were being matched.

    :param globs: glob patterns passed through to the error for context.
    :param exc: exception to chain as the cause of the raised error (optional).
    :raises ErrorListingFiles: always.
    """
    # Build the error first, then chain the cause explicitly in a single raise.
    listing_error = ErrorListingFiles(
        FileBasedSourceError.ERROR_LISTING_FILES,
        source="s3",
        bucket=self.config.bucket,
        globs=globs,
        endpoint=self.config.endpoint,
    )
    raise listing_error from exc

def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase:
try:
Expand Down
3 changes: 2 additions & 1 deletion docs/integrations/sources/s3.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,8 @@ One record will be emitted for each document. Keep in mind that large files can
## Changelog

| Version | Date | Pull Request | Subject |
| :------ | :--------- | :-------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------- |
|:--------|:-----------| :-------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------- |
| 4.1.2 | 2023-10-23 | [31383](https://github.com/airbytehq/airbyte/pull/31383) | Add handling for NoSuchBucket error |
| 4.1.1 | 2023-10-19 | [31601](https://github.com/airbytehq/airbyte/pull/31601) | Base image migration: remove Dockerfile and use the python-connector-base image |
| 4.1.0 | 2023-10-17 | [31340](https://github.com/airbytehq/airbyte/pull/31340) | Add reading files inside zip archive |
| 4.0.5 | 2023-10-16 | [31209](https://github.com/airbytehq/airbyte/pull/31209) | Add experimental Markdown/PDF/Docx file format |
Expand Down

0 comments on commit 053d08e

Please sign in to comment.