diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 1362d862a7c82c..817e65bc15f2c8 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -410,7 +410,7 @@ - name: File sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77 dockerRepository: airbyte/source-file - dockerImageTag: 0.2.28 + dockerImageTag: 0.2.30 documentationUrl: https://docs.airbyte.com/integrations/sources/file icon: file.svg sourceType: file diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 4bbd82fca594c0..ced16e481c2edf 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3636,7 +3636,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-file:0.2.28" +- dockerImage: "airbyte/source-file:0.2.30" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/file" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-file-secure/Dockerfile b/airbyte-integrations/connectors/source-file-secure/Dockerfile index 0bfb5bbdbe95ce..b9dfdd8c72b1b0 100644 --- a/airbyte-integrations/connectors/source-file-secure/Dockerfile +++ b/airbyte-integrations/connectors/source-file-secure/Dockerfile @@ -9,5 +9,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.28 +LABEL io.airbyte.version=0.2.30 LABEL io.airbyte.name=airbyte/source-file-secure diff --git a/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml b/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml index ad5aeb0e006e1e..97ed8432dc5f91 100644 --- a/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml @@ -15,7 +15,7 @@ tests: status: "succeed" # for local should be failed - config_path: "integration_tests/local_config.json" - status: "exception" + status: "failed" discovery: # for https diff --git a/airbyte-integrations/connectors/source-file/Dockerfile b/airbyte-integrations/connectors/source-file/Dockerfile index 6a54c7114f1612..29963cebc9a1f8 100644 --- a/airbyte-integrations/connectors/source-file/Dockerfile +++ b/airbyte-integrations/connectors/source-file/Dockerfile @@ -17,5 +17,5 @@ COPY source_file ./source_file ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.28 +LABEL io.airbyte.version=0.2.30 LABEL io.airbyte.name=airbyte/source-file diff --git a/airbyte-integrations/connectors/source-file/source_file/source.py b/airbyte-integrations/connectors/source-file/source_file/source.py index a9c51c0e865431..03449f5e754565 100644 --- a/airbyte-integrations/connectors/source-file/source_file/source.py +++ b/airbyte-integrations/connectors/source-file/source_file/source.py @@ -8,6 +8,7 @@ import traceback from datetime import datetime from typing import Any, Iterable, Iterator, Mapping, MutableMapping +from urllib.parse import urlparse from airbyte_cdk import AirbyteLogger from airbyte_cdk.models import ( @@ -83,10 +84,14 @@ def _validate_and_transform(self, config: Mapping[str, Any]): try: config["reader_options"] = json.loads(config["reader_options"]) except ValueError: - raise Exception("reader_options is not valid JSON") + raise ConfigurationError("reader_options is not valid JSON") else: config["reader_options"] = {} config["url"] = dropbox_force_download(config["url"]) + + parse_result = urlparse(config["url"]) + if parse_result.netloc == "docs.google.com" and parse_result.path.lower().startswith("/spreadsheets/"): + raise ConfigurationError(f'Failed to load {config["url"]}: please use the Official Google Sheets Source connector') return config def check(self, logger, config: Mapping) -> AirbyteConnectionStatus: @@ -94,14 +99,14 @@ def check(self, logger, config: Mapping) -> AirbyteConnectionStatus: Check involves verifying that the specified file is reachable with our credentials. """ - config = self._validate_and_transform(config) + try: + config = self._validate_and_transform(config) + except ConfigurationError as e: + logger.error(str(e)) + return AirbyteConnectionStatus(status=Status.FAILED, message=str(e)) + client = self._get_client(config) source_url = client.reader.full_url - logger.info(f"Checking access to {source_url}...") - if "docs.google.com/spreadsheets" in source_url: - reason = f"Failed to load {source_url}: please use the Official Google Sheets Source connector" - logger.error(reason) - return AirbyteConnectionStatus(status=Status.FAILED, message=reason) try: with client.reader.open(): list(client.streams) diff --git a/airbyte-integrations/connectors/source-file/unit_tests/test_source.py b/airbyte-integrations/connectors/source-file/unit_tests/test_source.py index ea71245a484569..56a5a030465b2e 100644 --- a/airbyte-integrations/connectors/source-file/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-file/unit_tests/test_source.py @@ -149,5 +149,14 @@ def test_discover(source, config, client): def test_check_wrong_reader_options(source, config): config["reader_options"] = '{encoding":"utf_16"}' - with pytest.raises(Exception): - source.check(logger=logger, config=config) + assert source.check(logger=logger, config=config) == AirbyteConnectionStatus( + status=Status.FAILED, message="reader_options is not valid JSON" + ) + + +def test_check_google_spreadsheets_url(source, config): + config["url"] = "https://docs.google.com/spreadsheets/d/" + assert source.check(logger=logger, config=config) == AirbyteConnectionStatus( + status=Status.FAILED, + message="Failed to load https://docs.google.com/spreadsheets/d/: please use the Official Google Sheets Source connector", + ) diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md index 01fe8622616ed2..2869f82ade8c68 100644 --- a/docs/integrations/sources/file.md +++ b/docs/integrations/sources/file.md @@ -129,6 +129,7 @@ In order to read large files from a remote location, this connector uses the [sm | Version | Date | Pull Request | Subject | | ------- | ---------- | -------------------------------------------------------- | -------------------------------------------------------- | +| 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command | | 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Added retry logic for `Connection reset error - 104` | | 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format | | 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link |