From 1d99b58eb67bfb74751ed070740400ac7ae91aab Mon Sep 17 00:00:00 2001 From: Denis Davydov Date: Thu, 16 Jun 2022 21:44:55 +0300 Subject: [PATCH] Source Amplitude: try-catch BadZipFile exception (#13846) * #272 oncall: source Amplitude - try-catch BadZipFile exception * #272-oncall: upd changelog * auto-bump connector version Co-authored-by: Octavia Squidington III --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-amplitude/Dockerfile | 2 +- .../source-amplitude/source_amplitude/api.py | 11 ++++++- docs/integrations/sources/amplitude.md | 33 ++++++++++--------- 5 files changed, 30 insertions(+), 20 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 5bd1c3d2c5fe41..c98b9edc405877 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -40,7 +40,7 @@ - name: Amplitude sourceDefinitionId: fa9f58c6-2d03-4237-aaa4-07d75e0c1396 dockerRepository: airbyte/source-amplitude - dockerImageTag: 0.1.9 + dockerImageTag: 0.1.10 documentationUrl: https://docs.airbyte.io/integrations/sources/amplitude icon: amplitude.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index cb15c80310a574..87af2986067b9b 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -547,7 +547,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-amplitude:0.1.9" +- dockerImage: "airbyte/source-amplitude:0.1.10" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/amplitude" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-amplitude/Dockerfile b/airbyte-integrations/connectors/source-amplitude/Dockerfile index ed815d8393e475..3d88bb080ab96c 100644 --- a/airbyte-integrations/connectors/source-amplitude/Dockerfile +++ b/airbyte-integrations/connectors/source-amplitude/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.9 +LABEL io.airbyte.version=0.1.10 LABEL io.airbyte.name=airbyte/source-amplitude diff --git a/airbyte-integrations/connectors/source-amplitude/source_amplitude/api.py b/airbyte-integrations/connectors/source-amplitude/source_amplitude/api.py index 6b79096cb2b32f..65636acbdbd769 100644 --- a/airbyte-integrations/connectors/source-amplitude/source_amplitude/api.py +++ b/airbyte-integrations/connectors/source-amplitude/source_amplitude/api.py @@ -144,7 +144,16 @@ class Events(IncrementalAmplitudeStream): def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]: state_value = stream_state[self.cursor_field] if stream_state else self._start_date.strftime(self.compare_date_template) - zip_file = zipfile.ZipFile(io.BytesIO(response.content)) + try: + zip_file = zipfile.ZipFile(io.BytesIO(response.content)) + except zipfile.BadZipFile as e: + self.logger.exception(e) + self.logger.error( + f"Received an invalid zip file in response to URL: {response.request.url}." + f"The size of the response body is: {len(response.content)}" + ) + return [] + for gzip_filename in zip_file.namelist(): with zip_file.open(gzip_filename) as file: for record in self._parse_zip_file(file): diff --git a/docs/integrations/sources/amplitude.md b/docs/integrations/sources/amplitude.md index 8279d8d9b49622..d267da413a62a6 100644 --- a/docs/integrations/sources/amplitude.md +++ b/docs/integrations/sources/amplitude.md @@ -46,10 +46,10 @@ If there are more endpoints you'd like Airbyte to support, please [create an iss The `Amplitude` source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -| Feature | Supported? | -| :--- | :--- | -| Full Refresh Sync | Yes | -| Incremental Sync | Yes | +| Feature | Supported? | +|:------------------|:-----------| +| Full Refresh Sync | Yes | +| Incremental Sync | Yes | ### Performance considerations @@ -57,15 +57,16 @@ The Amplitude connector should gracefully handle Amplitude API limitations under ## Changelog -| Version | Date | Pull Request | Subject | -|:--------| :--------- | :----------------------------------------------------- | :------ | -| 0.1.9 | 2022-06-10 | [13638](https://github.com/airbytehq/airbyte/pull/13638) | Fixed an infinite loop when fetching Amplitude data | -| 0.1.8 | 2022-06-01 | [13373](https://github.com/airbytehq/airbyte/pull/13373) | Fixed the issue when JSON Validator produces erros on `date-time` check | -| 0.1.7 | 2022-05-21 | [13074](https://github.com/airbytehq/airbyte/pull/13074) | Removed time offset for `Events` stream, which caused a lot of duplicated records | -| 0.1.6 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy | -| 0.1.5 | 2022-04-28 | [12430](https://github.com/airbytehq/airbyte/pull/12430) | Added HTTP error descriptions and fixed `Events` stream fail caused by `404` HTTP Error | -| 0.1.4 | 2021-12-23 | [8434](https://github.com/airbytehq/airbyte/pull/8434) | Update fields in source-connectors specifications | -| 0.1.3 | 2021-10-12 | [6375](https://github.com/airbytehq/airbyte/pull/6375) | Log Transient 404 Error in Events stream | -| 0.1.2 | 2021-09-21 | [6353](https://github.com/airbytehq/airbyte/pull/6353) | Correct output schemas on cohorts, events, active\_users, and average\_session\_lengths streams | -| 0.1.1 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE\_ENTRYPOINT for kubernetes support | -| 0.1.0 | 2021-06-08 | [3664](https://github.com/airbytehq/airbyte/pull/3664) | New Source: Amplitude | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------------------------| +| 0.1.10 | 2022-06-16 | [13846](https://github.com/airbytehq/airbyte/pull/13846) | Try-catch the BadZipFile error | +| 0.1.9 | 2022-06-10 | [13638](https://github.com/airbytehq/airbyte/pull/13638) | Fixed an infinite loop when fetching Amplitude data | +| 0.1.8 | 2022-06-01 | [13373](https://github.com/airbytehq/airbyte/pull/13373) | Fixed the issue when JSON Validator produces errors on `date-time` check | +| 0.1.7 | 2022-05-21 | [13074](https://github.com/airbytehq/airbyte/pull/13074) | Removed time offset for `Events` stream, which caused a lot of duplicated records | +| 0.1.6 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy | +| 0.1.5 | 2022-04-28 | [12430](https://github.com/airbytehq/airbyte/pull/12430) | Added HTTP error descriptions and fixed `Events` stream fail caused by `404` HTTP Error | +| 0.1.4 | 2021-12-23 | [8434](https://github.com/airbytehq/airbyte/pull/8434) | Update fields in source-connectors specifications | +| 0.1.3 | 2021-10-12 | [6375](https://github.com/airbytehq/airbyte/pull/6375) | Log Transient 404 Error in Events stream | +| 0.1.2 | 2021-09-21 | [6353](https://github.com/airbytehq/airbyte/pull/6353) | Correct output schemas on cohorts, events, active\_users, and average\_session\_lengths streams | +| 0.1.1 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE\_ENTRYPOINT for kubernetes support | +| 0.1.0 | 2021-06-08 | [3664](https://github.com/airbytehq/airbyte/pull/3664) | New Source: Amplitude |