diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 7a0ebc55a88400..34500ecc202c4d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -273,7 +273,7 @@ - name: Google Analytics sourceDefinitionId: eff3616a-f9c3-11eb-9a03-0242ac130003 dockerRepository: airbyte/source-google-analytics-v4 - dockerImageTag: 0.1.16 + dockerImageTag: 0.1.17 documentationUrl: https://docs.airbyte.io/integrations/sources/google-analytics-v4 icon: google-analytics.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 1f2c38fc05d652..b48c5bbf1f02b8 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2754,7 +2754,7 @@ oauthFlowOutputParameters: - - "access_token" - - "refresh_token" -- dockerImage: "airbyte/source-google-analytics-v4:0.1.16" +- dockerImage: "airbyte/source-google-analytics-v4:0.1.17" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/google-analytics-v4" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/Dockerfile b/airbyte-integrations/connectors/source-google-analytics-v4/Dockerfile index fb470d3c967e77..609fb81096acd9 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/Dockerfile +++ b/airbyte-integrations/connectors/source-google-analytics-v4/Dockerfile @@ -4,13 +4,13 @@ FROM python:3.7-slim RUN apt-get update && apt-get install -y bash && rm -rf /var/lib/apt/lists/* WORKDIR /airbyte/integration_code -COPY source_google_analytics_v4 ./source_google_analytics_v4 -COPY main.py ./ COPY setup.py ./ RUN pip install . 
+COPY source_google_analytics_v4 ./source_google_analytics_v4 +COPY main.py ./ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.16 +LABEL io.airbyte.version=0.1.17 LABEL io.airbyte.name=airbyte/source-google-analytics-v4 diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/acceptance-test-config.yml b/airbyte-integrations/connectors/source-google-analytics-v4/acceptance-test-config.yml index a4960533272747..a697c0fc0955f2 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-google-analytics-v4/acceptance-test-config.yml @@ -1,10 +1,6 @@ # See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) # for more information about how to configure these tests -# The 'future_state_path' field is commented out to skip the test `test_state_with_abnormally_large_values` -# as a temporary solution not to block publishing of new versions. 
The reason is -# When specifying future date in the state the current implementation of the connector produces records for [current_date, current_date] slice, -# and it makes SAT fail, because it should produce no records with the state with abnormally large values connector_image: airbyte/source-google-analytics-v4:dev tests: spec: @@ -27,7 +23,7 @@ tests: incremental: - config_path: "secrets/service_config.json" configured_catalog_path: "integration_tests/configured_catalog.json" - # future_state_path: "integration_tests/abnormal_state.json" + future_state_path: "integration_tests/abnormal_state.json" full_refresh: - config_path: "secrets/service_config.json" configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-google-analytics-v4/integration_tests/abnormal_state.json index 70565e04f926ee..8bbad318c49391 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-google-analytics-v4/integration_tests/abnormal_state.json @@ -10,5 +10,5 @@ "daily_active_users": { "ga_date": "2050-05-01" }, "devices": { "ga_date": "2050-05-01" }, "users_per_day": { "ga_date": "2050-05-01" }, - "sessions_per_country_day": { "ga_date": "2050-05-01" } + "new_users_per_day": {"ga_date": "2050-05-01"} } diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-google-analytics-v4/integration_tests/sample_state.json index 87fd48bf19ddfa..d636881bc2a22d 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/integration_tests/sample_state.json +++ b/airbyte-integrations/connectors/source-google-analytics-v4/integration_tests/sample_state.json @@ -10,5 +10,5 @@ "daily_active_users": { "ga_date": "2021-02-11" }, "devices": { 
"ga_date": "2021-02-11" }, "users_per_day": { "ga_date": "2021-02-11" }, - "sessions_per_country_day": { "ga_date": "2021-02-11" } + "new_users_per_day": { "ga_date": "2021-02-11" } } diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/source_google_analytics_v4/source.py b/airbyte-integrations/connectors/source-google-analytics-v4/source_google_analytics_v4/source.py index 15ada1ca0c518c..46764df7505990 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/source_google_analytics_v4/source.py +++ b/airbyte-integrations/connectors/source-google-analytics-v4/source_google_analytics_v4/source.py @@ -14,6 +14,7 @@ import jwt import pendulum import requests +from airbyte_cdk.models import SyncMode from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.http import HttpStream @@ -92,7 +93,6 @@ class GoogleAnalyticsV4Stream(HttpStream, ABC): url_base = "https://analyticsreporting.googleapis.com/v4/" report_field = "reports" - data_fields = ["data", "rows"] map_type = dict(INTEGER="integer", FLOAT="number", PERCENT="number", TIME="number") @@ -226,33 +226,29 @@ def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs: Any) - ...] 
""" + today = pendulum.now().date() start_date = pendulum.parse(self.start_date).date() - end_date = pendulum.now().date() - - # Determine stream_state, if no stream_state we use start_date if stream_state: - start_date = pendulum.parse(stream_state.get(self.cursor_field)).date() + prev_end_date = pendulum.parse(stream_state.get(self.cursor_field)).date() + start_date = prev_end_date.add(days=1) + end_date = today + if start_date > end_date: + return [None] - # use the lowest date between start_date and self.end_date, otherwise API fails if start_date is in future - start_date = min(start_date, end_date) date_slices = [] - while start_date <= end_date: - end_date_slice = start_date.add(days=self.window_in_days) + slice_start_date = start_date + while slice_start_date <= end_date: + slice_end_date = slice_start_date.add(days=self.window_in_days) # limit the slice range with end_date - end_date_slice = min(end_date_slice, end_date) - date_slices.append({"startDate": self.to_datetime_str(start_date), "endDate": self.to_datetime_str(end_date_slice)}) - # add 1 day for start next slice from next day and not duplicate data from previous slice end date. 
- start_date = end_date_slice.add(days=1) - return date_slices - - # TODO: the method has to be updated for more logical and obvious - def get_data(self, data): # type: ignore[no-untyped-def] - for data_field in self.data_fields: - if data and isinstance(data, dict): - data = data.get(data_field, []) - else: - return [] - return data + slice_end_date = min(slice_end_date, end_date) + date_slices.append({"startDate": self.to_datetime_str(slice_start_date), "endDate": self.to_datetime_str(slice_end_date)}) + # start next slice 1 day after previous slice ended to prevent duplicate reads + slice_start_date = slice_end_date.add(days=1) + return date_slices or [None] + + @staticmethod + def report_rows(report_body: MutableMapping[Any, Any]) -> List[MutableMapping[Any, Any]]: + return report_body.get("data", {}).get("rows", []) def lookup_data_type(self, field_type: str, attribute: str) -> str: """ @@ -268,7 +264,7 @@ def lookup_data_type(self, field_type: str, attribute: str) -> str: attr_type = self.dimensions_ref[attribute] elif field_type == "metric": # Custom Google Analytics Metrics {ga:goalXXStarts, ga:metricXX, ... 
} - # We always treat them as as strings as we can not be sure of their data type + # We always treat them as strings as we can not be sure of their data type if attribute.startswith("ga:goal") and attribute.endswith( ("Starts", "Completions", "Value", "ConversionRate", "Abandons", "AbandonRate") ): @@ -282,10 +278,10 @@ def lookup_data_type(self, field_type: str, attribute: str) -> str: attr_type = self.metrics_ref[attribute] else: attr_type = None - self.logger.error(f"Unsuported GA type: {field_type}") + self.logger.error(f"Unsupported GA type: {field_type}") except KeyError: attr_type = None - self.logger.error(f"Unsuported GA {field_type}: {attribute}") + self.logger.error(f"Unsupported GA {field_type}: {attribute}") return self.map_type.get(attr_type, "string") @@ -387,7 +383,7 @@ def parse_response(self, response: requests.Response, **kwargs: Any) -> Iterable self.check_for_sampled_result(report.get("data", {})) - for row in self.get_data(report): + for row in self.report_rows(report): record = {} dimensions = row.get("dimensions", []) metrics = row.get("metrics", []) @@ -421,11 +417,19 @@ class GoogleAnalyticsV4IncrementalObjectsBase(GoogleAnalyticsV4Stream): cursor_field = "ga_date" def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: - """ - Update the state value, default CDK method. 
- """ return {self.cursor_field: max(latest_record.get(self.cursor_field, ""), current_stream_state.get(self.cursor_field, ""))} + def read_records( + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, + ) -> Iterable[Mapping[str, Any]]: + if not stream_slice: + return [] + return super().read_records(sync_mode, cursor_field, stream_slice, stream_state) + class GoogleAnalyticsServiceOauth2Authenticator(Oauth2Authenticator): """Request example for API token extraction: diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/empty_response.json b/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/empty_response.json index 0d8067faf9bc7c..36f7e59004028e 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/empty_response.json +++ b/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/empty_response.json @@ -2,11 +2,11 @@ "reports": [ { "columnHeader": { - "dimensions": ["ga: date"], + "dimensions": ["ga:date"], "metricHeader": { "metricHeaderEntries": [ { - "name": "ga: 14dayUsers", + "name": "ga:14dayUsers", "type": "INTEGER" } ] diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_is_data_golden_false.json b/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_is_data_golden_false.json index 4e2e641ac3f282..ff7e3d23ad2385 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_is_data_golden_false.json +++ b/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_is_data_golden_false.json @@ -2,7 +2,7 @@ "reports": [ { "columnHeader": { - "dimensions": ["ga: date"], + "dimensions": ["ga:date"], "metricHeader": { "metricHeaderEntries": [ { diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_with_records.json 
b/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_with_records.json index 80a46d877be802..be89bd58587622 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_with_records.json +++ b/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_with_records.json @@ -2,11 +2,11 @@ "reports": [ { "columnHeader": { - "dimensions": ["ga: date"], + "dimensions": ["ga:date"], "metricHeader": { "metricHeaderEntries": [ { - "name": "ga: 14dayUsers", + "name": "ga:14dayUsers", "type": "INTEGER" } ] diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_with_sampling.json b/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_with_sampling.json index b116b5f012621e..0c6a54151a31e3 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_with_sampling.json +++ b/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/response_with_sampling.json @@ -2,11 +2,11 @@ "reports": [ { "columnHeader": { - "dimensions": ["ga: date"], + "dimensions": ["ga:date"], "metricHeader": { "metricHeaderEntries": [ { - "name": "ga: 14dayUsers", + "name": "ga:14dayUsers", "type": "INTEGER" } ] diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/unit_test.py index 6af81b43f58a6e..cc92af277a14b7 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-google-analytics-v4/unit_tests/unit_test.py @@ -2,6 +2,7 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
# +import copy import json import logging from pathlib import Path @@ -10,7 +11,7 @@ import pendulum import pytest -from airbyte_cdk.models import ConfiguredAirbyteCatalog +from airbyte_cdk.models import ConfiguredAirbyteCatalog, SyncMode from airbyte_cdk.sources.streams.http.auth import NoAuth from freezegun import freeze_time from source_google_analytics_v4.source import ( @@ -104,16 +105,45 @@ def mock_api_returns_is_data_golden_false(requests_mock): ) +@pytest.fixture +def configured_catalog(): + return ConfiguredAirbyteCatalog.parse_obj(json.loads(read_file("./configured_catalog.json"))) + + @pytest.fixture() def test_config(): - test_config = json.loads(read_file("../integration_tests/sample_config.json")) - test_config["authenticator"] = NoAuth() - test_config["metrics"] = [] - test_config["dimensions"] = [] + test_conf = { + "view_id": "1234567", + "window_in_days": 1, + "authenticator": NoAuth(), + "metrics": [], + "start_date": pendulum.now().subtract(days=2).date().strftime("%Y-%m-%d"), + "dimensions": [], + "credentials": { + "type": "Service", + }, + } + return copy.deepcopy(test_conf) + + +@pytest.fixture() +def test_config_auth_service(test_config): + test_config["credentials"] = { + "auth_type": "Service", + "credentials_json": '{"client_email": "", "private_key": "", "private_key_id": ""}', + } + return copy.deepcopy(test_config) + + +@pytest.fixture() +def test_config_auth_client(test_config): test_config["credentials"] = { - "type": "Service", + "auth_type": "Client", + "client_id": "client_id_val", + "client_secret": "client_secret_val", + "refresh_token": "refresh_token_val", } - return test_config + return copy.deepcopy(test_config) def test_metrics_dimensions_type_list(mock_metrics_dimensions_type_list_link): @@ -144,42 +174,39 @@ def test_lookup_metrics_dimensions_data_type(test_config, metrics_dimensions_map def test_data_is_not_golden_is_logged_as_warning( mock_api_returns_is_data_golden_false, test_config, + configured_catalog, 
mock_metrics_dimensions_type_list_link, mock_auth_call, caplog, ): source = SourceGoogleAnalyticsV4() - del test_config["custom_reports"] - catalog = ConfiguredAirbyteCatalog.parse_obj(json.loads(read_file("./configured_catalog.json"))) - list(source.read(logging.getLogger(), test_config, catalog)) + list(source.read(logging.getLogger(), test_config, configured_catalog)) assert DATA_IS_NOT_GOLDEN_MSG in caplog.text def test_sampled_result_is_logged_as_warning( mock_api_returns_sampled_results, test_config, + configured_catalog, mock_metrics_dimensions_type_list_link, mock_auth_call, caplog, ): source = SourceGoogleAnalyticsV4() - del test_config["custom_reports"] - catalog = ConfiguredAirbyteCatalog.parse_obj(json.loads(read_file("./configured_catalog.json"))) - list(source.read(logging.getLogger(), test_config, catalog)) + list(source.read(logging.getLogger(), test_config, configured_catalog)) assert RESULT_IS_SAMPLED_MSG in caplog.text def test_no_regressions_for_result_is_sampled_and_data_is_golden_warnings( mock_api_returns_valid_records, test_config, + configured_catalog, mock_metrics_dimensions_type_list_link, mock_auth_call, caplog, ): source = SourceGoogleAnalyticsV4() - del test_config["custom_reports"] - catalog = ConfiguredAirbyteCatalog.parse_obj(json.loads(read_file("./configured_catalog.json"))) - list(source.read(logging.getLogger(), test_config, catalog)) + list(source.read(logging.getLogger(), test_config, configured_catalog)) assert RESULT_IS_SAMPLED_MSG not in caplog.text assert DATA_IS_NOT_GOLDEN_MSG not in caplog.text @@ -187,6 +214,7 @@ def test_no_regressions_for_result_is_sampled_and_data_is_golden_warnings( @patch("source_google_analytics_v4.source.jwt") def test_check_connection_fails_jwt( jwt_encode_mock, + test_config_auth_service, mocker, mock_metrics_dimensions_type_list_link, mock_auth_call, @@ -196,17 +224,12 @@ def test_check_connection_fails_jwt( check_connection fails because of the API returns no records, then we assume than user 
doesn't have permission to read requested `view` """ - test_config = json.loads(read_file("../integration_tests/sample_config.json")) - del test_config["custom_reports"] - test_config["credentials"] = { - "auth_type": "Service", - "credentials_json": '{"client_email": "", "private_key": "", "private_key_id": ""}', - } source = SourceGoogleAnalyticsV4() - is_success, msg = source.check_connection(MagicMock(), test_config) + is_success, msg = source.check_connection(MagicMock(), test_config_auth_service) assert is_success is False assert ( - msg == f"Please check the permissions for the requested view_id: {test_config['view_id']}. Cannot retrieve data from that view ID." + msg + == f"Please check the permissions for the requested view_id: {test_config_auth_service['view_id']}. Cannot retrieve data from that view ID." ) jwt_encode_mock.encode.assert_called() assert mock_auth_call.called @@ -216,6 +239,7 @@ def test_check_connection_fails_jwt( @patch("source_google_analytics_v4.source.jwt") def test_check_connection_success_jwt( jwt_encode_mock, + test_config_auth_service, mocker, mock_metrics_dimensions_type_list_link, mock_auth_call, @@ -225,14 +249,8 @@ def test_check_connection_success_jwt( check_connection succeeds because of the API returns valid records for the latest date based slice, then we assume than user has permission to read requested `view` """ - test_config = json.loads(read_file("../integration_tests/sample_config.json")) - del test_config["custom_reports"] - test_config["credentials"] = { - "auth_type": "Service", - "credentials_json": '{"client_email": "", "private_key": "", "private_key_id": ""}', - } source = SourceGoogleAnalyticsV4() - is_success, msg = source.check_connection(MagicMock(), test_config) + is_success, msg = source.check_connection(MagicMock(), test_config_auth_service) assert is_success is True assert msg is None jwt_encode_mock.encode.assert_called() @@ -243,6 +261,7 @@ def test_check_connection_success_jwt( 
@patch("source_google_analytics_v4.source.jwt") def test_check_connection_fails_oauth( jwt_encode_mock, + test_config_auth_client, mocker, mock_metrics_dimensions_type_list_link, mock_auth_call, @@ -252,19 +271,12 @@ def test_check_connection_fails_oauth( check_connection fails because of the API returns no records, then we assume than user doesn't have permission to read requested `view` """ - test_config = json.loads(read_file("../integration_tests/sample_config.json")) - del test_config["custom_reports"] - test_config["credentials"] = { - "auth_type": "Client", - "client_id": "client_id_val", - "client_secret": "client_secret_val", - "refresh_token": "refresh_token_val", - } source = SourceGoogleAnalyticsV4() - is_success, msg = source.check_connection(MagicMock(), test_config) + is_success, msg = source.check_connection(MagicMock(), test_config_auth_client) assert is_success is False assert ( - msg == f"Please check the permissions for the requested view_id: {test_config['view_id']}. Cannot retrieve data from that view ID." + msg + == f"Please check the permissions for the requested view_id: {test_config_auth_client['view_id']}. Cannot retrieve data from that view ID." 
) jwt_encode_mock.encode.assert_not_called() assert "https://www.googleapis.com/auth/analytics.readonly" in unquote(mock_auth_call.last_request.body) @@ -278,6 +290,7 @@ def test_check_connection_fails_oauth( @patch("source_google_analytics_v4.source.jwt") def test_check_connection_success_oauth( jwt_encode_mock, + test_config_auth_client, mocker, mock_metrics_dimensions_type_list_link, mock_auth_call, @@ -287,16 +300,8 @@ def test_check_connection_success_oauth( check_connection succeeds because of the API returns valid records for the latest date based slice, then we assume than user has permission to read requested `view` """ - test_config = json.loads(read_file("../integration_tests/sample_config.json")) - del test_config["custom_reports"] - test_config["credentials"] = { - "auth_type": "Client", - "client_id": "client_id_val", - "client_secret": "client_secret_val", - "refresh_token": "refresh_token_val", - } source = SourceGoogleAnalyticsV4() - is_success, msg = source.check_connection(MagicMock(), test_config) + is_success, msg = source.check_connection(MagicMock(), test_config_auth_client) assert is_success is True assert msg is None jwt_encode_mock.encode.assert_not_called() @@ -314,24 +319,63 @@ def test_unknown_metrics_or_dimensions_error_validation(mock_metrics_dimensions_ @freeze_time("2021-11-30") -def test_stream_slices_limited_by_current_date(test_config): +def test_stream_slices_limited_by_current_date(test_config, mock_metrics_dimensions_type_list_link): + test_config["window_in_days"] = 14 g = GoogleAnalyticsV4IncrementalObjectsBase(config=test_config) - stream_state = {"ga_date": "2050-05-01"} + stream_state = {"ga_date": "2021-11-25"} slices = g.stream_slices(stream_state=stream_state) current_date = pendulum.now().date().strftime("%Y-%m-%d") - - assert len(slices) == 1 - assert slices[0]["startDate"] == slices[0]["endDate"] - assert slices[0]["endDate"] == current_date + assert slices == [{"startDate": "2021-11-26", "endDate": current_date}] 
@freeze_time("2021-11-30") -def test_stream_slices_start_from_current_date_if_abnornal_state_is_passed(test_config): +def test_empty_stream_slice_if_abnormal_state_is_passed(test_config, mock_metrics_dimensions_type_list_link): g = GoogleAnalyticsV4IncrementalObjectsBase(config=test_config) stream_state = {"ga_date": "2050-05-01"} slices = g.stream_slices(stream_state=stream_state) - current_date = pendulum.now().date().strftime("%Y-%m-%d") + assert slices == [None] - assert len(slices) == 1 - assert slices[0]["startDate"] == slices[0]["endDate"] - assert slices[0]["startDate"] == current_date + +def test_empty_slice_produces_no_records(test_config, mock_metrics_dimensions_type_list_link): + g = GoogleAnalyticsV4IncrementalObjectsBase(config=test_config) + records = g.read_records(sync_mode=SyncMode.incremental, stream_slice=None, stream_state={g.cursor_field: g.start_date}) + assert next(iter(records), None) is None + + +def test_state_saved_after_each_record(test_config, mock_metrics_dimensions_type_list_link): + today_dt = pendulum.now().date() + before_yesterday = today_dt.subtract(days=2).strftime("%Y-%m-%d") + today = today_dt.strftime("%Y-%m-%d") + record = {"ga_date": today} + g = GoogleAnalyticsV4IncrementalObjectsBase(config=test_config) + state = {g.cursor_field: before_yesterday} + assert g.get_updated_state(state, record) == {g.cursor_field: today} + + +def test_connection_fail_invalid_reports_json(test_config): + source = SourceGoogleAnalyticsV4() + test_config["custom_reports"] = "[{'data': {'ga:foo': 'ga:bar'}}]" + ok, error = source.check_connection(logging.getLogger(), test_config) + assert not ok + assert "Invalid custom reports json structure." 
in error + + +@pytest.mark.parametrize( + ("status", "json_resp"), + ( + (403, {"error": "Your role is not not granted the permission for accessing this resource"}), + (500, {"error": "Internal server error, please contact support"}), + ), +) +def test_connection_fail_due_to_http_status( + mocker, test_config, requests_mock, mock_auth_call, mock_metrics_dimensions_type_list_link, status, json_resp +): + mocker.patch("time.sleep") + requests_mock.post("https://analyticsreporting.googleapis.com/v4/reports:batchGet", status_code=status, json=json_resp) + source = SourceGoogleAnalyticsV4() + ok, error = source.check_connection(logging.getLogger(), test_config) + assert not ok + if status == 403: + assert "Please check the permissions for the requested view_id" in error + assert test_config["view_id"] in error + assert json_resp["error"] in error diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 54b527a856e8b4..16e2ee7c8be518 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -2,13 +2,13 @@ ## Features -| Feature | Supported? | -| :--- | :--- | -| Full Refresh Sync | Yes | -| Incremental Sync | Yes | -| Replicate Incremental Deletes | No | -| SSL connection | Yes | -| Custom Reports | Yes | +| Feature | Supported? | +|:------------------------------|:-----------| +| Full Refresh Sync | Yes | +| Incremental Sync | Yes | +| Replicate Incremental Deletes | No | +| SSL connection | Yes | +| Custom Reports | Yes | ### Supported Tables @@ -140,21 +140,22 @@ Google Analytics API may return provisional or incomplete data. 
When this occurs ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.16 | 2022-01-26 | [9480](https://github.com/airbytehq/airbyte/pull/9480) | Reintroduce `window_in_days` and log warning when sampling occurs | -| 0.1.15 | 2021-12-28 | [9165](https://github.com/airbytehq/airbyte/pull/9165) | Update titles and descriptions | -| 0.1.14 | 2021-12-09 | [8656](https://github.com/airbytehq/airbyte/pull/8656) | Fix date-format in schemas | -| 0.1.13 | 2021-12-09 | [8676](https://github.com/airbytehq/airbyte/pull/8676) | Fix `window_in_days` validation issue | -| 0.1.12 | 2021-12-03 | [8175](https://github.com/airbytehq/airbyte/pull/8175) | Fix validation of unknown metric(s) or dimension(s) error | -| 0.1.11 | 2021-11-30 | [8264](https://github.com/airbytehq/airbyte/pull/8264) | Corrected date range | -| 0.1.10 | 2021-11-19 | [8087](https://github.com/airbytehq/airbyte/pull/8087) | Support `start_date` before the account has any data | -| 0.1.9 | 2021-10-27 | [7410](https://github.com/airbytehq/airbyte/pull/7410) | Add check for correct permission for requested `view_id` | -| 0.1.8 | 2021-10-13 | [7020](https://github.com/airbytehq/airbyte/pull/7020) | Add intermediary auth config support | -| 0.1.7 | 2021-10-07 | [6414](https://github.com/airbytehq/airbyte/pull/6414) | Declare oauth parameters in google sources | -| 0.1.6 | 2021-09-27 | [6459](https://github.com/airbytehq/airbyte/pull/6459) | Update OAuth Spec File | -| 0.1.3 | 2021-09-21 | [6357](https://github.com/airbytehq/airbyte/pull/6357) | Fix oauth workflow parameters | -| 0.1.2 | 2021-09-20 | [6306](https://github.com/airbytehq/airbyte/pull/6306) | Support of airbyte OAuth initialization flow | -| 0.1.1 | 2021-08-25 | [5655](https://github.com/airbytehq/airbyte/pull/5655) | Corrected validation of empty custom report | -| 0.1.0 | 2021-08-10 | [5290](https://github.com/airbytehq/airbyte/pull/5290) | Initial Release | +| Version | Date | Pull Request | Subject | 
+|:--------|:-----------|:------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------| +| 0.1.17 | 2022-03-31 | [11512](https://github.com/airbytehq/airbyte/pull/11512) | Improve unit and acceptance test coverage; fix `read` with abnormally large state values | +| 0.1.16 | 2022-01-26 | [9480](https://github.com/airbytehq/airbyte/pull/9480) | Reintroduce `window_in_days` and log warning when sampling occurs | +| 0.1.15 | 2021-12-28 | [9165](https://github.com/airbytehq/airbyte/pull/9165) | Update titles and descriptions | +| 0.1.14 | 2021-12-09 | [8656](https://github.com/airbytehq/airbyte/pull/8656) | Fix date-format in schemas | +| 0.1.13 | 2021-12-09 | [8676](https://github.com/airbytehq/airbyte/pull/8676) | Fix `window_in_days` validation issue | +| 0.1.12 | 2021-12-03 | [8175](https://github.com/airbytehq/airbyte/pull/8175) | Fix validation of unknown metric(s) or dimension(s) error | +| 0.1.11 | 2021-11-30 | [8264](https://github.com/airbytehq/airbyte/pull/8264) | Corrected date range | +| 0.1.10 | 2021-11-19 | [8087](https://github.com/airbytehq/airbyte/pull/8087) | Support `start_date` before the account has any data | +| 0.1.9 | 2021-10-27 | [7410](https://github.com/airbytehq/airbyte/pull/7410) | Add check for correct permission for requested `view_id` | +| 0.1.8 | 2021-10-13 | [7020](https://github.com/airbytehq/airbyte/pull/7020) | Add intermediary auth config support | +| 0.1.7 | 2021-10-07 | [6414](https://github.com/airbytehq/airbyte/pull/6414) | Declare oauth parameters in google sources | +| 0.1.6 | 2021-09-27 | [6459](https://github.com/airbytehq/airbyte/pull/6459) | Update OAuth Spec File | +| 0.1.3 | 2021-09-21 | [6357](https://github.com/airbytehq/airbyte/pull/6357) | Fix oauth workflow parameters | +| 0.1.2 | 2021-09-20 | [6306](https://github.com/airbytehq/airbyte/pull/6306) | Support of airbyte OAuth 
initialization flow | +| 0.1.1 | 2021-08-25 | [5655](https://github.com/airbytehq/airbyte/pull/5655) | Corrected validation of empty custom report | +| 0.1.0 | 2021-08-10 | [5290](https://github.com/airbytehq/airbyte/pull/5290) | Initial Release |