From 671410dd5dd6a7abea637f76bdaa28c22c73fa27 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Mon, 9 Jan 2023 17:43:21 +0000 Subject: [PATCH 01/14] test_token_rotation added Signed-off-by: Sergey Chvalyuk --- .../source-google-analytics-data-api/setup.py | 2 ++ .../unit_tests/test_authenticator.py | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_authenticator.py diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/setup.py b/airbyte-integrations/connectors/source-google-analytics-data-api/setup.py index 87ff851e1c54d..5e18b8eae8ada 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/setup.py +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/setup.py @@ -8,8 +8,10 @@ MAIN_REQUIREMENTS = ["airbyte-cdk~=0.1", "google-analytics-data==0.11.2", "PyJWT==2.4.0", "cryptography==37.0.4", "requests==2.28.1"] TEST_REQUIREMENTS = [ + "freezegun", "pytest~=6.1", "pytest-mock~=3.6.1", + "requests-mock~=1.9", "source-acceptance-test", ] diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_authenticator.py b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_authenticator.py new file mode 100644 index 0000000000000..d95d69f14aae5 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_authenticator.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + +import requests +from freezegun import freeze_time +from source_google_analytics_data_api.authenticator import GoogleServiceKeyAuthenticator + + +@freeze_time("2023-01-01 00:00:00") +def test_token_rotation(requests_mock): + credentials = { + "client_email": "client_email", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIBVQIBADANBgkqhkiG9w0BAQEFAASCAT8wggE7AgEAAkEA3slcXL+dA36ESmOi\n1xBhZmp5Hn0WkaHDtW4naba3plva0ibloBNWhFhjQOh7Ff01PVjhT4D5jgqXBIgc\nz9Gv3QIDAQABAkEArlhYPoD5SB2/O1PjwHgiMPrL1C9B9S/pr1cH4vPJnpY3VKE3\n5hvdil14YwRrcbmIxMkK2iRLi9lM4mJmdWPy4QIhAPsRFXZSGx0TZsDxD9V0ZJmZ\n0AuDCj/NF1xB5KPLmp7pAiEA4yoFox6w7ql/a1pUVaLt0NJkDfE+22pxYGNQaiXU\nuNUCIQCsFLaIJZiN4jlgbxlyLVeya9lLuqIwvqqPQl6q4ad12QIgS9gG48xmdHig\n8z3IdIMedZ8ZCtKmEun6Cp1+BsK0wDUCIF0nHfSuU+eTQ2qAON2SHIrJf8UeFO7N\nzdTN1IwwQqjI\n-----END PRIVATE KEY-----\n", + "client_id": "client_id" + } + authenticator = GoogleServiceKeyAuthenticator(credentials) + + auth_request = requests_mock.register_uri( + "POST", + authenticator._google_oauth2_token_endpoint, + json={"access_token": "bearer_token", "expires_in": 3600} + ) + + authenticated_request = authenticator(requests.Request()) + assert auth_request.call_count == 1 + assert auth_request.last_request.qs.get("assertion") == ['eyj0exaioijkv1qilcjhbgcioijsuzi1niisimtpzci6imnsawvudf9pzcj9.eyjpc3mioijjbgllbnrfzw1hawwilcjzy29wzsi6imh0dhbzoi8vd3d3lmdvb2dszwfwaxmuy29tl2f1dggvyw5hbhl0awnzlnjlywrvbmx5iiwiyxvkijoiahr0chm6ly9vyxv0adiuz29vz2xlyxbpcy5jb20vdg9rzw4ilcjlehaioje2nzi1mzq4mdasimlhdci6mty3mjuzmtiwmh0.u1gpfmncrtlsy_ujxpc2iazpvdzb6eq4mobq3xez5v6gqtj0xgou__c6neu9d7qvb8h0jkynggsfibkoci_g7a'] + assert auth_request.last_request.qs.get("grant_type") == ["urn:ietf:params:oauth:grant-type:jwt-bearer"] + assert authenticator._token.get("expires_at") == 1672534800 + assert authenticated_request.headers.get("Authorization") == "Bearer bearer_token" From 0e0df29deeddadb517ea30c2e79ec57575cc394b Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Mon, 9 Jan 2023 18:55:42 
+0000 Subject: [PATCH 02/14] test_stream_slices added Signed-off-by: Sergey Chvalyuk --- .../unit_tests/test_streams.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py index 3cd7d0881e2b5..57383a9a5f5ec 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py @@ -9,6 +9,7 @@ from unittest.mock import MagicMock import pytest +from freezegun import freeze_time from source_google_analytics_data_api.source import GoogleAnalyticsDataApiBaseStream json_credentials = """ @@ -271,3 +272,31 @@ def test_backoff_time(patch_base_class): stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"]) expected_backoff_time = None assert stream.backoff_time(response_mock) == expected_backoff_time + + +@freeze_time("2023-01-01 00:00:00") +def test_stream_slices(): + config = {"date_ranges_start_date": datetime.date(2022, 12, 29), "window_in_days": 1} + stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config) + slices = list(stream.stream_slices(sync_mode=None)) + assert slices == [ + {'startDate': '2022-12-29', 'endDate': '2022-12-30'}, + {'startDate': '2022-12-31', 'endDate': '2023-01-01'}, + ] + + config = {"date_ranges_start_date": datetime.date(2022, 12, 28), "window_in_days": 2} + stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config) + slices = list(stream.stream_slices(sync_mode=None)) + assert slices == [ + {'startDate': '2022-12-28', 'endDate': '2022-12-30'}, + {'startDate': '2022-12-31', 'endDate': '2023-01-01'}, + ] + + config = {"date_ranges_start_date": datetime.date(2022, 12, 20), "window_in_days": 5} + stream = 
GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config) + slices = list(stream.stream_slices(sync_mode=None)) + assert slices == [ + {'startDate': '2022-12-20', 'endDate': '2022-12-25'}, + {'startDate': '2022-12-26', 'endDate': '2022-12-31'}, + {'startDate': '2023-01-01', 'endDate': '2023-01-01'}, + ] From af213135e83ef20e2b1dd631526e2a5d28b602b5 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Tue, 10 Jan 2023 06:31:06 +0000 Subject: [PATCH 03/14] stream_slices re-implemented --- .../source_google_analytics_data_api/source.py | 14 +++++--------- .../unit_tests/test_streams.py | 17 ++++++++++------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/source.py b/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/source.py index 2983b2dd19bef..bdb157921ec92 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/source.py +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/source.py @@ -264,19 +264,15 @@ def stream_slices( else: start_date = self.config["date_ranges_start_date"] - timedelta: int = self.config["window_in_days"] - while start_date <= today: - end_date: datetime.date = start_date + datetime.timedelta(days=timedelta) - if timedelta > 1 and end_date > today: - end_date: datetime.date = start_date + datetime.timedelta(days=timedelta - (end_date - today).days) - if self._stop_iteration: return - yield {"startDate": utils.date_to_string(start_date), "endDate": utils.date_to_string(end_date)} - - start_date: datetime.date = end_date + datetime.timedelta(days=1) + yield { + "startDate": utils.date_to_string(start_date), + "endDate": utils.date_to_string(min(start_date + datetime.timedelta(days=self.config["window_in_days"] - 1), today)), + } + start_date += 
datetime.timedelta(days=self.config["window_in_days"]) class GoogleAnalyticsDataApiMetadataStream(GoogleAnalyticsDataApiAbstractStream): diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py index 57383a9a5f5ec..2afa62764e071 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py @@ -280,23 +280,26 @@ def test_stream_slices(): stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config) slices = list(stream.stream_slices(sync_mode=None)) assert slices == [ - {'startDate': '2022-12-29', 'endDate': '2022-12-30'}, - {'startDate': '2022-12-31', 'endDate': '2023-01-01'}, + {"startDate": "2022-12-29", "endDate": "2022-12-29"}, + {"startDate": "2022-12-30", "endDate": "2022-12-30"}, + {"startDate": "2022-12-31", "endDate": "2022-12-31"}, + {"startDate": "2023-01-01", "endDate": "2023-01-01"}, ] config = {"date_ranges_start_date": datetime.date(2022, 12, 28), "window_in_days": 2} stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config) slices = list(stream.stream_slices(sync_mode=None)) assert slices == [ - {'startDate': '2022-12-28', 'endDate': '2022-12-30'}, - {'startDate': '2022-12-31', 'endDate': '2023-01-01'}, + {"startDate": "2022-12-28", "endDate": "2022-12-29"}, + {"startDate": "2022-12-30", "endDate": "2022-12-31"}, + {"startDate": "2023-01-01", "endDate": "2023-01-01"}, ] config = {"date_ranges_start_date": datetime.date(2022, 12, 20), "window_in_days": 5} stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config) slices = list(stream.stream_slices(sync_mode=None)) assert slices == [ - {'startDate': '2022-12-20', 'endDate': '2022-12-25'}, - {'startDate': '2022-12-26', 'endDate': '2022-12-31'}, - {'startDate': '2023-01-01', 
'endDate': '2023-01-01'}, + {"startDate": "2022-12-20", "endDate": "2022-12-24"}, + {"startDate": "2022-12-25", "endDate": "2022-12-29"}, + {"startDate": "2022-12-30", "endDate": "2023-01-01"}, ] From d57308346b7c6227deff596dffa715f3ee4dd946 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Tue, 10 Jan 2023 06:39:49 +0000 Subject: [PATCH 04/14] bump 0.1.1 Signed-off-by: Sergey Chvalyuk --- .../connectors/source-google-analytics-data-api/Dockerfile | 2 +- docs/integrations/sources/google-analytics-v4.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/Dockerfile b/airbyte-integrations/connectors/source-google-analytics-data-api/Dockerfile index 25ea59c934e45..cd86dac643593 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/Dockerfile +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/Dockerfile @@ -28,5 +28,5 @@ COPY source_google_analytics_data_api ./source_google_analytics_data_api ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-google-analytics-data-api diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 5043ed1a2c2a4..5a6e4abdfddaa 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -70,6 +70,7 @@ added by default to any report. There are 8 default reports. 
To add more reports | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------| +| 0.1.1 | 2023-01-10 | [21169](https://github.com/airbytehq/airbyte/pull/21169) | Slicer updated, unit tests added | | 0.1.0 | 2023-01-08 | [20889](https://github.com/airbytehq/airbyte/pull/20889) | Improved config validation, SAT | | 0.0.3 | 2022-08-15 | [15229](https://github.com/airbytehq/airbyte/pull/15229) | Source Google Analytics Data Api: code refactoring | | 0.0.2 | 2022-07-27 | [15087](https://github.com/airbytehq/airbyte/pull/15087) | fix documentationUrl | From c17df9dc31299e05ab91ba8432f8954a919091b3 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Tue, 10 Jan 2023 07:59:34 +0000 Subject: [PATCH 05/14] improve spec.json Signed-off-by: Sergey Chvalyuk --- .../source_google_analytics_data_api/spec.json | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/spec.json b/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/spec.json index 9ff3ddc21cdba..0fb6011840643 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/spec.json +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/spec.json @@ -7,12 +7,6 @@ "required": ["property_id", "date_ranges_start_date"], "additionalProperties": true, "properties": { - "property_id": { - "type": "string", - "title": "Property ID", - "description": "A Google Analytics GA4 property identifier whose events are tracked. 
Specified in the URL path and not the body", - "order": 1 - }, "credentials": { "order": 0, "type": "object", @@ -33,7 +27,6 @@ "title": "Client ID", "type": "string", "description": "The Client ID of your Google Analytics developer application.", - "airbyte_secret": true, "order": 1 }, "client_secret": { @@ -76,12 +69,19 @@ "examples": [ "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID, \"private_key_id\": YOUR_PRIVATE_KEY, ... }" ], - "airbyte_secret": true + "airbyte_secret": true, + "order": 1 } } } ] }, + "property_id": { + "type": "string", + "title": "Property ID", + "description": "A Google Analytics GA4 property identifier whose events are tracked. Specified in the URL path and not the body", + "order": 1 + }, "date_ranges_start_date": { "type": "string", "title": "Start Date", From aeac6e97d7a9a926c67dbcbb8c83ba154500e6b4 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Tue, 10 Jan 2023 08:48:49 +0000 Subject: [PATCH 06/14] setup.py updated Signed-off-by: Sergey Chvalyuk --- .../connectors/source-google-analytics-data-api/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/setup.py b/airbyte-integrations/connectors/source-google-analytics-data-api/setup.py index 5e18b8eae8ada..def5d6fb5f346 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/setup.py +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk~=0.1", "google-analytics-data==0.11.2", "PyJWT==2.4.0", "cryptography==37.0.4", "requests==2.28.1"] +MAIN_REQUIREMENTS = ["airbyte-cdk~=0.16", "PyJWT==2.4.0", "cryptography==37.0.4", "requests==2.28.1"] TEST_REQUIREMENTS = [ "freezegun", From 12951b4d872cced9e56dafa9278fdcda960a0f6e Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Tue, 10 Jan 2023 11:43:58 +0000 Subject: [PATCH 07/14] first 
improvement Signed-off-by: Sergey Chvalyuk --- docs/integrations/sources/google-analytics-v4.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 5a6e4abdfddaa..4e596dbda8c72 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -6,10 +6,11 @@ This connector supports GA4 properties through the [Analytics Data API v1](https ## Prerequisites -* JSON credentials for the service account that has access to Google Analytics. For more details check [instructions](https://support.google.com/analytics/answer/1009702#zippy=%2Cin-this-article) +* JSON credentials for the service account that has access to Google Analytics. For more details check [instructions](https://support.google.com/analytics/answer/1009702) * OAuth 2.0 credentials for the service account that has access to Google Analytics * Property ID -* Date Range Start Date +* Start Date +* Custom Reports (Optional) * Data request time increment in days (Optional) ## Custom reports @@ -27,7 +28,7 @@ This connector supports GA4 properties through the [Analytics Data API v1](https First, you need to select existing or create a new project in the Google Developers Console: 1. Sign in to the Google Account you are using for Google Analytics as an admin. -2. Go to the [Service accounts page](https://console.developers.google.com/iam-admin/serviceaccounts). +2. Go to the [Service Accounts](https://console.developers.google.com/iam-admin/serviceaccounts) page. 3. Click `Create service account`. 4. Create a JSON key file for the service user. The contents of this file will be provided as the `credentials_json` in the UI when authorizing GA after you grant permissions \(see below\). 
@@ -42,7 +43,7 @@ Use the service account email address to [add a user](https://support.google.com ### Property ID -Specify the Property ID as set [here](https://analytics.google.com/analytics/web/a54907729p153687530/admin/property/settings) +To determine a Google Analytics 4 [Property ID](https://developers.google.com/analytics/devguides/reporting/data/v1/property-id#what_is_my_property_id) ## Step 2: Set up the source connector in Airbyte @@ -63,8 +64,8 @@ The Google Analytics source connector supports the following [sync modes](https: # Reports -The reports are custom by setting the dimensions and metrics required. To support Incremental sync, the `uuid` field is -added by default to any report. There are 8 default reports. To add more reports, you need to specify the `custom reports` field. +The reports are custom by setting the dimensions and metrics required. To support Incremental sync, the `date` dimension is +added by default to all reports. There are 8 default reports. To add more reports, you need to specify the `custom reports` field. ## Changelog From 9f59b8096e118468326882416244d6c7d6695760 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Tue, 10 Jan 2023 11:47:15 +0000 Subject: [PATCH 08/14] change "Supported sync modes" Signed-off-by: Sergey Chvalyuk --- docs/integrations/sources/google-analytics-v4.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 4e596dbda8c72..5b561844e804c 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -53,8 +53,11 @@ custom reports, date ranges start date, data request time increment in days. 
## Supported sync modes The Google Analytics source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): - - Full Refresh - - Incremental + +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/glossary#full-refresh-sync) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) ## Rate Limits & Performance Considerations \(Airbyte Open-Source\) From bfbc6e2aff373271d59531cdd2dbd5ae3afd7353 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Tue, 10 Jan 2023 12:07:50 +0000 Subject: [PATCH 09/14] continue to update Signed-off-by: Sergey Chvalyuk --- .../sources/google-analytics-v4.md | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 5b561844e804c..1904a5bb69369 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -13,14 +13,6 @@ This connector supports GA4 properties through the [Analytics Data API v1](https * Custom Reports (Optional) * Data request time increment in days (Optional) -## Custom reports - -* Support for multiple custom reports -* Custom reports in format `[{"name": "", "dimensions": ["", ...], "metrics": ["", ...]}]` -* Custom report format when using segments and / or filters `[{"name": "", "dimensions": ["", ...], "metrics": ["", ...], "segments": [""}]` -* When using segments, make sure you add the `ga:segment` dimension. 
-* Custom reports: [Dimensions and metrics explorer](https://ga-dev-tools.web.app/dimensions-metrics-explorer/) - ## Step 1: Set up Source ### Create a Service Account @@ -45,10 +37,19 @@ Use the service account email address to [add a user](https://support.google.com To determine a Google Analytics 4 [Property ID](https://developers.google.com/analytics/devguides/reporting/data/v1/property-id#what_is_my_property_id) -## Step 2: Set up the source connector in Airbyte +## Step 2: Set up the Google Analytics connector in Airbyte + +**For Airbyte Cloud:** -Set the required fields in the Google Analytics Data API connector page such as the JSON credentials, property ID, -custom reports, date ranges start date, data request time increment in days. +1. [Log in to your Airbyte Cloud](https://cloud.airbyte.io/workspaces) account. +2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ new source**. +3. On the source setup page, select **Google Analytics 4 (GA4)** from the Source type dropdown and enter a name for this connector. +4. Click `Authenticate your account` by selecting OAuth or Service Account for Authentication. +5. Log in and authorize the Google Analytics account. +6. Enter the **Property ID** whose events are tracked. +7. Enter the **Start Date** from which to replicate report data in the format YYYY-MM-DD. +8. Enter the **Custom Reports (Optional)**: a JSON array describing the custom reports you want to sync from Google Analytics. +9. Enter the **Data request time increment in days (Optional)**. The bigger this value is, the faster the sync will be, but the more likely that sampling will be applied to your data, potentially causing inaccuracies in the returned results. We recommend setting this to 1 unless you have a hard requirement to make the sync faster at the expense of accuracy. The minimum allowed value for this field is 1, and the maximum is 364. 
## Supported sync modes @@ -59,6 +60,14 @@ The Google Analytics source connector supports the following [sync modes](https: - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) - [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +## Custom reports + +* Support for multiple custom reports +* Custom reports in format `[{"name": "", "dimensions": ["", ...], "metrics": ["", ...]}]` +* Custom report format when using segments and / or filters `[{"name": "", "dimensions": ["", ...], "metrics": ["", ...], "segments": [""]}]` +* When using segments, make sure you add the `ga:segment` dimension. +* Custom reports: [Dimensions and metrics explorer](https://ga-dev-tools.web.app/dimensions-metrics-explorer/) + ## Rate Limits & Performance Considerations \(Airbyte Open-Source\) [Google Analytics Data API](https://developers.google.com/analytics/devguides/reporting/data/v1/quotas) From d37d8b6912f3d300aa9c3baec33553ab2b054922 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Tue, 10 Jan 2023 12:26:20 +0000 Subject: [PATCH 10/14] google-analytics-v4.md updated Signed-off-by: Sergey Chvalyuk --- docs/integrations/sources/google-analytics-v4.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 1904a5bb69369..9ef106cbfed93 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -60,6 +60,19 @@ The Google Analytics source connector supports the following [sync modes](https: - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) - [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +## Supported Streams + +This connector outputs the following incremental streams: + +* 
[daily_active_users](https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/properties/runReport) +* [devices](https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/properties/runReport) +* [four_weekly_active_users](https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/properties/runReport) +* [locations](https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/properties/runReport) +* [pages](https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/properties/runReport) +* [traffic_sources](https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/properties/runReport) +* [website_overview](https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/properties/runReport) +* [weekly_active_users](https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/properties/runReport) + ## Custom reports * Support for multiple custom reports From a7b8fa6fa13ee478176ae20a963fd201c1804533 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Tue, 10 Jan 2023 12:31:52 +0000 Subject: [PATCH 11/14] alpha -> beta Signed-off-by: Sergey Chvalyuk --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index aa9ba34dddbec..6bc094fa33be1 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -626,7 +626,7 @@ documentationUrl: https://docs.airbyte.com/integrations/sources/google-analytics-v4 icon: google-analytics.svg sourceType: api - releaseStage: alpha + releaseStage: beta - name: Google Directory sourceDefinitionId: d19ae824-e289-4b14-995a-0632eb46d246 dockerRepository: 
airbyte/source-google-directory From b34b44e5c30503baf0fb2b9b0beecc2cf9ab6cfc Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Tue, 10 Jan 2023 12:59:57 +0000 Subject: [PATCH 12/14] fix custom-reports url Signed-off-by: Sergey Chvalyuk --- .../source_google_analytics_data_api/spec.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/spec.json b/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/spec.json index 0fb6011840643..ca7c39512e1a2 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/spec.json +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/spec.json @@ -93,7 +93,7 @@ "order": 3, "type": "string", "title": "Custom Reports", - "description": "A JSON array describing the custom reports you want to sync from Google Analytics. See the docs for more information about the exact format you can use to fill out this field." + "description": "A JSON array describing the custom reports you want to sync from Google Analytics. See the docs for more information about the exact format you can use to fill out this field." 
}, "window_in_days": { "type": "integer", From cba0f5c3be32191210b07834057a479b73a4205a Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Wed, 11 Jan 2023 07:51:22 +0000 Subject: [PATCH 13/14] test_read_incremental added Signed-off-by: Sergey Chvalyuk --- .../unit_tests/test_streams.py | 86 +++++++++++++++++++ .../unit_tests/utils.py | 17 ++++ 2 files changed, 103 insertions(+) create mode 100644 airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/utils.py diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py index 2afa62764e071..4102575dbb3bc 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py @@ -12,6 +12,8 @@ from freezegun import freeze_time from source_google_analytics_data_api.source import GoogleAnalyticsDataApiBaseStream +from .utils import read_incremental + json_credentials = """ { "type": "service_account", @@ -303,3 +305,87 @@ def test_stream_slices(): {"startDate": "2022-12-25", "endDate": "2022-12-29"}, {"startDate": "2022-12-30", "endDate": "2023-01-01"}, ] + + +def test_read_incremental(requests_mock): + config = { + "property_id": 123, + "date_ranges_start_date": datetime.date(2022, 12, 29), + "window_in_days": 1, + "dimensions": ["date"], + "metrics": ["totalUsers"], + } + + stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config) + stream_state = {} + + responses = [ + { + "dimensionHeaders": [{"name": "date"}], + "metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}], + "rows": [{"dimensionValues": [{"value": "20221229"}], "metricValues": [{"value": "100"}]}], + "rowCount": 1 + }, + { + "dimensionHeaders": [{"name": "date"}], + "metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}], + 
"rows": [{"dimensionValues": [{"value": "20221230"}], "metricValues": [{"value": "110"}]}], + "rowCount": 1 + }, + { + "dimensionHeaders": [{"name": "date"}], + "metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}], + "rows": [{"dimensionValues": [{"value": "20221231"}], "metricValues": [{"value": "120"}]}], + "rowCount": 1 + }, + { + "dimensionHeaders": [{"name": "date"}], + "metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}], + "rows": [{"dimensionValues": [{"value": "20230101"}], "metricValues": [{"value": "130"}]}], + "rowCount": 1 + }, + { + "dimensionHeaders": [{"name": "date"}], + "metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}], + "rows": [{"dimensionValues": [{"value": "20230101"}], "metricValues": [{"value": "140"}]}], + "rowCount": 1 + }, + { + "dimensionHeaders": [{"name": "date"}], + "metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}], + "rows": [{"dimensionValues": [{"value": "20230102"}], "metricValues": [{"value": "150"}]}], + "rowCount": 1 + } + ] + + requests_mock.register_uri( + "POST", + "https://analyticsdata.googleapis.com/v1beta/properties/123:runReport", + json=lambda request, context: responses.pop(0), + ) + + with freeze_time("2023-01-01 12:00:00"): + records = list(read_incremental(stream, stream_state)) + + for record in records: + del record["uuid"] + + assert records == [ + {"date": "20221229", "totalUsers": 100, "property_id": 123}, + {"date": "20221230", "totalUsers": 110, "property_id": 123}, + {"date": "20221231", "totalUsers": 120, "property_id": 123}, + {"date": "20230101", "totalUsers": 130, "property_id": 123}, + ] + + assert stream_state == {"date": "20230101"} + + with freeze_time("2023-01-02 12:00:00"): + records = list(read_incremental(stream, stream_state)) + + for record in records: + del record["uuid"] + + assert records == [ + {"date": "20230101", "totalUsers": 140, "property_id": 123}, + {"date": "20230102", "totalUsers": 150, "property_id": 123}, + ] diff --git 
a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/utils.py b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/utils.py new file mode 100644 index 0000000000000..0c3705ef0e93f --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/utils.py @@ -0,0 +1,17 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + +from typing import Any, MutableMapping + +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources.streams import Stream + + +def read_incremental(stream_instance: Stream, stream_state: MutableMapping[str, Any]): + slices = stream_instance.stream_slices(sync_mode=SyncMode.incremental, stream_state=stream_state) + for _slice in slices: + records = stream_instance.read_records(sync_mode=SyncMode.incremental, stream_slice=_slice, stream_state=stream_state) + for record in records: + stream_state = stream_instance.get_updated_state(stream_state, record) + yield record From 706be68496e37ff7bf5221bd5ec1b3126602d551 Mon Sep 17 00:00:00 2001 From: Octavia Squidington III Date: Wed, 11 Jan 2023 14:57:53 +0000 Subject: [PATCH 14/14] auto-bump connector version --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 1eb82c17e7bd9..255c6a68f94c5 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -651,7 +651,7 @@ - name: Google Analytics 4 (GA4) sourceDefinitionId: 3cc2eafd-84aa-4dca-93af-322d9dfeec1a dockerRepository: airbyte/source-google-analytics-data-api - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.1 documentationUrl: 
https://docs.airbyte.com/integrations/sources/google-analytics-v4 icon: google-analytics.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 6528f9b8c1705..8c4a5435eb9bf 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -5383,7 +5383,7 @@ oauthFlowOutputParameters: - - "access_token" - - "refresh_token" -- dockerImage: "airbyte/source-google-analytics-data-api:0.1.0" +- dockerImage: "airbyte/source-google-analytics-data-api:0.1.1" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/google-analytics-v4" connectionSpecification: @@ -5395,12 +5395,6 @@ - "date_ranges_start_date" additionalProperties: true properties: - property_id: - type: "string" - title: "Property ID" - description: "A Google Analytics GA4 property identifier whose events are\ - \ tracked. Specified in the URL path and not the body" - order: 1 credentials: order: 0 type: "object" @@ -5422,7 +5416,6 @@ title: "Client ID" type: "string" description: "The Client ID of your Google Analytics developer application." - airbyte_secret: true order: 1 client_secret: title: "Client Secret" @@ -5460,6 +5453,13 @@ - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" airbyte_secret: true + order: 1 + property_id: + type: "string" + title: "Property ID" + description: "A Google Analytics GA4 property identifier whose events are\ + \ tracked. Specified in the URL path and not the body" + order: 1 date_ranges_start_date: type: "string" title: "Start Date" @@ -5473,7 +5473,7 @@ type: "string" title: "Custom Reports" description: "A JSON array describing the custom reports you want to sync\ - \ from Google Analytics. See the docs for more information about the exact format you can use\ \ to fill out this field." window_in_days: