Skip to content

Commit

Permalink
Source Google Analytics (GA4): Fix Pagination (#26126)
Browse files Browse the repository at this point in the history
* Fix paggination and add offset and limit to acceptable parameters in request body

* Change next_page_token and add tests

* Update dockerImageTag

* Update PR version

* Remove minimum, maximum, pattern fields

* Remove pattern limit and offset from test_source.py

* Remove offset and limit string type

* Remove offset and limit string type

* Increase limit number to 100000 and remove limit and offset from parameters

* Change return type value of next_page_token from int to dict

* Change return type value of next_page_token from int to dict

* Change page_size to offset and add constant PAGE_SIZE equals 100000

* Add comment to PAGE_SIZE constant and add constant to unit tests

* Remove offset and limit from PivotReport

* Import PAGE_SIZE in unit_tests from source.py

---------

Co-authored-by: Marcos Marx <marcosmarxm@users.noreply.github.com>
Co-authored-by: sh4sh <6833405+sh4sh@users.noreply.github.com>
  • Loading branch information
3 people committed May 31, 2023
1 parent c10bd5e commit 746ccb4
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ COPY source_google_analytics_data_api ./source_google_analytics_data_api
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.2
LABEL io.airbyte.version=0.2.3
LABEL io.airbyte.name=airbyte/source-google-analytics-data-api
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 3cc2eafd-84aa-4dca-93af-322d9dfeec1a
dockerImageTag: 0.2.2
dockerImageTag: 0.2.3
dockerRepository: airbyte/source-google-analytics-data-api
githubIssueLabel: source-google-analytics-data-api
icon: google-analytics.svg
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
# the initial values should be saved once and tracked for each stream, inclusivelly.
GoogleAnalyticsQuotaHandler: GoogleAnalyticsApiQuota = GoogleAnalyticsApiQuota()

# set page_size to 100000 due to determination of maximum limit value in official documentation
# https://developers.google.com/analytics/devguides/reporting/data/v1/basics#pagination
PAGE_SIZE = 100000


class ConfigurationError(Exception):
pass
Expand Down Expand Up @@ -90,6 +94,7 @@ class GoogleAnalyticsDataApiBaseStream(GoogleAnalyticsDataApiAbstractStream):

_record_date_format = "%Y%m%d"
primary_key = "uuid"
offset = 0

metadata = MetadataDescriptor()

Expand Down Expand Up @@ -154,13 +159,19 @@ def get_json_schema(self) -> Mapping[str, Any]:
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
r = response.json()

if all(key in r for key in ["limit", "offset", "rowCount"]):
limit, offset, total_rows = r["limit"], r["offset"], r["rowCount"]
if "rowCount" in r:
total_rows = r["rowCount"]

if self.offset == 0:
self.offset = PAGE_SIZE
else:
self.offset += PAGE_SIZE

if total_rows <= offset:
return None
if total_rows <= self.offset:
self.offset = 0
return

return {"limit": limit, "offset": offset + limit}
return {"offset": self.offset}

def path(
self, *, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
Expand Down Expand Up @@ -201,12 +212,17 @@ def request_body_json(
stream_slice: Mapping[str, Any] = None,
next_page_token: Mapping[str, Any] = None,
) -> Optional[Mapping]:

payload = {
"metrics": [{"name": m} for m in self.config["metrics"]],
"dimensions": [{"name": d} for d in self.config["dimensions"]],
"dateRanges": [stream_slice],
"returnPropertyQuota": True,
"offset": str(0),
"limit": str(PAGE_SIZE)
}
if next_page_token and next_page_token.get("offset") is not None:
payload.update({"offset": str(next_page_token["offset"])})
return payload

def stream_slices(
Expand Down Expand Up @@ -243,6 +259,11 @@ def request_body_json(
next_page_token: Mapping[str, Any] = None,
) -> Optional[Mapping]:
payload = super().request_body_json(stream_state, stream_slice, next_page_token)

# remove offset and limit fields according to their absence in
# https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/properties/runPivotReport
payload.pop("offset", None)
payload.pop("limit", None)
payload["pivots"] = self.config["pivots"]
return payload

Expand Down Expand Up @@ -398,7 +419,11 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
def instantiate_report_class(report: dict, config: Mapping[str, Any]) -> GoogleAnalyticsDataApiBaseStream:
cohort_spec = report.get("cohortSpec")
pivots = report.get("pivots")
stream_config = {"metrics": report["metrics"], "dimensions": report["dimensions"], **config}
stream_config = {
"metrics": report["metrics"],
"dimensions": report["dimensions"],
**config
}
report_class_tuple = (GoogleAnalyticsDataApiBaseStream,)
if pivots:
stream_config["pivots"] = pivots
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@

import pytest
from freezegun import freeze_time
from source_google_analytics_data_api.source import GoogleAnalyticsDataApiBaseStream
from source_google_analytics_data_api.source import GoogleAnalyticsDataApiBaseStream, PAGE_SIZE

from .utils import read_incremental


json_credentials = """
{
"type": "service_account",
Expand Down Expand Up @@ -88,6 +89,8 @@ def test_request_body_json(patch_base_class):
],
"dateRanges": [request_body_params["stream_slice"]],
"returnPropertyQuota": True,
"offset": str(0),
"limit": str(PAGE_SIZE)
}

request_body_json = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"]).request_body_json(**request_body_params)
Expand All @@ -98,21 +101,15 @@ def test_next_page_token_equal_chunk(patch_base_class):
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
response = MagicMock()
response.json.side_effect = [
{"limit": 100000, "offset": 0, "rowCount": 200000},
{"limit": 100000, "offset": 100000, "rowCount": 200000},
{"limit": 100000, "offset": 200000, "rowCount": 200000},
{"rowCount": 300000},
{"rowCount": 300000},
{"rowCount": 300000},
]
inputs = {"response": response}

expected_tokens = [
{
"limit": 100000,
"offset": 100000,
},
{
"limit": 100000,
"offset": 200000,
},
{"offset": 100000},
{"offset": 200000},
None,
]

Expand All @@ -124,26 +121,19 @@ def test_next_page_token(patch_base_class):
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
response = MagicMock()
response.json.side_effect = [
{"limit": 100000, "offset": 0, "rowCount": 250000},
{"limit": 100000, "offset": 100000, "rowCount": 250000},
{"limit": 100000, "offset": 200000, "rowCount": 250000},
{"limit": 100000, "offset": 300000, "rowCount": 250000},
{"rowCount": 450000},
{"rowCount": 450000},
{"rowCount": 450000},
{"rowCount": 450000},
{"rowCount": 450000},
]
inputs = {"response": response}

expected_tokens = [
{
"limit": 100000,
"offset": 100000,
},
{
"limit": 100000,
"offset": 200000,
},
{
"limit": 100000,
"offset": 300000,
},
{"offset": 100000},
{"offset": 200000},
{"offset": 300000},
{"offset": 400000},
None,
]

Expand Down
25 changes: 13 additions & 12 deletions docs/integrations/sources/google-analytics-data-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,16 @@ This connector outputs the following incremental streams:

## Changelog

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------|
| 0.2.2 | 2023-05-12 | [25987](https://github.com/airbytehq/airbyte/pull/25987) | Categorized Config Errors Accurately |
| 0.2.1 | 2023-05-11 | [26008](https://github.com/airbytehq/airbyte/pull/26008) | Added handling for `429 - potentiallyThresholdedRequestsPerHour` error |
| 0.2.0 | 2023-04-13 | [25179](https://github.com/airbytehq/airbyte/pull/25179) | Implement support for custom Cohort and Pivot reports |
| 0.1.3 | 2023-03-10 | [23872](https://github.com/airbytehq/airbyte/pull/23872) | Fix parse + cursor for custom reports |
| 0.1.2 | 2023-03-07 | [23822](https://github.com/airbytehq/airbyte/pull/23822) | Improve `rate limits` customer faced error messages and retry logic for `429` |
| 0.1.1 | 2023-01-10 | [21169](https://github.com/airbytehq/airbyte/pull/21169) | Slicer updated, unit tests added |
| 0.1.0 | 2023-01-08 | [20889](https://github.com/airbytehq/airbyte/pull/20889) | Improved config validation, SAT |
| 0.0.3 | 2022-08-15 | [15229](https://github.com/airbytehq/airbyte/pull/15229) | Source Google Analytics Data Api: code refactoring |
| 0.0.2 | 2022-07-27 | [15087](https://github.com/airbytehq/airbyte/pull/15087) | fix documentationUrl |
| 0.0.1 | 2022-05-09 | [12701](https://github.com/airbytehq/airbyte/pull/12701) | Introduce Google Analytics Data API source |
| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------|
| 0.2.3 | 2023-05-16 | [26126](https://github.com/airbytehq/airbyte/pull/26126) | Fix pagination |
| 0.2.2 | 2023-05-12 | [25987](https://github.com/airbytehq/airbyte/pull/25987) | Categorized Config Errors Accurately |
| 0.2.1 | 2023-05-11 | [26008](https://github.com/airbytehq/airbyte/pull/26008) | Added handling for `429 - potentiallyThresholdedRequestsPerHour` error |
| 0.2.0 | 2023-04-13 | [25179](https://github.com/airbytehq/airbyte/pull/25179) | Implement support for custom Cohort and Pivot reports |
| 0.1.3 | 2023-03-10 | [23872](https://github.com/airbytehq/airbyte/pull/23872) | Fix parse + cursor for custom reports |
| 0.1.2 | 2023-03-07 | [23822](https://github.com/airbytehq/airbyte/pull/23822) | Improve `rate limits` customer faced error messages and retry logic for `429` |
| 0.1.1 | 2023-01-10 | [21169](https://github.com/airbytehq/airbyte/pull/21169) | Slicer updated, unit tests added |
| 0.1.0 | 2023-01-08 | [20889](https://github.com/airbytehq/airbyte/pull/20889) | Improved config validation, SAT |
| 0.0.3 | 2022-08-15 | [15229](https://github.com/airbytehq/airbyte/pull/15229) | Source Google Analytics Data Api: code refactoring |
| 0.0.2 | 2022-07-27 | [15087](https://github.com/airbytehq/airbyte/pull/15087) | fix documentationUrl |
| 0.0.1 | 2022-05-09 | [12701](https://github.com/airbytehq/airbyte/pull/12701) | Introduce Google Analytics Data API source |

0 comments on commit 746ccb4

Please sign in to comment.