Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Source Google Analytics Data API: ability to add multiple property ids #30152

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ COPY source_google_analytics_data_api ./source_google_analytics_data_api
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=1.2.0
LABEL io.airbyte.version=1.3.0
LABEL io.airbyte.name=airbyte/source-google-analytics-data-api
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ acceptance_tests:
tests:
- spec_path: "source_google_analytics_data_api/spec.json"
backward_compatibility_tests_config:
disable_for_version: 0.2.1
disable_for_version: 1.2.0
connection:
tests:
- config_path: "secrets/config.json"
Expand All @@ -17,6 +17,8 @@ acceptance_tests:
discovery:
tests:
- config_path: "secrets/config.json"
backward_compatibility_tests_config:
disable_for_version: 1.2.0
basic_read:
tests:
- config_path: "secrets/config.json"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"property_id": "1",
"property_id": ["1"],
"json_credentials": "wrong",
"report_name": "crash_report",
"dimensions": "date, operatingSystem, streamId",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

from airbyte_cdk.entrypoint import launch
from source_google_analytics_data_api import SourceGoogleAnalyticsDataApi
from source_google_analytics_data_api.config_migrations import MigrateCustomReports

if __name__ == "__main__":
    # Entry point for the connector container.
    source = SourceGoogleAnalyticsDataApi()
    # Migrate a legacy single `property_id` config to the `property_ids` list
    # (in place, before the CDK parses the config) so old configs keep working.
    MigrateCustomReports.migrate(sys.argv[1:], source)
    # Hand control to the Airbyte CDK entrypoint with the (possibly migrated) args.
    launch(source, sys.argv[1:])
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 3cc2eafd-84aa-4dca-93af-322d9dfeec1a
dockerImageTag: 1.2.0
dockerImageTag: 1.3.0
dockerRepository: airbyte/source-google-analytics-data-api
githubIssueLabel: source-google-analytics-data-api
icon: google-analytics.svg
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import logging
from typing import Any, List, Mapping

from airbyte_cdk.config_observation import create_connector_config_control_message
from airbyte_cdk.entrypoint import AirbyteEntrypoint
from airbyte_cdk.sources import Source
from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository

logger = logging.getLogger("airbyte_logger")


class MigrateCustomReports:
    """
    Migrates the connector config at runtime, while providing backward
    compatibility when falling back to the previous source version.

    Specifically, starting from `1.3.0`, the config must contain `property_ids`:
        > List: ["<property_id 1>", "<property_id 2>", ..., "<property_id n>"]
    instead of the single-string `property_id` used in `1.2.0`:
        > JSON STR: "<property_id>"
    """

    # buffers the CONTROL message before it is flushed to stdout
    message_repository: MessageRepository = InMemoryMessageRepository()
    # legacy config key: single property id as a string
    migrate_from_key: str = "property_id"
    # new config key: list of property ids
    migrate_to_key: str = "property_ids"

    @classmethod
    def should_migrate(cls, config: Mapping[str, Any]) -> bool:
        """
        This method determines whether the config requires migration.
        Returns:
            > True, if the transformation is necessary
            > False, otherwise.
        """
        return cls.migrate_from_key in config

    @classmethod
    def transform_to_array(cls, config: Mapping[str, Any], source: Source = None) -> Mapping[str, Any]:
        """
        Moves the legacy `property_id` value into the `property_ids` list,
        keeping any ids already present and avoiding duplicates.
        The `source` parameter is unused; kept for signature compatibility.
        """
        # ensure the new key exists before carrying the old value over
        config.setdefault(cls.migrate_to_key, [])
        # remove the legacy key; its value is appended to the new list
        data = config.pop(cls.migrate_from_key)
        if data not in config[cls.migrate_to_key]:
            config[cls.migrate_to_key].append(data)
        return config

    @classmethod
    def modify_and_save(cls, config_path: str, source: Source, config: Mapping[str, Any]) -> Mapping[str, Any]:
        """Transforms the config, persists it back to `config_path`, and returns it."""
        # modify the config
        migrated_config = cls.transform_to_array(config, source)
        # save the config
        source.write_config(migrated_config, config_path)
        # return modified config
        return migrated_config

    @classmethod
    def emit_control_message(cls, migrated_config: Mapping[str, Any]) -> None:
        """Emits an Airbyte CONTROL message so the platform persists the migrated config."""
        # add the Airbyte Control Message to message repo
        cls.message_repository.emit_message(create_connector_config_control_message(migrated_config))
        # emit the Airbyte Control Message from message queue to stdout
        # NOTE(review): reads the repository's private `_message_queue` attribute;
        # consider a public drain API if the CDK provides one.
        for message in cls.message_repository._message_queue:
            print(message.json(exclude_unset=True))

    @classmethod
    def migrate(cls, args: List[str], source: Source) -> None:
        """
        This method checks the input args; should the config be migrated,
        transforms it if necessary and emits the CONTROL message.
        """
        # get config path
        config_path = AirbyteEntrypoint(source).extract_config(args)
        # proceed only if `--config` arg is provided
        if config_path:
            # read the existing config
            config = source.read_config(config_path)
            # migration check
            if cls.should_migrate(config):
                cls.emit_control_message(
                    cls.modify_and_save(config_path, source, config),
                )
Original file line number Diff line number Diff line change
Expand Up @@ -413,67 +413,79 @@ def get_authenticator(self, config: Mapping[str, Any]):
return authenticator_class(**get_credentials(credentials))

def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
reports = json.loads(pkgutil.get_data("source_google_analytics_data_api", "defaults/default_reports.json"))
try:
config = self._validate_and_transform(config, report_names={r["name"] for r in reports})
except ConfigurationError as e:
return False, str(e)
config["authenticator"] = self.get_authenticator(config)
for property_id in config["property_ids"]:
reports = json.loads(pkgutil.get_data("source_google_analytics_data_api", "defaults/default_reports.json"))
try:
config = self._validate_and_transform(config, report_names={r["name"] for r in reports})
except ConfigurationError as e:
return False, str(e)
config["authenticator"] = self.get_authenticator(config)

metadata = None
try:
# explicitly setting small page size for the check operation not to cause OOM issues
stream = GoogleAnalyticsDataApiMetadataStream(config=config, authenticator=config["authenticator"])
metadata = next(stream.read_records(sync_mode=SyncMode.full_refresh), None)
except HTTPError as e:
error_list = [HTTPStatus.BAD_REQUEST, HTTPStatus.FORBIDDEN]
if e.response.status_code in error_list:
internal_message = f"Incorrect Property ID: {config['property_id']}"
property_id_docs_url = (
"https://developers.google.com/analytics/devguides/reporting/data/v1/property-id#what_is_my_property_id"
)
message = f"Access was denied to the property ID entered. Check your access to the Property ID or use Google Analytics {property_id_docs_url} to find your Property ID."

wrong_property_id_error = AirbyteTracedException(
message=message, internal_message=internal_message, failure_type=FailureType.config_error
)
raise wrong_property_id_error

if not metadata:
return False, "failed to get metadata, over quota, try later"

dimensions = {d["apiName"] for d in metadata["dimensions"]}
metrics = {d["apiName"] for d in metadata["metrics"]}

for report in config["custom_reports"]:
invalid_dimensions = set(report["dimensions"]) - dimensions
if invalid_dimensions:
invalid_dimensions = ", ".join(invalid_dimensions)
return False, WRONG_DIMENSIONS.format(fields=invalid_dimensions, report_name=report["name"])
invalid_metrics = set(report["metrics"]) - metrics
if invalid_metrics:
invalid_metrics = ", ".join(invalid_metrics)
return False, WRONG_METRICS.format(fields=invalid_metrics, report_name=report["name"])
report_stream = self.instantiate_report_class(report, config, page_size=100)
# check if custom_report dimensions + metrics can be combined and report generated
stream_slice = next(report_stream.stream_slices(sync_mode=SyncMode.full_refresh))
next(report_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice), None)
return True, None
_config = config.copy()
_config["property_id"] = property_id

metadata = None
try:
# explicitly setting small page size for the check operation not to cause OOM issues
stream = GoogleAnalyticsDataApiMetadataStream(config=_config, authenticator=_config["authenticator"])
metadata = next(stream.read_records(sync_mode=SyncMode.full_refresh), None)
except HTTPError as e:
error_list = [HTTPStatus.BAD_REQUEST, HTTPStatus.FORBIDDEN]
if e.response.status_code in error_list:
internal_message = f"Incorrect Property ID: {property_id}"
property_id_docs_url = (
"https://developers.google.com/analytics/devguides/reporting/data/v1/property-id#what_is_my_property_id"
)
message = f"Access was denied to the property ID entered. Check your access to the Property ID or use Google Analytics {property_id_docs_url} to find your Property ID."

wrong_property_id_error = AirbyteTracedException(
message=message, internal_message=internal_message, failure_type=FailureType.config_error
)
raise wrong_property_id_error

if not metadata:
return False, "Failed to get metadata, over quota, try later"

dimensions = {d["apiName"] for d in metadata["dimensions"]}
metrics = {d["apiName"] for d in metadata["metrics"]}

for report in _config["custom_reports"]:
# Check if custom report dimensions supported. Compare them with dimensions provided by GA API
invalid_dimensions = set(report["dimensions"]) - dimensions
if invalid_dimensions:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add spaces between condition blocks and add comments for them

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

invalid_dimensions = ", ".join(invalid_dimensions)
return False, WRONG_DIMENSIONS.format(fields=invalid_dimensions, report_name=report["name"])

# Check if custom report metrics supported. Compare them with metrics provided by GA API
invalid_metrics = set(report["metrics"]) - metrics
if invalid_metrics:
invalid_metrics = ", ".join(invalid_metrics)
return False, WRONG_METRICS.format(fields=invalid_metrics, report_name=report["name"])

report_stream = self.instantiate_report_class(report, _config, page_size=100)
# check if custom_report dimensions + metrics can be combined and report generated
stream_slice = next(report_stream.stream_slices(sync_mode=SyncMode.full_refresh))
next(report_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice), None)

return True, None

def streams(self, config: Mapping[str, Any]) -> List[Stream]:
reports = json.loads(pkgutil.get_data("source_google_analytics_data_api", "defaults/default_reports.json"))
config = self._validate_and_transform(config, report_names={r["name"] for r in reports})
config["authenticator"] = self.get_authenticator(config)
return [self.instantiate_report_class(report, config) for report in reports + config["custom_reports"]]
return [stream for report in reports + config["custom_reports"] for stream in self.instantiate_report_streams(report, config)]

@staticmethod
def instantiate_report_class(report: dict, config: Mapping[str, Any], **extra_kwargs) -> GoogleAnalyticsDataApiBaseStream:
def instantiate_report_streams(self, report: dict, config: Mapping[str, Any], **extra_kwargs) -> GoogleAnalyticsDataApiBaseStream:
for property_id in config["property_ids"]:
yield self.instantiate_report_class(report=report, config={**config, "property_id": property_id})

def instantiate_report_class(self, report: dict, config: Mapping[str, Any], **extra_kwargs) -> GoogleAnalyticsDataApiBaseStream:
cohort_spec = report.get("cohortSpec")
pivots = report.get("pivots")
stream_config = {
**config,
"metrics": report["metrics"],
"dimensions": report["dimensions"],
**config,
}
report_class_tuple = (GoogleAnalyticsDataApiBaseStream,)
if pivots:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"$schema": "https://json-schema.org/draft-07/schema#",
"title": "Google Analytics (Data API) Spec",
"type": "object",
"required": ["property_id", "date_ranges_start_date"],
"required": ["property_ids", "date_ranges_start_date"],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you test those changes manually for backward compatibility?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did, but it seems I missed something. Now the migration is prepared and everything is tested

"additionalProperties": true,
"properties": {
"credentials": {
Expand Down Expand Up @@ -76,14 +76,16 @@
}
]
},
"property_id": {
"type": "string",
"title": "Property ID",
"property_ids": {
"title": "A list of your Property IDs",
"description": "The Property ID is a unique number assigned to each property in Google Analytics, found in your GA4 property URL. This ID allows the connector to track the specific events associated with your property. Refer to the <a href='https://developers.google.com/analytics/devguides/reporting/data/v1/property-id#what_is_my_property_id'>Google Analytics documentation</a> to locate your property ID.",
"pattern": "^[0-9]*$",
"pattern_descriptor": "123...",
"examples": ["1738294", "5729978930"],
"order": 1
"order": 1,
"type": "array",
"items": {
"type": "string",
"pattern": "^[0-9]*$"
},
"examples": [["1738294", "5729978930"]]
},
"date_ranges_start_date": {
"type": "string",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from unittest.mock import patch

from airbyte_cdk.entrypoint import AirbyteEntrypoint
from source_google_analytics_data_api import SourceGoogleAnalyticsDataApi
from source_google_analytics_data_api.config_migrations import MigrateCustomReports


@patch.object(SourceGoogleAnalyticsDataApi, "read_config")
@patch.object(SourceGoogleAnalyticsDataApi, "write_config")
@patch.object(AirbyteEntrypoint, "extract_config")
def test_migration(ab_entrypoint_extract_config_mock, source_write_config_mock, source_read_config_mock):
    """Verifies that a legacy `property_id` config is rewritten into a `property_ids` list."""
    source = SourceGoogleAnalyticsDataApi()

    # legacy-shaped config: single `property_id` string, no `property_ids`
    legacy_config = {
        "credentials": {"auth_type": "Service", "credentials_json": "<credentials string ...>"},
        "custom_reports": "<custom reports out of current test>",
        "date_ranges_start_date": "2023-09-01",
        "window_in_days": 30,
        "property_id": "111111111",
    }
    source_read_config_mock.return_value = legacy_config
    ab_entrypoint_extract_config_mock.return_value = "/path/to/config.json"

    def assert_config_migrated(new_config, path):
        # the migrated config must be written back to the same path,
        # with the legacy key replaced by a one-element list under the new key
        assert path == "/path/to/config.json"
        assert "property_id" not in new_config
        assert "property_ids" in new_config
        assert new_config["property_ids"] == ["111111111"]

    source_write_config_mock.side_effect = assert_config_migrated

    MigrateCustomReports.migrate(["--config", "/path/to/config.json"], source)
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
def patch_base_class():
return {
"config": {
"property_id": "108176369",
"property_ids": ["108176369"],
"credentials": {"auth_type": "Service", "credentials_json": json_credentials},
"date_ranges_start_date": datetime.datetime.strftime((datetime.datetime.now() - datetime.timedelta(days=1)), "%Y-%m-%d"),
}
Expand All @@ -43,7 +43,7 @@ def patch_base_class():
@pytest.fixture
def config():
return {
"property_id": "108176369",
"property_ids": ["108176369"],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add unit test with old config and try test backward compatibility in that way

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in scope of migration

"credentials": {"auth_type": "Service", "credentials_json": json_credentials},
"date_ranges_start_date": datetime.datetime.strftime((datetime.datetime.now() - datetime.timedelta(days=1)), "%Y-%m-%d"),
"custom_reports": json.dumps([{
Expand Down Expand Up @@ -117,7 +117,7 @@ def test_check(requests_mock, config_gen, config_values, is_successful, message)
assert source.check(logger, config_gen(**config_values)) == AirbyteConnectionStatus(status=is_successful, message=message)
if not is_successful:
with pytest.raises(AirbyteTracedException) as e:
source.check(logger, config_gen(property_id="UA-11111111"))
source.check(logger, config_gen(property_ids=["UA-11111111"]))
assert e.value.failure_type == FailureType.config_error


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def patch_base_class(mocker):

return {
"config": {
"property_ids": ["496180525"],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if an old config has not property_ids

what happens next?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Figured out in scope of migration

"property_id": "496180525",
"credentials": {"auth_type": "Service", "credentials_json": json_credentials},
"dimensions": ["date", "deviceCategory", "operatingSystem", "browser"],
Expand Down Expand Up @@ -307,6 +308,7 @@ def test_stream_slices():

def test_read_incremental(requests_mock):
config = {
"property_ids": [123],
"property_id": 123,
"date_ranges_start_date": datetime.date(2022, 12, 29),
"window_in_days": 1,
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/google-analytics-data-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ The Google Analytics connector is subject to Google Analytics Data API quotas. P

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------|
| 1.3.0 | 2023-09-13 | [30152](https://github.com/airbytehq/airbyte/pull/30152) | Ability to add multiple property ids |
| 1.2.0 | 2023-09-11 | [30290](https://github.com/airbytehq/airbyte/pull/30290) | Add new preconfigured reports |
| 1.1.3 | 2023-08-04 | [29103](https://github.com/airbytehq/airbyte/pull/29103) | Update input field descriptions |
| 1.1.2 | 2023-07-03 | [27909](https://github.com/airbytehq/airbyte/pull/27909) | Limit the page size of custom report streams |
Expand Down
Loading