Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Source Mailchimp: Handle empty fields in Reports stream #32543

Merged
merged 4 commits into from Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -10,7 +10,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: b03a9f3e-22a5-11eb-adc1-0242ac120002
dockerImageTag: 0.8.2
dockerImageTag: 0.8.3
dockerRepository: airbyte/source-mailchimp
documentationUrl: https://docs.airbyte.com/integrations/sources/mailchimp
githubIssueLabel: source-mailchimp
Expand Down
Expand Up @@ -138,7 +138,7 @@
"description": "The number of unique opens divided by the total number of successful deliveries."
},
"last_open": {
"type": "string",
"type": ["null", "string"],
"format": "date-time",
"title": "Last Open",
"description": "The date and time of the last recorded open in ISO 8601 format."
Expand Down
Expand Up @@ -274,21 +274,28 @@ class Reports(IncrementalMailChimpStream):
cursor_field = "send_time"
data_field = "reports"

@staticmethod
def remove_empty_datetime_fields(record: Mapping[str, Any]) -> Mapping[str, Any]:
    """
    Drop empty 'clicks.last_click' and 'opens.last_open' values from a report record.

    In some cases these fields are returned as an empty string, which causes
    validation errors on the `date-time` format. To avoid this, the fields are
    removed when they are empty.

    :param record: a single report record; it is modified in place.
    :return: the same record object, with the empty datetime fields removed.
    """
    for parent_key, datetime_key in (("clicks", "last_click"), ("opens", "last_open")):
        parent = record.get(parent_key)
        # `record.get(key, {})` only falls back when the key is absent, so a parent that is
        # present but null (or otherwise not a dict) must be skipped explicitly to avoid
        # an AttributeError on `.get`.
        if isinstance(parent, dict) and not parent.get(datetime_key):
            parent.pop(datetime_key, None)
    return record

def path(self, **kwargs) -> str:
    """Return the path string for this stream; any keyword arguments are ignored."""
    endpoint = "reports"
    return endpoint

def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
    """
    Yield each parsed report record exactly once, with empty datetime fields removed.

    The parent class parses the raw response; every resulting record is then passed
    through `remove_empty_datetime_fields`, because empty-string 'clicks.last_click'
    and 'opens.last_open' values cause validation errors on the `date-time` format.

    :param response: the HTTP response to parse.
    :param kwargs: forwarded unchanged to the parent `parse_response`.
    :return: an iterable of cleaned records.
    """
    records = super().parse_response(response, **kwargs)
    for record in records:
        # Single yield per record: the cleanup is delegated entirely to the helper
        # instead of being repeated inline for 'last_click' only.
        yield self.remove_empty_datetime_fields(record)


class Segments(MailChimpListSubStream):
Expand Down
Expand Up @@ -10,7 +10,7 @@
import responses
from airbyte_cdk.models import SyncMode
from requests.exceptions import HTTPError
from source_mailchimp.streams import Campaigns, EmailActivity, ListMembers, Lists, Segments
from source_mailchimp.streams import Campaigns, EmailActivity, ListMembers, Lists, Reports, Segments
from utils import read_full_refresh, read_incremental


Expand Down Expand Up @@ -413,3 +413,39 @@ def test_403_error_handling(
# Handle non-403 error
except HTTPError as e:
assert e.response.status_code == status_code

@pytest.mark.parametrize(
    "record, expected_return",
    [
        (
            {"clicks": {"last_click": ""}, "opens": {"last_open": ""}},
            {"clicks": {}, "opens": {}},
        ),
        (
            {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": ""}},
            {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {}},
        ),
        (
            {"clicks": {"last_click": ""}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}},
            {"clicks": {}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}},
        ),
        (
            {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}},
            {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}},
        ),
    ],
    ids=[
        "last_click and last_open empty",
        "last_click empty",
        "last_open empty",
        "last_click and last_open not empty",
    ],
)
def test_reports_remove_empty_datetime_fields(auth, record, expected_return):
    """
    Tests that the Reports stream removes the 'clicks.last_click' and 'opens.last_open'
    fields from a record when they are empty strings, and leaves them untouched otherwise.
    """
    stream = Reports(authenticator=auth)
    # Call the helper once and reuse the result in the failure message.
    actual = stream.remove_empty_datetime_fields(record)
    assert actual == expected_return, f"Expected: {expected_return}, Actual: {actual}"
1 change: 1 addition & 0 deletions docs/integrations/sources/mailchimp.md
Expand Up @@ -76,6 +76,7 @@ Now that you have set up the Mailchimp source connector, check out the following

| Version | Date | Pull Request | Subject |
|---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------|
| 0.8.3 | 2023-11-15 | [32543](https://github.com/airbytehq/airbyte/pull/32543) | Handle empty datetime fields in Reports stream |
| 0.8.2 | 2023-11-13 | [32466](https://github.com/airbytehq/airbyte/pull/32466) | Improve error handling during connection check |
| 0.8.1 | 2023-11-06 | [32226](https://github.com/airbytehq/airbyte/pull/32226) | Unmute expected records test after data anonymisation |
| 0.8.0 | 2023-11-01 | [32032](https://github.com/airbytehq/airbyte/pull/32032) | Add ListMembers stream |
Expand Down