Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚨 🚨 Source Mixpanel: fix typing #30025

Merged
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-mixpanel/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]


LABEL io.airbyte.version=0.1.41
LABEL io.airbyte.version=1.0.0
LABEL io.airbyte.name=airbyte/source-mixpanel
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference)
# for more information about how to configure these tests
connector_image: airbyte/source-mixpanel:dev
# custom configuration is used for tests to speed up testing and avoid hitting rate limits
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 thanks for adding the comment here.

custom_environment_variables:
REQS_PER_HOUR_LIMIT: 0
AVAILABLE_TESTING_RANGE_DAYS: 10
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_cursor": true,
"default_cursor_field": ["time"],
"source_defined_primary_key": [["distinct_id"]]
"source_defined_primary_key": [["distinct_id"], ["event"], ["time"]]
},
"sync_mode": "incremental",
"destination_sync_mode": "append",
"cursor_field": ["time"],
"primary_key": [["distinct_id"]]
"primary_key": [["distinct_id"], ["event"], ["time"]]
},
{
"stream": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 12928b32-bf0a-4f1e-964f-07e12e37153a
dockerImageTag: 0.1.41
dockerImageTag: 1.0.0
dockerRepository: airbyte/source-mixpanel
githubIssueLabel: source-mixpanel
icon: mixpanel.svg
Expand All @@ -18,6 +18,11 @@ data:
oss:
enabled: true
releaseStage: generally_available
releases:
breakingChanges:
1.0.0:
message: In this release, properties of the engage stream that Mixpanel reports as datetime are now typed as plain strings (the date-time format has been dropped) because Mixpanel returns inconsistently formatted values for them. Additionally, the primary key of the export stream is now the composite of distinct_id, event and time, so that it uniquely identifies records. After upgrading, users will need to refresh the source schema and reset the affected streams.
upgradeDeadline: "2023-10-31"
suggestedStreams:
streams:
- export
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ def get_json_schema(self) -> Mapping[str, Any]:
types = {
"boolean": {"type": ["null", "boolean"]},
"number": {"type": ["null", "number"], "multipleOf": 1e-20},
"datetime": {"type": ["null", "string"], "format": "date-time"},
# no format specified as values can be "2021-12-16T00:00:00", "1638298874", "15/08/53895"
"datetime": {"type": ["null", "string"]},
"object": {"type": ["null", "object"], "additionalProperties": True},
"list": {"type": ["null", "array"], "required": False, "items": {}},
"string": {"type": ["null", "string"]},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class Export(DateSlicesMixin, IncrementalMixpanelStream):
3 queries per second and 60 queries per hour.
"""

primary_key: str = "distinct_id"
primary_key: str = ["distinct_id", "event", "time"]
cursor_field: str = "time"

transformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,23 +279,47 @@ def engage_schema_response():
200,
{
"results": {
"$browser": {"count": 124, "type": "string"},
"$browser_version": {"count": 124, "type": "string"},
"$created": {"count": 124, "type": "string"},
"$is_active": {"count": 412, "type": "boolean"},
"$CreatedDateTimestamp": {"count": 300, "type": "number"},
"$CreatedDate": {"count": 11, "type": "datetime"},
"$properties": {"count": 2, "type": "object"},
"$tags": {"count": 131, "type": "list"},
}
},
)


def test_engage_schema(requests_mock, engage_schema_response, config):

stream = EngageSchema(authenticator=MagicMock(), **config)
requests_mock.register_uri("GET", get_url_to_mock(stream), engage_schema_response)

records = stream.read_records(sync_mode=SyncMode.full_refresh)

records_length = sum(1 for _ in records)
assert records_length == 3
clnoll marked this conversation as resolved.
Show resolved Hide resolved
stream = Engage(authenticator=MagicMock(), **config)
requests_mock.register_uri("GET", get_url_to_mock(EngageSchema(authenticator=MagicMock(), **config)), engage_schema_response)
assert stream.get_json_schema() == {
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": True,
"properties": {
"CreatedDate": {"type": ["null", "string"]},
"CreatedDateTimestamp": {"multipleOf": 1e-20, "type": ["null", "number"]},
"browser": {"type": ["null", "string"]},
"browser_version": {"type": ["null", "string"]},
"city": {"type": ["null", "string"]},
"country_code": {"type": ["null", "string"]},
"created": {"type": ["null", "string"]},
"distinct_id": {"type": ["null", "string"]},
"email": {"type": ["null", "string"]},
"first_name": {"type": ["null", "string"]},
"id": {"type": ["null", "string"]},
"is_active": {"type": ["null", "boolean"]},
"last_name": {"type": ["null", "string"]},
"last_seen": {"format": "date-time", "type": ["null", "string"]},
"name": {"type": ["null", "string"]},
"properties": {"additionalProperties": True, "type": ["null", "object"]},
"region": {"type": ["null", "string"]},
"tags": {"items": {}, "required": False, "type": ["null", "array"]},
"timezone": {"type": ["null", "string"]},
"unblocked": {"type": ["null", "string"]},
},
"type": "object",
}


def test_update_engage_schema(requests_mock, config):
Expand Down
5 changes: 5 additions & 0 deletions docs/integrations/sources/mixpanel-migrations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Mixpanel Migration Guide

## Upgrading to 1.0.0

In this release, properties of the `engage` stream that Mixpanel reports as `datetime` are now typed as plain strings (the `date-time` format has been dropped) because Mixpanel returns inconsistently formatted values for them. Additionally, the primary key of the `export` stream is now the composite of `distinct_id`, `event`, and `time`, so that it uniquely identifies records. After upgrading, users will need to refresh the source schema and reset the affected streams.
Loading
Loading