Skip to content

Commit

Permalink
🚨 🚨 Source Mixpanel: fix typing (#30025)
Browse files Browse the repository at this point in the history
Co-authored-by: Anatolii Yatsuk <tolikyatsuk@gmail.com>
Co-authored-by: Pedro S. Lopez <pedroslopez@me.com>
Co-authored-by: pedroslopez <pedroslopez@users.noreply.github.com>
Co-authored-by: Anatolii Yatsuk <35109939+tolik0@users.noreply.github.com>
  • Loading branch information
5 people authored and girarda committed Oct 4, 2023
1 parent dceca41 commit bd0e9c9
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 59 deletions.
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-mixpanel/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]


LABEL io.airbyte.version=0.1.41
LABEL io.airbyte.version=1.0.0
LABEL io.airbyte.name=airbyte/source-mixpanel
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference)
# for more information about how to configure these tests
connector_image: airbyte/source-mixpanel:dev
# custom configuration is used for tests to speed up testing and avoid hitting rate limits
custom_environment_variables:
REQS_PER_HOUR_LIMIT: 0
AVAILABLE_TESTING_RANGE_DAYS: 10
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_cursor": true,
"default_cursor_field": ["time"],
"source_defined_primary_key": [["distinct_id"]]
"source_defined_primary_key": [["distinct_id"], ["event"], ["time"]]
},
"sync_mode": "incremental",
"destination_sync_mode": "append",
"cursor_field": ["time"],
"primary_key": [["distinct_id"]]
"primary_key": [["distinct_id"], ["event"], ["time"]]
},
{
"stream": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 12928b32-bf0a-4f1e-964f-07e12e37153a
dockerImageTag: 0.1.41
dockerImageTag: 1.0.0
dockerRepository: airbyte/source-mixpanel
githubIssueLabel: source-mixpanel
icon: mixpanel.svg
Expand All @@ -18,6 +18,11 @@ data:
oss:
enabled: true
releaseStage: generally_available
releases:
breakingChanges:
1.0.0:
message: In this release, fields of the engage stream that Mixpanel reports as datetime are now declared as plain strings (the date-time format has been removed from the schema) because Mixpanel returns these values in inconsistent formats. Additionally, the primary key of the export stream has been changed to the combination of distinct_id, event and time so that it uniquely identifies records. Users will need to refresh the source schema and reset the affected streams after upgrading.
upgradeDeadline: "2023-10-31"
suggestedStreams:
streams:
- export
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ def get_json_schema(self) -> Mapping[str, Any]:
types = {
"boolean": {"type": ["null", "boolean"]},
"number": {"type": ["null", "number"], "multipleOf": 1e-20},
"datetime": {"type": ["null", "string"], "format": "date-time"},
# no format specified as values can be "2021-12-16T00:00:00", "1638298874", "15/08/53895"
"datetime": {"type": ["null", "string"]},
"object": {"type": ["null", "object"], "additionalProperties": True},
"list": {"type": ["null", "array"], "required": False, "items": {}},
"string": {"type": ["null", "string"]},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class Export(DateSlicesMixin, IncrementalMixpanelStream):
3 queries per second and 60 queries per hour.
"""

primary_key: str = "distinct_id"
primary_key: str = ["distinct_id", "event", "time"]
cursor_field: str = "time"

transformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,23 +279,47 @@ def engage_schema_response():
200,
{
"results": {
"$browser": {"count": 124, "type": "string"},
"$browser_version": {"count": 124, "type": "string"},
"$created": {"count": 124, "type": "string"},
"$is_active": {"count": 412, "type": "boolean"},
"$CreatedDateTimestamp": {"count": 300, "type": "number"},
"$CreatedDate": {"count": 11, "type": "datetime"},
"$properties": {"count": 2, "type": "object"},
"$tags": {"count": 131, "type": "list"},
}
},
)


def test_engage_schema(requests_mock, engage_schema_response, config):

stream = EngageSchema(authenticator=MagicMock(), **config)
requests_mock.register_uri("GET", get_url_to_mock(stream), engage_schema_response)

records = stream.read_records(sync_mode=SyncMode.full_refresh)

records_length = sum(1 for _ in records)
assert records_length == 3
stream = Engage(authenticator=MagicMock(), **config)
requests_mock.register_uri("GET", get_url_to_mock(EngageSchema(authenticator=MagicMock(), **config)), engage_schema_response)
assert stream.get_json_schema() == {
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": True,
"properties": {
"CreatedDate": {"type": ["null", "string"]},
"CreatedDateTimestamp": {"multipleOf": 1e-20, "type": ["null", "number"]},
"browser": {"type": ["null", "string"]},
"browser_version": {"type": ["null", "string"]},
"city": {"type": ["null", "string"]},
"country_code": {"type": ["null", "string"]},
"created": {"type": ["null", "string"]},
"distinct_id": {"type": ["null", "string"]},
"email": {"type": ["null", "string"]},
"first_name": {"type": ["null", "string"]},
"id": {"type": ["null", "string"]},
"is_active": {"type": ["null", "boolean"]},
"last_name": {"type": ["null", "string"]},
"last_seen": {"format": "date-time", "type": ["null", "string"]},
"name": {"type": ["null", "string"]},
"properties": {"additionalProperties": True, "type": ["null", "object"]},
"region": {"type": ["null", "string"]},
"tags": {"items": {}, "required": False, "type": ["null", "array"]},
"timezone": {"type": ["null", "string"]},
"unblocked": {"type": ["null", "string"]},
},
"type": "object",
}


def test_update_engage_schema(requests_mock, config):
Expand Down
5 changes: 5 additions & 0 deletions docs/integrations/sources/mixpanel-migrations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Mixpanel Migration Guide

## Upgrading to 1.0.0

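In this release, fields of the `engage` stream that Mixpanel reports as `datetime` are now declared as plain strings (the `date-time` format has been removed from the schema) because Mixpanel returns these values in inconsistent formats. Additionally, the primary key of the `export` stream has been changed to the combination of `distinct_id`, `event` and `time` so that it uniquely identifies records. Users will need to refresh the source schema and reset the affected streams after upgrading.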
Loading

0 comments on commit bd0e9c9

Please sign in to comment.