Skip to content

Commit

Permalink
🏥 Source Notion: update stream schema (#35409)
Browse files Browse the repository at this point in the history
  • Loading branch information
darynaishchenko committed Feb 22, 2024
1 parent 1162302 commit 833f336
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 46 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 6e00b415-b02e-4160-bf02-58176a0ae687
dockerImageTag: 2.0.9
dockerImageTag: 2.1.0
dockerRepository: airbyte/source-notion
documentationUrl: https://docs.airbyte.com/integrations/sources/notion
githubIssueLabel: source-notion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
version = "2.0.9"
version = "2.1.0"
name = "source-notion"
description = "Source implementation for Notion."
authors = [ "Airbyte <contact@airbyte.io>",]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@
"properties": {
"type": {
"type": ["null", "string"]
},
"info": {
"type": ["null", "object"],
"properties": {
"id": {
"type": ["null", "string"]
},
"object": {
"type": ["null", "string"]
}
}
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,38 @@
"type": {
"type": "string"
},
"info": {
"avatar_url": {
"type": ["null", "string"]
},
"id": {
"type": ["null", "string"]
},
"name": {
"type": ["null", "string"]
},
"object": {
"type": ["null", "string"]
},
"person": {
"type": ["null", "object"],
"properties": {
"email": {
"type": ["null", "string"]
},
"type": {
"type": ["null", "string"]
}
}
}
},
"workspace": {
"type": ["null", "boolean"]
}
}
},
"workspace_name": {
"type": ["null", "string"]
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,22 @@ def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) ->
params["start_cursor"] = next_page_token["next_cursor"]
return params

def transform(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
owner = record.get("bot", {}).get("owner")
if owner:
owner_type = owner.get("type")
owner_info = owner.get(owner_type)
if owner_type and owner_info:
record["bot"]["owner"]["info"] = owner_info
del record["bot"]["owner"][owner_type]
return record

def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
# sometimes notion api returns response without results object
data = response.json().get("results", [])
for record in data:
yield self.transform(record)


class Databases(IncrementalNotionStream):
"""
Expand Down Expand Up @@ -313,6 +329,20 @@ def stream_slices(

yield {"page_id": page_id}

def transform(self, record: Mapping[str, Any]) -> Mapping[str, Any]:
transform_object_field = record.get("type")

if transform_object_field:
rich_text = record.get(transform_object_field, {}).get("rich_text", [])
for r in rich_text:
mention = r.get("mention")
if mention:
type_info = mention[mention["type"]]
record[transform_object_field]["rich_text"][rich_text.index(r)]["mention"]["info"] = type_info
del record[transform_object_field]["rich_text"][rich_text.index(r)]["mention"][mention["type"]]

return record

def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
# pages and databases blocks are already fetched in their streams, so no
# need to do it again
Expand All @@ -321,7 +351,7 @@ def parse_response(self, response: requests.Response, stream_state: Mapping[str,
records = super().parse_response(response, stream_state=stream_state, **kwargs)
for record in records:
if record["type"] not in ("child_page", "child_database", "ai_block"):
yield record
yield self.transform(record)

def read_records(self, **kwargs) -> Iterable[Mapping[str, Any]]:
# if reached recursive limit, don't read anymore
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,41 @@ def test_request_throttle(initial_page_size, expected_page_size, mock_response,
stream.should_retry(response=response)

assert stream.page_size == expected_page_size


def test_users_record_transformer():
stream = Users(config=MagicMock())
response_record = {
"object": "user", "id": "id", "name": "Airbyte", "avatar_url": "some url", "type": "bot",
"bot": {"owner": {"type": "user", "user": {"object": "user", "id": "id", "name": "Test User", "avatar_url": None, "type": "person",
"person": {"email": "email"}}}, "workspace_name": "test"}
}
expected_record = {
"object": "user", "id": "id", "name": "Airbyte", "avatar_url": "some url", "type": "bot",
"bot": {"owner": {"type": "user", "info": {"object": "user", "id": "id", "name": "Test User", "avatar_url": None, "type": "person",
"person": {"email": "email"}}}, "workspace_name": "test"}
}
assert stream.transform(response_record) == expected_record


def test_block_record_transformer():
stream = Blocks(parent=None, config=MagicMock())
response_record = {
"object": "block", "id": "id", "parent": {"type": "page_id", "page_id": "id"}, "created_time": "2021-10-19T13:33:00.000Z", "last_edited_time": "2021-10-19T13:33:00.000Z",
"created_by": {"object": "user", "id": "id"}, "last_edited_by": {"object": "user", "id": "id"}, "has_children": False, "archived": False, "type": "paragraph",
"paragraph": {"rich_text": [{"type": "text", "text": {"content": "test", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": None},
{"type": "text", "text": {"content": "@", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": True, "color": "default"}, "plain_text": "@", "href": None},
{"type": "text", "text": {"content": "test", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": None},
{"type": "mention", "mention": {"type": "page", "page": {"id": "id"}}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"},
"plain_text": "test", "href": "https://www.notion.so/id"}], "color": "default"}
}
expected_record = {
"object": "block", "id": "id", "parent": {"type": "page_id", "page_id": "id"}, "created_time": "2021-10-19T13:33:00.000Z", "last_edited_time": "2021-10-19T13:33:00.000Z",
"created_by": {"object": "user", "id": "id"}, "last_edited_by": {"object": "user", "id": "id"}, "has_children": False, "archived": False, "type": "paragraph",
"paragraph": {"rich_text": [{"type": "text", "text": {"content": "test", "link": None}, "annotations":{"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text":"test", "href": None},
{"type": "text", "text": {"content": "@", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": True, "color": "default"}, "plain_text": "@", "href": None},
{"type": "text", "text": {"content": "test", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": None},
{"type": "mention", "mention": {"type": "page", "info": {"id": "id"}}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": "https://www.notion.so/id"}],
"color": "default"}
}
assert stream.transform(response_record) == expected_record
Loading

0 comments on commit 833f336

Please sign in to comment.