From ac3a4c6a1e4fb7b6c1f942638d57eb51ec99a2c1 Mon Sep 17 00:00:00 2001
From: IlyaFaer
Date: Mon, 13 May 2024 12:27:41 +0400
Subject: [PATCH] fix(notion): pages resource returns only meta info

The notion_pages resource used to yield the page objects returned by
the search call. It now yields the child blocks of each page instead;
a page whose block list is empty yields nothing.

The page_ids filter is applied before the blocks request, so no
request is issued for pages that are going to be skipped.

The tests are updated accordingly: they now expect the block tables
and check the first loaded row of notion_pages for the selected page.
---
NOTE(review): only the "results" entry of the blocks response is used;
if that response can be paginated, later result pages would be missed
-- TODO confirm against NotionClient.fetch_resource.
NOTE(review): the post-image blob id on the first "index" line predates
the reordering of the filter and the blocks request.

 sources/notion/__init__.py          |  6 +++++-
 tests/notion/test_pages_resource.py | 28 ++++++++++++++++------------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/sources/notion/__init__.py b/sources/notion/__init__.py
index 38de56ce2..6a6771a41 100644
--- a/sources/notion/__init__.py
+++ b/sources/notion/__init__.py
@@ -28,10 +28,14 @@ def notion_pages(
     """
     client = NotionClient(api_key)
     pages = client.search(filter_criteria={"value": "page", "property": "object"})
+
     for page in pages:
         if page_ids and page["id"] not in page_ids:
             continue
-        yield page
+
+        blocks = client.fetch_resource("blocks", page["id"], "children")["results"]
+        if blocks:
+            yield blocks
 
 
 @dlt.source
diff --git a/tests/notion/test_pages_resource.py b/tests/notion/test_pages_resource.py
index 2951807f1..325131fe2 100644
--- a/tests/notion/test_pages_resource.py
+++ b/tests/notion/test_pages_resource.py
@@ -20,14 +20,9 @@ def test_load_all_pages(destination_name: str):
     loaded_tables = set(t["name"] for t in pipeline.default_schema.data_tables())
 
     expected_tables = {
-        "notion_pages__properties__second_db_related__relation",
-        "notion_pages__properties__second_db__relation",
-        "notion_pages__properties__account_owner__people",
-        "notion_pages__properties__title__title",
         "notion_pages",
-        "notion_pages__properties__text_property__rich_text",
-        "notion_pages__properties__name__title",
-        "notion_pages__properties__company__rich_text",
+        "notion_pages__heading_1__rich_text",
+        "notion_pages__bulleted_list_item__rich_text",
     }
 
     assert loaded_tables == expected_tables
@@ -36,6 +31,8 @@ def test_load_all_pages(destination_name: str):
 
 @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
 def test_load_selected_pages(destination_name: str):
+    page_id = "29298248-6067-4332-b6db-ca516d4d9af3"
+
     pipeline = dlt.pipeline(
         pipeline_name="notion",
         destination=destination_name,
@@ -43,13 +40,20 @@ def test_load_selected_pages(destination_name: str):
         full_refresh=True,
     )
 
-    requested_pages = ["06e48554-9585-415b-bffe-aad4b2244f20"]
-
-    info = pipeline.run(notion_pages(requested_pages))
+    info = pipeline.run(notion_pages(page_ids=[page_id]))
     assert_load_info(info)
 
-    assert load_table_counts(pipeline, "notion_pages")["notion_pages"] == 1
+    loaded_tables = set(t["name"] for t in pipeline.default_schema.data_tables())
+    expected_tables = {
+        "notion_pages",
+        "notion_pages__heading_1__rich_text",
+        "notion_pages__bulleted_list_item__rich_text",
+    }
+
+    assert loaded_tables == expected_tables
 
     with pipeline.sql_client() as client:
         with client.execute_query("SELECT * FROM notion_pages") as cur:
-            assert cur.fetchone()[1] in requested_pages
+            row = cur.fetchone()
+            assert row[0] == "block"
+            assert row[3] == page_id