Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

馃帀 Source Notion: Adds pagination to Users stream #11452

Merged
merged 7 commits into from
Apr 22, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ class Users(NotionStream):
def path(self, **kwargs) -> str:
return "users"

def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
params = {"page_size": self.page_size}
if next_page_token:
params["start_cursor"] = next_page_token["next_cursor"]
return params


class Databases(IncrementalNotionStream):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
#

from http import HTTPStatus
import random
from unittest.mock import MagicMock

import pytest
import requests
from source_notion.streams import NotionStream
from airbyte_cdk.models import SyncMode
from source_notion.streams import NotionStream, Users


@pytest.fixture
Expand Down Expand Up @@ -76,3 +78,56 @@ def test_backoff_time(patch_base_class):
stream = NotionStream(config=MagicMock())
expected_backoff_time = None
assert stream.backoff_time(response_mock) == expected_backoff_time


def test_users_request_params(patch_base_class):
stream = Users(config=MagicMock())

# No next_page_token. First pull
inputs = {"stream_slice": None, "stream_state": None, "next_page_token": None}
expected_params = {"page_size": 100}
assert stream.request_params(**inputs) == expected_params

# When getting pages after the first pull.
inputs = {"stream_slice": None, "stream_state": None, "next_page_token": {"next_cursor": "123"}}
expected_params = {"start_cursor": "123", "page_size": 100}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of using a dummy start_cursor value maybe use a self documenting value that will be provided from Notion such as "next_cursor": "fe2cc560-036c-44cd-90e8-294d5a74cebc"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. Updated in fda250e.

assert stream.request_params(**inputs) == expected_params
Comment on lines +83 to +94
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I suggest leverage parametrize wherever you can.

Suggested change
def test_users_request_params(patch_base_class):
stream = Users(config=MagicMock())
# No next_page_token. First pull
inputs = {"stream_slice": None, "stream_state": None, "next_page_token": None}
expected_params = {"page_size": 100}
assert stream.request_params(**inputs) == expected_params
# When getting pages after the first pull.
inputs = {"stream_slice": None, "stream_state": None, "next_page_token": {"next_cursor": "123"}}
expected_params = {"start_cursor": "123", "page_size": 100}
assert stream.request_params(**inputs) == expected_params
@pytest.mark.parametrize("next_page_token, expected_params", [(None, {"page_size": 100}), ({"next_cursor": "123"}, {"start_cursor": "123", "page_size": 100})])
def test_users_request_params(patch_base_class, next_page_token, expected_params):
stream = Users(config=MagicMock())
inputs = {"stream_slice": None, "stream_state": None, "next_page_token": next_page_token}
assert stream.request_params(**inputs) == expected_params



def test_user_stream_handles_pagination_correclty(requests_mock):
"""
Test shows that Users stream uses pagination as per Notion API docs.
"""

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I would suggest to use a function to generate your requests/responses sequence

response_body = {
"object": "list",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the original response from Notion both object and type are part of the results array. In my opinion It would be better if the tests are kept consistent with the actual implementation
image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in fda250e.

"results": [{"id": f"{x}", "object": "user", "type": ["person", "bot"][random.randint(0, 1)]} for x in range(100)],
"next_cursor": "bc48234b-77b2-41a6-95a3-6a8abb7887d5",
"has_more": True,
"type": "user"
}
requests_mock.get("https://api.notion.com/v1/users?page_size=100", json=response_body)

response_body = {
"object": "list",
"results": [{"id": f"{x}", "object": "user", "type": ["person", "bot"][random.randint(0, 1)]} for x in range(100, 200)],
"next_cursor": "67030467-b97b-4729-8fd6-2fb33d012da4",
"has_more": True,
"type": "user"
}
requests_mock.get("https://api.notion.com/v1/users?page_size=100&start_cursor=bc48234b-77b2-41a6-95a3-6a8abb7887d5", json=response_body)

response_body = {
"object": "list",
"results": [{"id": f"{x}", "object": "user", "type": ["person", "bot"][random.randint(0, 1)]} for x in range(200, 220)],
"next_cursor": None,
"has_more": False,
"type": "user"
}
requests_mock.get("https://api.notion.com/v1/users?page_size=100&start_cursor=67030467-b97b-4729-8fd6-2fb33d012da4", json=response_body)

stream = Users(config=MagicMock())

records = stream.read_records(sync_mode=SyncMode.full_refresh)
records_length = sum(1 for _ in records)
assert records_length == 220