Skip to content

Commit

Permalink
Improve performance when retrieving Notion DB pages (#6710)
Browse files: browse the repository at this point in the history
  • Loading branch information
jamesmcroft committed Jun 26, 2023
1 parent ec8247e commit ba62276
Showing 1 changed file with 8 additions and 10 deletions.
18 changes: 8 additions & 10 deletions langchain/document_loaders/notiondb.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,13 +48,13 @@ def load(self) -> List[Document]:
Returns:
List[Document]: List of documents.
"""
page_ids = self._retrieve_page_ids()
page_summaries = self._retrieve_page_summaries()

return list(self.load_page(page_id) for page_id in page_ids)
return list(self.load_page(page_summary) for page_summary in page_summaries)

def _retrieve_page_ids(
def _retrieve_page_summaries(
self, query_dict: Dict[str, Any] = {"page_size": 100}
) -> List[str]:
) -> List[Dict[str, Any]]:
"""Get all the pages from a Notion database."""
pages: List[Dict[str, Any]] = []

Expand All @@ -72,18 +72,16 @@ def _retrieve_page_ids(

query_dict["start_cursor"] = data.get("next_cursor")

page_ids = [page["id"] for page in pages]
return pages

return page_ids

def load_page(self, page_id: str) -> Document:
def load_page(self, page_summary: Dict[str, Any]) -> Document:
"""Read a page."""
data = self._request(PAGE_URL.format(page_id=page_id))
page_id = page_summary["id"]

# load properties as metadata
metadata: Dict[str, Any] = {}

for prop_name, prop_data in data["properties"].items():
for prop_name, prop_data in page_summary["properties"].items():
prop_type = prop_data["type"]

if prop_type == "rich_text":
Expand Down

0 comments on commit ba62276

Please sign in to comment.