Skip to content

Bug: Cannot use upload script to upload code blocks with > 2000 characters #4

@adamtheturtle

Description

@adamtheturtle

I got:

notion_client.errors.APIResponseError: body failed validation: body.children[95].code.rich_text[0].text.content.length should be ≤ `2000`, instead was `5867`.

I searched and found many articles (e.g. https://medium.com/@jan.lenaerts/overcoming-notions-2k-character-limit-with-n8n-workflows-9a1f7ccc7ee8) that recommend splitting blocks.

I asked Claude to modify the upload script, and the resulting version below works:

import argparse
import copy
import json
import os
from pathlib import Path
from typing import Any, cast

from notion_client import Client

NOTION_RICH_TEXT_LIMIT = 2000


_Block = dict[str, Any]
_RichTextBlock = dict[str, Any]


def _split_rich_text(rich_text: list[_RichTextBlock]) -> list[_RichTextBlock]:
    """
    Given a list of rich_text objects, split any 'text.content' >2000 chars
    into multiple objects, preserving all other fields (annotations, links,
    etc).
    """
    new_rich_text: list[_RichTextBlock] = []
    for obj in rich_text:
        if obj.get("type") == "text" and "content" in obj["text"]:
            content = obj["text"]["content"]
            if len(content) > NOTION_RICH_TEXT_LIMIT:
                # Split content into chunks
                for i in range(0, len(content), NOTION_RICH_TEXT_LIMIT):
                    chunk = content[i:i+NOTION_RICH_TEXT_LIMIT]
                    new_obj = json.loads(s=json.dumps(obj=obj))  # deep copy
                    new_obj["text"]["content"] = chunk
                    new_rich_text.append(new_obj)
            else:
                new_rich_text.append(obj)
        else:
            new_rich_text.append(obj)
    return new_rich_text


def _process_block(block: _Block) -> _Block:
    """
    Recursively process a Notion block dict, splitting any rich_text >2000
    chars.

    A new dict is built and returned; ``block`` and the dicts and lists
    nested inside it are never modified. Every list stored under a
    ``"rich_text"`` key, at any nesting depth, is passed through
    ``_split_rich_text``. Dict values and dict elements of lists are
    processed recursively; all other values (including lists nested
    directly inside lists) are carried over unchanged.
    """
    processed: _Block = {}
    for key, value in block.items():
        if isinstance(value, dict):
            processed[key] = _process_block(block=cast("_Block", value))
        elif isinstance(value, list):
            items = cast("list[Any]", value)
            if key == "rich_text":
                # Handle the key where it lives rather than from the parent
                # dict, so nothing is written back into the caller's data.
                items = _split_rich_text(
                    rich_text=cast("list[_RichTextBlock]", items)
                )
            processed[key] = [
                _process_block(block=cast("_Block", item))
                if isinstance(item, dict)
                else item
                for item in items
            ]
        else:
            processed[key] = value
    return processed


def _process_blocks(blocks: list[_Block]) -> list[_Block]:
    """
    Run ``_process_block`` over each entry of ``blocks``, in order, and
    return the results as a new list.
    """
    processed: list[_Block] = []
    for item in blocks:
        processed.append(_process_block(block=item))
    return processed


# Notion rejects a request whose ``children`` array holds more than this
# many blocks, so larger documents are uploaded in several requests.
NOTION_CHILDREN_PER_REQUEST_LIMIT = 100

# Module-level client; reads the integration token from the environment and
# raises KeyError at import time if NOTION_TOKEN is not set.
notion = Client(auth=os.environ["NOTION_TOKEN"])

if __name__ == "__main__":
    # Command line: the JSON file of blocks, the parent page to create the
    # new page under, and the new page's title. All three are required.
    parser = argparse.ArgumentParser(description="Upload to Notion")
    parser.add_argument(
        "-f", "--file", help="JSON File to upload", required=True, type=Path
    )
    parser.add_argument(
        "-p",
        "--parent_page_id",
        help="Parent page ID (integration connected)",
        required=True,
    )
    parser.add_argument(
        "-t", "--title", help="Title of the new page", required=True
    )
    args = parser.parse_args()

    with args.file.open("r", encoding="utf-8") as f:
        contents = json.load(fp=f)

    # Workaround Notion 2k char limit: preprocess contents
    processed_contents = _process_blocks(blocks=contents)

    # Create the page with the first batch of blocks only; a document with
    # at most NOTION_CHILDREN_PER_REQUEST_LIMIT blocks needs no further calls.
    new_page = notion.pages.create(
        parent={"type": "page_id", "page_id": args.parent_page_id},
        properties={
            "title": {"title": [{"text": {"content": args.title}}]},
        },
        children=processed_contents[:NOTION_CHILDREN_PER_REQUEST_LIMIT],
    )

    # Append any remaining blocks to the new page, batch by batch, so the
    # original block order is kept.
    page_id = cast("dict[str, Any]", new_page)["id"]
    for start in range(
        NOTION_CHILDREN_PER_REQUEST_LIMIT,
        len(processed_contents),
        NOTION_CHILDREN_PER_REQUEST_LIMIT,
    ):
        batch_end = start + NOTION_CHILDREN_PER_REQUEST_LIMIT
        notion.blocks.children.append(
            block_id=page_id,
            children=processed_contents[start:batch_end],
        )

Metadata

Metadata

Assignees

Labels

No labels
No labels

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions