# `NotionTypes`

> Represents the types of Notion objects like text, number, select, multi-select, etc.

In [None]:
# | default_exp model.notion_typing

In [None]:
# | export

import typing as t
from datetime import datetime

from ragas_annotator.exceptions import ValidationError

T = t.TypeVar("T")

In [None]:
# | export


class Field(t.Generic[T]):
    """Base class for all Notion field types.

    A ``Field`` is a data descriptor. When assigned as a class attribute it
    registers itself in the owner's ``_fields`` mapping, and on instances it
    reads and writes values through the instance's ``_values`` mapping
    (the owning instance is expected to provide ``_values``).

    Subclasses set ``NOTION_FIELD_TYPE`` and implement ``_to_notion`` and
    ``_from_notion``.
    """

    # Notion property type name; also used as the payload key.
    NOTION_FIELD_TYPE = ""
    _type: t.Type[T]

    def __init__(self, required: bool = True):
        """Create a field.

        Args:
            required: When true, ``validate`` rejects ``None``.
        """
        self.required = required
        # Filled in by __set_name__ once the field is bound to a class.
        self.name: str = ""
        super().__init__()

    def __set_name__(self, owner: t.Type, name: str):
        """Record the attribute name and register the field on ``owner``."""
        self.name = name
        # hasattr() would also be true for a registry inherited from a base
        # class, and writing into that shared dict would leak this field into
        # the base (and its other subclasses). Give each owner its own
        # registry, seeded with whatever it inherits.
        if "_fields" not in vars(owner):
            owner._fields = dict(getattr(owner, "_fields", {}))
        owner._fields[name] = self

    def __get__(self, instance, owner=None):
        """Return the stored value, or the field itself on class access.

        Returns ``None`` when no value has been stored under this name.
        """
        if instance is None:
            return self
        return instance._values.get(self.name)

    def __set__(self, instance, value):
        """Validate ``value`` and store it in ``instance._values``."""
        if instance is None:
            return
        value = self.validate(value)
        instance._values[self.name] = value

    def validate(self, value: t.Any) -> t.Any:
        """Validate the field value and return it.

        Raises:
            ValidationError: If ``value`` is ``None`` and the field is required.
        """
        if value is None and self.required:
            raise ValidationError(f"Field {self.name} is required")
        return value

    def _to_notion(self, value: t.Any) -> dict:
        """Convert a Python value to Notion format (subclasses implement)."""
        raise NotImplementedError

    def _from_notion(self, data: dict) -> t.Any:
        """Convert Notion format to a Python value (subclasses implement)."""
        raise NotImplementedError

    def _to_notion_property(self) -> dict:
        """Return the property definition for this field.

        The result maps the field name to a dict holding the property type
        and an empty configuration under that same type key.
        """
        return {self.name: {"type": self.NOTION_FIELD_TYPE, self.NOTION_FIELD_TYPE: {}}}

In [None]:
# | hide
# Hand-written sample property payloads, one per field kind, keyed by an
# illustrative name. The tests below wrap an entry under the field's own name
# before passing it to `_from_notion`.
SAMPLE_NOTION_DATA = {
    # "title" property holding a single text segment
    "title_field": {
        "type": "title",
        "title": [
            {
                "type": "text",
                "text": {"content": "What is product led growth?", "link": None},
                "plain_text": "What is product led growth?",
                "href": None,
            }
        ],
    },
    # "rich_text" property holding a single text segment
    "text_field": {
        "type": "rich_text",
        "rich_text": [
            {
                "type": "text",
                "text": {
                    "content": "Product-led Growth (PLG) is a business strategy",
                    "link": None,
                },
                "plain_text": "Product-led Growth (PLG) is a business strategy",
            }
        ],
    },
    # "select" property: one chosen option
    "select_field": {
        "type": "select",
        "select": {"id": "DJTw", "name": "elena", "color": "default"},
    },
    # "multi_select" property: a list of chosen options
    "multi_select_field": {
        "type": "multi_select",
        "multi_select": [
            {"id": "cf54414e", "name": "indexing_issues", "color": "orange"},
            {"id": "abc123", "name": "performance", "color": "blue"},
        ],
    },
    # "unique_id" property: the integer lives under the "number" key
    "id_field": {"type": "unique_id", "unique_id": {"prefix": None, "number": 42}},
    # "url" property: a plain string
    "url_field": {"type": "url", "url": "https://www.google.com"},
}

## `ID`

Represents a unique identifier for any NotionModel object.

In [None]:
# | export
class ID(Field[int], int):
    """Integer identifier field backed by Notion's ``unique_id`` property.

    Optional by default (``required=False``).
    """

    NOTION_FIELD_TYPE = "unique_id"
    _type = int

    def __new__(cls, *args, **kwargs):
        # Constructor arguments are field options, not an integer value, so
        # none of them are forwarded to int.
        return int.__new__(cls)

    def __init__(self, required: bool = False):
        super().__init__(required=required)

    def validate(self, value: t.Optional[int]) -> t.Optional[int]:
        """Run the base check, then require an ``int`` (``None`` passes through).

        Raises:
            ValidationError: If the value is neither ``None`` nor an ``int``,
                or if the base required-check fails.
        """
        checked = super().validate(value)
        if checked is None or isinstance(checked, int):
            return checked
        raise ValidationError(f"ID must be an integer, got {type(checked)}")

    def _to_notion(self, value: int) -> dict:
        """Wrap ``value`` as a ``unique_id`` payload keyed by the field name."""
        payload = {"type": "unique_id", "unique_id": value}
        return {self.name: payload}

    def _from_notion(self, data: dict) -> t.Optional[int]:
        """Read the ID number from ``data``.

        ``data`` may be the property mapping itself or a dict carrying it
        under ``"properties"``.

        Raises:
            ValidationError: If the field is absent and the field is required.
        """
        props = data["properties"] if "properties" in data else data
        if self.name in props:
            return props[self.name]["unique_id"]["number"]
        # Not present: only an error when the field is mandatory.
        if not self.required:
            return None
        raise ValidationError(
            f"ID field {self.name} is required but not found in the data"
        )

    def _to_notion_property(self) -> dict:
        """Return the property definition: a ``unique_id`` with no prefix."""
        definition = {"type": "unique_id", "unique_id": {"prefix": None}}
        return {self.name: definition}

In [None]:
#| hide
import pytest
from fastcore.test import *

In [None]:
# | hide
# The field is created outside a class body, so its name is assigned by hand.
field = ID()
field.name = "id"

# to_notion: the integer is wrapped under the field name
result = field._to_notion(42)
assert result == {"id": {"type": "unique_id", "unique_id": 42}}

# from_notion: the number is read back out of the unique_id payload
id_payload = {"id": SAMPLE_NOTION_DATA["id_field"]}
result = field._from_notion(id_payload)
assert result == 42

# from_notion: a missing, non-required ID yields None
result = field._from_notion({})
assert result is None

# validate: integers and None pass through unchanged
assert field.validate(42) == 42
assert field.validate(None) is None

# validate: anything else is rejected
with pytest.raises(ValidationError):
    field.validate("not_an_integer")  # type: ignore (since we are testing the validation)

## `Text`

Represents Notion's rich text property type.

In [None]:
#| export
class Text(Field[str], str):
    """Rich text property type.

    Values are written as a list of text segments of at most ``CHUNK_SIZE``
    characters each, and read back by concatenating every segment.
    """

    NOTION_FIELD_TYPE = "rich_text"
    _type = str
    CHUNK_SIZE = 2000  # Notion's character limit per rich text block

    def __new__(cls, *args, **kwargs):
        # Constructor arguments are field options, not string content.
        return str.__new__(cls)

    def __init__(self, required: bool = True):
        super().__init__(required=required)

    def _to_notion(self, value: str) -> dict:
        """Convert ``value`` to a rich-text payload keyed by the field name.

        A falsy value (empty string or ``None``) produces an empty segment
        list; longer values are split into ``CHUNK_SIZE``-character segments.
        """
        if not value:
            return {self.name: {self.NOTION_FIELD_TYPE: []}}

        # Split the text into chunks of CHUNK_SIZE characters
        chunks = [
            value[i : i + self.CHUNK_SIZE]
            for i in range(0, len(value), self.CHUNK_SIZE)
        ]
        rich_text_array = [{"text": {"content": chunk}} for chunk in chunks]

        return {self.name: {self.NOTION_FIELD_TYPE: rich_text_array}}

    @staticmethod
    def _segment_text(item: dict) -> str:
        """Return the textual content of one rich-text segment."""
        if "text" in item:
            return item["text"]["content"]
        # Segments without a "text" object (e.g. mentions or equations) were
        # previously dropped silently; keep their plain-text rendering so the
        # surrounding sentence stays intact.
        return item.get("plain_text") or ""

    def _from_notion(self, data: dict) -> t.Optional[str]:
        """Read the field from ``data`` and join all segments into one string.

        ``data`` may be the property mapping itself or a dict carrying it
        under ``"properties"``. Returns ``None`` when the segment list is
        empty.
        """
        # Handle both direct and properties-wrapped format
        source = data["properties"] if "properties" in data else data
        rich_text = source[self.name][self.NOTION_FIELD_TYPE]

        if not rich_text:
            return None

        # Combine all text chunks into a single string
        return "".join(self._segment_text(item) for item in rich_text)

#| hide

What should we test for `Text()`?

1. Make sure `validate` works for strings.
2. Make sure `validate` works for non-string Python objects - we can convert them to strings with Python.
3. Make sure texts longer than 2000 characters are split into chunks when written and joined back correctly when read.

In [None]:
# | hide
# The field is created outside a class body, so its name is assigned by hand.
field = Text()
field.name = "text"

# to_notion: a short string becomes a single rich-text segment
expected = {"text": {"rich_text": [{'text': {'content': "test"}}]}}
result = field._to_notion("test")
test_eq(result, expected)

# from_notion: the content of the sample payload is returned as-is
text_payload = {"text": SAMPLE_NOTION_DATA["text_field"]}
result = field._from_notion(text_payload)
test_eq(result, "Product-led Growth (PLG) is a business strategy")

# validate: strings pass through unchanged
test_eq(field.validate("test"), "test")
# Not enabled yet: coercion of non-strings and None handling
#test_eq(field.validate(42), "42")
#test_eq(field.validate(None), None)

In [None]:
# | hide
# Chunking behaviour of Text: writes split at 2000 characters, reads re-join.
field = Text()
field.name = "text"

# 1. Short text (under 2000 chars) stays in one segment
short_text = "This is a short text"
result = field._to_notion(short_text)
test_eq(result, {"text": {"rich_text": [{'text': {'content': short_text}}]}})
test_eq(len(result["text"]["rich_text"]), 1)

# 2. Exactly 2000 chars (boundary case) still fits in one segment
text_2000 = "a" * 2000
result = field._to_notion(text_2000)
segments = result["text"]["rich_text"]
test_eq(len(segments), 1)
test_eq(len(segments[0]["text"]["content"]), 2000)

# 3. Long text (over 2000 chars): 4500 chars -> 2000 + 2000 + 500
long_text = "a" * 4500
result = field._to_notion(long_text)
segments = result["text"]["rich_text"]
test_eq(len(segments), 3)
test_eq([len(seg["text"]["content"]) for seg in segments], [2000, 2000, 500])

# 4. from_notion concatenates multiple segments in order
multi_chunk_data = {
    "text": {
        "rich_text": [
            {"text": {"content": piece}}
            for piece in ("First chunk. ", "Second chunk. ", "Last chunk.")
        ]
    }
}
result = field._from_notion(multi_chunk_data)
test_eq(result, "First chunk. Second chunk. Last chunk.")

# 5. Empty text is written as an empty segment list
result = field._to_notion("")
test_eq(result, {"text": {"rich_text": []}})

# 6. An empty segment list is read back as None
empty_data = {"text": {"rich_text": []}}
result = field._from_notion(empty_data)
test_eq(result, None)

## `Title`

In [None]:
# | export
class Title(Field[str], str):
    """Title property type.

    Written as a single text segment; read back by concatenating every
    segment of the title.
    """

    NOTION_FIELD_TYPE = "title"
    _type = str

    def __new__(cls, *args, **kwargs):
        # Constructor arguments are field options, not string content.
        return str.__new__(cls)

    def __init__(self, required: bool = True):
        super().__init__(required=required)

    def _to_notion(self, value: str) -> dict:
        """Wrap ``value`` as a one-segment title payload keyed by the field name."""
        return {self.name: {self.NOTION_FIELD_TYPE: [{"text": {"content": value}}]}}

    def _from_notion(self, data: dict) -> t.Optional[str]:
        """Read the title from ``data`` as a single string.

        ``data`` may be the property mapping itself or a dict carrying it
        under ``"properties"``. Returns ``None`` when the segment list is
        empty.
        """
        source = data["properties"] if "properties" in data else data
        title = source[self.name][self.NOTION_FIELD_TYPE]
        if not title:
            return None
        # A title can arrive as several segments (for example when part of it
        # is formatted differently). Reading only the first one truncated such
        # titles, so join them all. Segments without a "text" object fall back
        # to their plain-text rendering instead of raising KeyError.
        return "".join(
            item["text"]["content"] if "text" in item else (item.get("plain_text") or "")
            for item in title
        )

In [None]:
# | export
class Select(Field[str], str):
    """Single-choice ``select`` property, optionally limited to known options."""

    NOTION_FIELD_TYPE = "select"
    _type = str

    def __new__(cls, *args, **kwargs):
        # Constructor arguments are field options, not string content.
        return str.__new__(cls)

    def __init__(self, options: t.Optional[list[str]] = None, required: bool = True):
        self.options = options
        super().__init__(required=required)

    def validate(self, value: t.Optional[str]) -> t.Optional[str]:
        """Run the base check, then enforce membership in ``options``.

        The empty string and ``None`` are returned without an options check.

        Raises:
            ValidationError: If options are configured and the value is not
                one of them, or if the base required-check fails.
        """
        checked = super().validate(value)
        # Empty string is allowed for optional fields; None has nothing to check.
        if checked == "" or checked is None:
            return checked
        if self.options and checked not in self.options:
            raise ValidationError(
                f"Value {checked} not in allowed options: {self.options}"
            )
        return checked

    def _to_notion(self, value: str) -> dict:
        """Wrap ``value`` as the selected option's name."""
        selected = {"name": value}
        return {self.name: {self.NOTION_FIELD_TYPE: selected}}

    def _from_notion(self, data: dict) -> t.Optional[str]:
        """Return the selected option's name, or ``None`` if nothing is selected.

        ``data`` may be the property mapping itself or a dict carrying it
        under ``"properties"``.
        """
        source = data["properties"] if "properties" in data else data
        select_data = source[self.name][self.NOTION_FIELD_TYPE]
        return None if select_data is None else select_data["name"]

    def _to_notion_property(self) -> dict:
        """Return the property definition, listing options when configured."""
        prop = super()._to_notion_property()
        if not self.options:
            return prop
        option_defs = [{"name": option} for option in self.options]
        prop[self.name]["select"]["options"] = option_defs
        return prop

In [None]:
# | export
class MultiSelect(Field[list[str]], list):
    """Multiple-choice ``multi_select`` property, optionally limited to known options."""

    NOTION_FIELD_TYPE = "multi_select"
    _type = list

    def __new__(cls, *args, **kwargs):
        # Constructor arguments are field options, not list content.
        return list.__new__(cls)

    def __init__(self, options: t.Optional[list[str]] = None, required: bool = True):
        self.options = options
        super().__init__(required=required)

    def validate(self, value: t.Optional[list[str]]) -> t.Optional[list[str]]:
        """Run the base check, then reject entries outside ``options``.

        Raises:
            ValidationError: If options are configured and any entry is not
                among them, or if the base required-check fails.
        """
        checked = super().validate(value)
        # Nothing to compare against when the value or the options are absent.
        if checked is None or not self.options:
            return checked
        invalid_options = [entry for entry in checked if entry not in self.options]
        if invalid_options:
            raise ValidationError(
                f"Values {invalid_options} not in allowed options: {self.options}"
            )
        return checked

    def _to_notion(self, value: list[str]) -> dict:
        """Wrap each entry of ``value`` as a named option."""
        selected = [{"name": option} for option in value]
        return {self.name: {self.NOTION_FIELD_TYPE: selected}}

    def _from_notion(self, data: dict) -> list[str]:
        """Return the names of the selected options (empty list when none).

        ``data`` may be the property mapping itself or a dict carrying it
        under ``"properties"``.
        """
        source = data["properties"] if "properties" in data else data
        multi_select = source[self.name][self.NOTION_FIELD_TYPE]
        if multi_select:
            return [item["name"] for item in multi_select]
        return []

    def _to_notion_property(self) -> dict:
        """Return the property definition, listing options when configured."""
        prop = super()._to_notion_property()
        if not self.options:
            return prop
        option_defs = [{"name": option} for option in self.options]
        prop[self.name]["multi_select"]["options"] = option_defs
        return prop

In [None]:
# | export
class URL(Field[str], str):
    """``url`` property holding a plain string.

    Optional by default (``required=False``).
    """

    NOTION_FIELD_TYPE = "url"
    _type = str

    def __new__(cls, *args, **kwargs):
        # Constructor arguments are field options, not string content.
        return str.__new__(cls)

    def __init__(self, required: bool = False):
        super().__init__(required=required)

    def validate(self, value: t.Optional[str]) -> t.Optional[str]:
        """Run the base check, then require a ``str`` (``None`` passes through).

        Raises:
            ValidationError: If the value is neither ``None`` nor a ``str``,
                or if the base required-check fails.
        """
        checked = super().validate(value)
        if checked is None or isinstance(checked, str):
            return checked
        raise ValidationError(f"URL must be a string, got {type(checked)}")

    def _to_notion(self, value: str) -> dict:
        """Place ``value`` directly under the ``url`` key."""
        body = {self.NOTION_FIELD_TYPE: value}
        return {self.name: body}

    def _from_notion(self, data: dict) -> t.Optional[str]:
        """Return the stored URL value unchanged.

        ``data`` may be the property mapping itself or a dict carrying it
        under ``"properties"``.
        """
        source = data["properties"] if "properties" in data else data
        return source[self.name][self.NOTION_FIELD_TYPE]