In [1]:
import re

from notion_client import Client
import os
from dotenv import load_dotenv
from utils import link2id, clean_latex
import pprint
from rich import print as prettyprint
import uuid
load_dotenv()

True

In [2]:
api_key = os.getenv('NOTION_KEY')

def fetch_notion_data(token, id):
    notion = Client(auth=token)
    page_data = notion.blocks.children.list(block_id=id)
    return page_data

In [3]:
link = "https://www.notion.so/ruyoga/testing-9dc1cff1b6e14172ab35cb42a1b4bfda?pvs=4"
id = link2id(link)

page_data = fetch_notion_data(api_key, id)

In [4]:
def generate_block_id():
    return uuid.uuid4()

In [5]:
def get_append_endpoint(id):
    """
    :param id: if im not mistaken you could also put the id of the page 
    :return: 
    """
    return f"https://api.notion.com/v1/blocks/{id}/children"

In [6]:
import uuid
import re

def generate_block_id():
    return str(uuid.uuid4())

def parse_to_notion_blocks(text):
    blocks = []
    square_re = r'\\\[(.*?)\\\]'
    parentheses_re = r'\\\((.*?)\\\)'

    # Split the string into paragraphs
    paragraphs = text.split('\n\n')
    for paragraph in paragraphs:
        # Check if the paragraph is a standalone equation
        if re.fullmatch(square_re, paragraph.strip()) or re.fullmatch(parentheses_re, paragraph.strip()):
            blocks.append(create_equation_block(paragraph.strip()[2:-2]))
        else:
            # Split the paragraph into text and inline equations
            parts = re.split(parentheses_re, paragraph)
            for part in parts:
                if part:
                    if re.fullmatch(square_re, part) or re.fullmatch(parentheses_re):
                        blocks.extend(create_equation_inline(part[2:-2]))
                    else:
                        blocks.extend(create_text_block(part))

    return {
        'object': 'list',
        'results': blocks,
        'next_cursor': None,
        'has_more': False,
        'type': 'block',
        'block': {},
        'request_id': generate_block_id()
    }

def create_text_block(content):
    if content.strip() == "":
        return []
    return [{
        'object': 'block',
        'id': generate_block_id(),
        'type': 'paragraph',
        'paragraph': {
            'text': [{
                'type': 'text',
                'text': {
                    'content': clean_latex(content),
                    'link': None
                }
            }]
        }
    }]

def create_equation_inline(expression):
    return [{
        'object': 'block',
        'id': generate_block_id(),
        'type': 'equation',
        'equation': {
            'expression': clean_latex(expression)
        }
    }]

def create_equation_block(expression):
    return {
        'object': 'block',
        'id': generate_block_id(),
        'type': 'equation',
        'equation': {
            'expression': clean_latex(expression)
        }
    }

# Example usage
input_text = r"""
Given an uniformly-distributed random variable \( X \) that takes only integers from \( a \) to \( b \) inclusive and

\[ P(3 \leq X \leq 7) = \frac{1}{21} \]

\[ P(0 \leq X \leq 5) = \frac{1}{35} \]

What is \( P(90 \leq X \leq 110) \), if \( a \leq 3 \) and \( b \geq 10 \)?
"""

notion_json = parse_to_notion_blocks(input_text)
print(notion_json)

{'object': 'list', 'results': [{'object': 'block', 'id': '779fbe78-4429-4373-b647-2471f439c22a', 'type': 'paragraph', 'paragraph': {'text': [{'type': 'text', 'text': {'content': '\nGiven an uniformly-distributed random variable  X  that takes only integers from  a  to  b  inclusive and', 'link': None}}]}}, {'object': 'block', 'id': '307c0c5f-c579-46e8-964e-e2f596713b92', 'type': 'paragraph', 'paragraph': {'text': [{'type': 'text', 'text': {'content': ' P(3 \\leq X \\leq 7) = \\frac{1}{21} ', 'link': None}}]}}, {'object': 'block', 'id': '18d64c96-7eaa-4ea4-8ff1-094b070551c5', 'type': 'paragraph', 'paragraph': {'text': [{'type': 'text', 'text': {'content': ' P(0 \\leq X \\leq 5) = \\frac{1}{35} ', 'link': None}}]}}, {'object': 'block', 'id': '68dc8e4a-aaa0-450b-be5a-fca262268641', 'type': 'paragraph', 'paragraph': {'text': [{'type': 'text', 'text': {'content': 'What is  P(90 \\leq X \\leq 110) , if  a \\leq 3  and  b \\geq 10 ?\n', 'link': None}}]}}], 'next_cursor': None, 'has_more': Fal

In [7]:
prettyprint(notion_json)

In [8]:
test_input = r"""

Given an uniformly-distributed random variable \( X \) that takes only integers from \( a \) to \( b \) inclusive and

\[ P(3 \leq X \leq 7) = \frac{1}{21} \]

\[ P(0 \leq X \leq 5) = \frac{1}{35} \]

What is \( P(90 \leq X \leq 110) \), if \( a \leq 3 \) and \( b \geq 10 \)?

"""

In [9]:
test_output = parse_to_notion_blocks(test_input)

In [10]:
prettyprint(test_output)