In [None]:
%load_ext autoreload
%autoreload 2
from IPython.display import display
import nest_asyncio
import os
from pprint import pprint
import sys

from notion_api import get_id_from_url
from _secrets import NOTION_API_KEY

nest_asyncio.apply()

FRONTPAGE_NOTION_URL = "https://www.notion.so/gregdetre-www/Greg-Detre-23436b4ed78942aa8b4932c0b12ebe5d"
FRONTPAGE_SITE_URL = "https://gregdetre-www.notion.site/Greg-Detre-23436b4ed78942aa8b4932c0b12ebe5d"
FRONTPAGE_NOTION_ID = get_id_from_url(FRONTPAGE_NOTION_URL)

DATABASE_NOTION_URL = "https://www.notion.so/gregdetre-www/Blog-cbadf8ca01994be28e4d1164ce787189"
DATABASE_NOTION_ID = get_id_from_url(DATABASE_NOTION_URL)

OUTPUT_DIR = './out/'

In [None]:
# # https://github.com/bogdankostic/notion-exporter

# from notion_exporter import NotionExporter

# exporter = NotionExporter(notion_token=NOTION_API_KEY)
# exported_pages = exporter.export_pages(page_ids=["23436b4ed78942aa8b4932c0b12ebe5d"])

# # exported_pages will be a dictionary where the keys are the page ids and 
# # the values are the page content in markdown format

# exported_pages
# page = list(exported_pages.values())[0]
# page

In [None]:
from notion_api import get_blocks
from notion_client import Client

notion = Client(auth=NOTION_API_KEY)

blocks = get_blocks(notion, FRONTPAGE_NOTION_ID, verbose=1)
blocks

In [None]:
# https://claude.ai/chat/dd788040-003a-4ca6-8569-7d6588e4523e

import json
from typing import List, Dict, Any

def notion_to_html(blocks: List[Dict[str, Any]]) -> str:
    html = []

    for block in blocks:
        block_id = block['id']
        block_type = block['type']

        if block_type == 'paragraph':
            html.append(paragraph_to_html(block, block_id))
        elif block_type == 'heading_1':
            html.append(heading_to_html(block, block_id, 'h1'))
        elif block_type == 'heading_2':
            html.append(heading_to_html(block, block_id, 'h2'))
        elif block_type == 'heading_3':
            html.append(heading_to_html(block, block_id, 'h3'))
        elif block_type == 'bulleted_list_item':
            html.append(list_item_to_html(block, block_id, 'ul'))
        elif block_type == 'numbered_list_item':
            html.append(list_item_to_html(block, block_id, 'ol'))
        elif block_type == 'to_do':
            html.append(todo_to_html(block, block_id))
        elif block_type == 'toggle':
            html.append(toggle_to_html(block, block_id))
        elif block_type == 'child_page':
            html.append(child_page_to_html(block, block_id))
        elif block_type == 'image':
            html.append(image_to_html(block, block_id))
        elif block_type == 'code':
            html.append(code_to_html(block, block_id))
        elif block_type == 'quote':
            html.append(quote_to_html(block, block_id))
        elif block_type == 'divider':
            html.append(f'<hr data-block-id="{block_id}">')
        else:
            html.append(f'<div data-block-id="{block_id}">Unsupported block type: {block_type}</div>')

    return '\n'.join(html)

def rich_text_to_html(rich_text: List[Dict[str, Any]]) -> str:
    html = []
    for text in rich_text:
        content = text['text']['content']
        link = text['text'].get('link')
        annotations = text['annotations']

        if annotations['bold']:
            content = f'<strong>{content}</strong>'
        if annotations['italic']:
            content = f'<em>{content}</em>'
        if annotations['strikethrough']:
            content = f'<del>{content}</del>'
        if annotations['underline']:
            content = f'<u>{content}</u>'
        if annotations['code']:
            content = f'<code>{content}</code>'

        if link:
            content = f'<a href="{link["url"]}">{content}</a>'

        html.append(content)

    return ''.join(html)

def paragraph_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['paragraph']['rich_text'])
    return f'<p data-block-id="{block_id}">{content}</p>'

def heading_to_html(block: Dict[str, Any], block_id: str, tag: str) -> str:
    content = rich_text_to_html(block[block['type']]['rich_text'])
    return f'<{tag} data-block-id="{block_id}">{content}</{tag}>'

def list_item_to_html(block: Dict[str, Any], block_id: str, list_type: str) -> str:
    content = rich_text_to_html(block[block['type']]['rich_text'])
    return f'<{list_type}><li data-block-id="{block_id}">{content}</li></{list_type}>'

def todo_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['to_do']['rich_text'])
    checked = 'checked' if block['to_do']['checked'] else ''
    return f'<div data-block-id="{block_id}"><input type="checkbox" {checked} disabled> {content}</div>'

def toggle_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['toggle']['rich_text'])
    return f'<details data-block-id="{block_id}"><summary>{content}</summary></details>'

def child_page_to_html(block: Dict[str, Any], block_id: str) -> str:
    title = block['child_page']['title']
    return f'<div data-block-id="{block_id}"><a href="#">{title}</a> (child page)</div>'

def image_to_html(block: Dict[str, Any], block_id: str) -> str:
    image_type = block['image']['type']
    if image_type == 'external':
        url = block['image']['external']['url']
    elif image_type == 'file':
        url = block['image']['file']['url']
    else:
        url = '#'
    caption = rich_text_to_html(block['image'].get('caption', []))
    return f'<figure data-block-id="{block_id}"><img src="{url}" alt="{caption}"><figcaption>{caption}</figcaption></figure>'

def code_to_html(block: Dict[str, Any], block_id: str) -> str:
    code = block['code']['rich_text'][0]['text']['content']
    language = block['code']['language']
    return f'<pre data-block-id="{block_id}"><code class="language-{language}">{code}</code></pre>'

def quote_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['quote']['rich_text'])
    return f'<blockquote data-block-id="{block_id}">{content}</blockquote>'

# Usage example
# blocks = json.loads(notion_api_output)  # Replace with your actual Notion API output
html_output = notion_to_html(blocks)
print(html_output)

In [None]:
def process_page_recursively(notion: Client, page_id: str, output_dir: str):
    blocks = get_blocks(notion, page_id)
    html_content = notion_to_html(blocks)
    
    # Get page title
    page = notion.pages.retrieve(page_id)
    title = page['properties'].get('title', {}).get('title', [{}])[0].get('plain_text', 'Untitled')
    
    # Create a safe filename
    safe_title = "".join([c for c in title if c.isalnum() or c in (' ', '-', '_')]).rstrip()
    filename = f"{safe_title}_{page_id}.html"
    
    # Write the HTML file
    with open(os.path.join(output_dir, filename), 'w', encoding='utf-8') as f:
        f.write(f"<!DOCTYPE html><html><head><title>{title}</title></head><body>")
        f.write(html_content)
        f.write("</body></html>")
    
    print(f"Created file: {filename}")
    
    # Process child pages
    for block in blocks:
        if block['type'] == 'child_page':
            child_page_id = block['id']
            process_page_recursively(notion, child_page_id, output_dir)

Hmmm. I ran it, feeding in the database Notion ID as the input page, and it generated just that html file, still with "Unsupported block type: child_database".

Looking at your code, I can't see where it even calls the new child_database_to_html file. Did I do something wrong?

In [None]:
def child_database_to_html(block: Dict[str, Any], block_id: str, notion: Client) -> str:
    database_id = block['id']
    database_title = block['child_database']['title']
    
    # Query the database
    results = notion.databases.query(database_id=database_id).get("results")
    
    # Generate HTML for database entries
    entries_html = []
    for page in results:
        page_id = page['id']
        page_title = page['properties']['Name']['title'][0]['text']['content']
        page_url = f"{page_title.lower().replace(' ', '-')}.html"
        entries_html.append(f'<li><a href="{page_url}">{page_title}</a></li>')
        
        # Generate individual page HTML
        page_blocks = notion.blocks.children.list(block_id=page_id).get("results")
        page_html = notion_to_html(page_blocks)
        
        # Write individual page HTML to file
        with open(page_url, 'w', encoding='utf-8') as f:
            f.write(f"<html><head><title>{page_title}</title></head><body>{page_html}</body></html>")
    
    # Return HTML for database listing
    return f'''
    <div data-block-id="{block_id}">
        <h2>{database_title}</h2>
        <ul>
            {''.join(entries_html)}
        </ul>
    </div>
    '''

def process_notion_page(notion: Client, page_id: str, output_dir: str = '.'):
    blocks = notion.blocks.children.list(block_id=page_id).get("results")
    html_output = notion_to_html(blocks)
    
    # Get page title
    page = notion.pages.retrieve(page_id=page_id)
    page_title = page['properties']['title']['title'][0]['text']['content']
    
    # Write main page HTML to file
    output_file = os.path.join(output_dir, f"{page_title.lower().replace(' ', '-')}.html")
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(f"<html><head><title>{page_title}</title></head><body>{html_output}</body></html>")
    
    print(f"Generated {output_file}")

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR, exist_ok=True)

process_notion_page(notion, DATABASE_NOTION_ID, OUTPUT_DIR)

In [None]:
blocks = get_blocks(notion, DATABASE_NOTION_ID, verbose=1)
blocks

Ok, we're making progress.

Please can we include the sub-page title in the html filename. Right now, they're all Untitled_[NOTION_ID].html

And please can we fix Unsupported block type: bulleted_list_item

In [None]:
def notion_to_html(blocks: list) -> str:
    html = []
    for block in blocks:
        block_id = block['id']
        block_type = block['type']

        if block_type == 'child_database':
            html.append(child_database_to_html(block, block_id))
        elif block_type == 'paragraph':
            html.append(paragraph_to_html(block, block_id))
        # ... (include other block type handlers)
        else:
            html.append(f'<div data-block-id="{block_id}">Unsupported block type: {block_type}</div>')

    return '\n'.join(html)

def child_database_to_html(block: Dict[str, Any], block_id: str) -> str:
    title = block['child_database']['title']
    return f'<div data-block-id="{block_id}"><h3>Database: {title}</h3><p>See separate files for database entries.</p></div>'

# ... (include other block type handler functions)

def process_page_recursively(notion: Client, page_id: str, output_dir: str):
    blocks = get_blocks(notion, page_id)
    html_content = notion_to_html(blocks)
    
    # Get page title
    page = notion.pages.retrieve(page_id)
    title = page['properties'].get('title', {}).get('title', [{}])[0].get('plain_text', 'Untitled')
    
    # Create a safe filename
    safe_title = "".join([c for c in title if c.isalnum() or c in (' ', '-', '_')]).rstrip()
    filename = f"{safe_title}_{page_id}.html"
    
    # Write the HTML file
    with open(os.path.join(output_dir, filename), 'w', encoding='utf-8') as f:
        f.write(f"<!DOCTYPE html><html><head><title>{title}</title></head><body>")
        f.write(html_content)
        f.write("</body></html>")
    
    print(f"Created file: {filename}")
    
    # Process child pages and databases
    for block in blocks:
        if block['type'] == 'child_page':
            child_page_id = block['id']
            process_page_recursively(notion, child_page_id, output_dir)
        elif block['type'] == 'child_database':
            process_database(notion, block['id'], output_dir)

def process_database(notion: Client, database_id: str, output_dir: str):
    database = notion.databases.retrieve(database_id)
    database_title = database['title'][0]['plain_text'] if database['title'] else 'Untitled Database'
    
    # Query all items in the database
    results = notion.databases.query(database_id=database_id).get('results')
    
    for item in results:
        item_id = item['id']
        item_title = item['properties'].get('Name', {}).get('title', [{}])[0].get('plain_text', 'Untitled')
        
        # Process each database item as a page
        process_page_recursively(notion, item_id, output_dir)

os.makedirs(OUTPUT_DIR, exist_ok=True)
    
# Start the recursive process from the root page
process_page_recursively(notion, DATABASE_NOTION_ID, OUTPUT_DIR)


Ok, that's good. A few issues:

- It produced 100 pages, out of about 170, all with titles from the second half of the alphabet. Could there be an issue with the pagination?
- It wrote the database child pages out to the current directory, but it should have written them into OUTPUT_DIR.
- Please add the page title as an H1 at the top.

In [None]:
import os
from typing import List, Dict, Any
from notion_client import Client

def notion_to_html(blocks: List[Dict[str, Any]], notion: Client) -> str:
    html = []

    for block in blocks:
        block_id = block['id']
        block_type = block['type']

        if block_type == 'paragraph':
            html.append(paragraph_to_html(block, block_id))
        elif block_type.startswith('heading_'):
            html.append(heading_to_html(block, block_id))
        elif block_type == 'bulleted_list_item':
            html.append(list_item_to_html(block, block_id, 'ul'))
        elif block_type == 'numbered_list_item':
            html.append(list_item_to_html(block, block_id, 'ol'))
        elif block_type == 'to_do':
            html.append(todo_to_html(block, block_id))
        elif block_type == 'toggle':
            html.append(toggle_to_html(block, block_id))
        elif block_type == 'child_page':
            html.append(child_page_to_html(block, block_id))
        elif block_type == 'image':
            html.append(image_to_html(block, block_id))
        elif block_type == 'code':
            html.append(code_to_html(block, block_id))
        elif block_type == 'quote':
            html.append(quote_to_html(block, block_id))
        elif block_type == 'divider':
            html.append(f'<hr data-block-id="{block_id}">')
        elif block_type == 'child_database':
            html.append(child_database_to_html(block, block_id, notion))
        else:
            html.append(f'<div data-block-id="{block_id}">Unsupported block type: {block_type}</div>')

    return '\n'.join(html)

def rich_text_to_html(rich_text: List[Dict[str, Any]]) -> str:
    html = []
    for text in rich_text:
        content = text['plain_text']
        link = text.get('href')
        annotations = text['annotations']

        if annotations['bold']:
            content = f'<strong>{content}</strong>'
        if annotations['italic']:
            content = f'<em>{content}</em>'
        if annotations['strikethrough']:
            content = f'<del>{content}</del>'
        if annotations['underline']:
            content = f'<u>{content}</u>'
        if annotations['code']:
            content = f'<code>{content}</code>'

        if link:
            content = f'<a href="{link}">{content}</a>'

        html.append(content)

    return ''.join(html)

def paragraph_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['paragraph']['rich_text'])
    return f'<p data-block-id="{block_id}">{content}</p>'

def heading_to_html(block: Dict[str, Any], block_id: str) -> str:
    heading_level = block['type'].split('_')[1]
    content = rich_text_to_html(block[block['type']]['rich_text'])
    return f'<h{heading_level} data-block-id="{block_id}">{content}</h{heading_level}>'

def list_item_to_html(block: Dict[str, Any], block_id: str, list_type: str) -> str:
    content = rich_text_to_html(block[block['type']]['rich_text'])
    return f'<li data-block-id="{block_id}">{content}</li>'

def todo_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['to_do']['rich_text'])
    checked = 'checked' if block['to_do']['checked'] else ''
    return f'<div data-block-id="{block_id}"><input type="checkbox" {checked} disabled> {content}</div>'

def toggle_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['toggle']['rich_text'])
    return f'<details data-block-id="{block_id}"><summary>{content}</summary></details>'

def child_page_to_html(block: Dict[str, Any], block_id: str) -> str:
    title = block['child_page']['title']
    return f'<div data-block-id="{block_id}"><a href="#">{title}</a> (child page)</div>'

def image_to_html(block: Dict[str, Any], block_id: str) -> str:
    image_type = block['image']['type']
    if image_type == 'external':
        url = block['image']['external']['url']
    elif image_type == 'file':
        url = block['image']['file']['url']
    else:
        url = '#'
    caption = rich_text_to_html(block['image'].get('caption', []))
    return f'<figure data-block-id="{block_id}"><img src="{url}" alt="{caption}"><figcaption>{caption}</figcaption></figure>'

def code_to_html(block: Dict[str, Any], block_id: str) -> str:
    code = block['code']['rich_text'][0]['plain_text']
    language = block['code']['language']
    return f'<pre data-block-id="{block_id}"><code class="language-{language}">{code}</code></pre>'

def quote_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['quote']['rich_text'])
    return f'<blockquote data-block-id="{block_id}">{content}</blockquote>'

def child_database_to_html(block: Dict[str, Any], block_id: str, notion: Client) -> str:
    database_id = block['id']
    database_title = block['child_database']['title']
    
    # Query the database
    results = notion.databases.query(database_id=database_id).get("results")
    
    # Generate HTML for database entries
    entries_html = []
    for page in results:
        page_id = page['id']
        page_title = page['properties']['Name']['title'][0]['plain_text']
        page_url = f"{page_title.replace(' ', '_')}_{page_id}.html"
        entries_html.append(f'<li><a href="{page_url}">{page_title}</a></li>')
        
        # Generate individual page HTML
        page_blocks = notion.blocks.children.list(block_id=page_id).get("results")
        page_html = notion_to_html(page_blocks, notion)
        
        # Write individual page HTML to file
        with open(page_url, 'w', encoding='utf-8') as f:
            f.write(f"<!DOCTYPE html><html><head><title>{page_title}</title></head><body>{page_html}</body></html>")
    
    # Return HTML for database listing
    return f'''
    <div data-block-id="{block_id}">
        <h2>{database_title}</h2>
        <ul>
            {''.join(entries_html)}
        </ul>
    </div>
    '''

def process_notion_page(notion: Client, page_id: str, output_dir: str = '.'):
    blocks = notion.blocks.children.list(block_id=page_id).get("results")
    html_output = notion_to_html(blocks, notion)
    
    # Get page title
    page = notion.pages.retrieve(page_id=page_id)
    page_title = page['properties']['title']['title'][0]['plain_text'] if 'title' in page['properties'] else 'Untitled'
    
    # Write main page HTML to file
    safe_title = "".join([c for c in page_title if c.isalnum() or c in (' ', '-', '_')]).rstrip()
    output_file = os.path.join(output_dir, f"{safe_title}_{page_id}.html")
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(f"<!DOCTYPE html><html><head><title>{page_title}</title></head><body>{html_output}</body></html>")
    
    print(f"Generated {output_file}")

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

process_notion_page(notion, DATABASE_NOTION_ID, OUTPUT_DIR)

All of the filenames and H1s are 'Untitled'.

In [None]:
import os
from typing import List, Dict, Any
from notion_client import Client

def notion_to_html(blocks: List[Dict[str, Any]], notion: Client, output_dir: str) -> str:
    html = []

    for block in blocks:
        block_id = block['id']
        block_type = block['type']

        if block_type == 'paragraph':
            html.append(paragraph_to_html(block, block_id))
        elif block_type.startswith('heading_'):
            html.append(heading_to_html(block, block_id))
        elif block_type == 'bulleted_list_item':
            html.append(list_item_to_html(block, block_id, 'ul'))
        elif block_type == 'numbered_list_item':
            html.append(list_item_to_html(block, block_id, 'ol'))
        elif block_type == 'to_do':
            html.append(todo_to_html(block, block_id))
        elif block_type == 'toggle':
            html.append(toggle_to_html(block, block_id))
        elif block_type == 'child_page':
            html.append(child_page_to_html(block, block_id))
        elif block_type == 'image':
            html.append(image_to_html(block, block_id))
        elif block_type == 'code':
            html.append(code_to_html(block, block_id))
        elif block_type == 'quote':
            html.append(quote_to_html(block, block_id))
        elif block_type == 'divider':
            html.append(f'<hr data-block-id="{block_id}">')
        elif block_type == 'child_database':
            html.append(child_database_to_html(block, block_id, notion, output_dir))
        else:
            html.append(f'<div data-block-id="{block_id}">Unsupported block type: {block_type}</div>')

    return '\n'.join(html)

# ... (other helper functions remain the same)

def child_database_to_html(block: Dict[str, Any], block_id: str, notion: Client, output_dir: str) -> str:
    database_id = block['id']
    database_title = block['child_database']['title']
    
    # Query the database with pagination
    has_more = True
    start_cursor = None
    entries_html = []
    
    while has_more:
        response = notion.databases.query(
            database_id=database_id,
            start_cursor=start_cursor,
            page_size=100  # Adjust as needed
        )
        results = response.get("results", [])
        
        for page in results:
            page_id = page['id']
            page_properties = page['properties']
            page_title = get_page_title(page_properties)
            safe_title = get_safe_filename(page_title)
            page_url = f"{safe_title}_{page_id}.html"
            entries_html.append(f'<li><a href="{page_url}">{page_title}</a></li>')
            
            # Generate individual page HTML
            page_blocks = notion.blocks.children.list(block_id=page_id).get("results")
            page_html = notion_to_html(page_blocks, notion, output_dir)
            
            # Write individual page HTML to file
            with open(os.path.join(output_dir, page_url), 'w', encoding='utf-8') as f:
                f.write(f"""<!DOCTYPE html>
<html>
<head>
    <title>{page_title}</title>
</head>
<body>
    <h1>{page_title}</h1>
    {page_html}
</body>
</html>""")
        
        has_more = response.get("has_more", False)
        start_cursor = response.get("next_cursor")
    
    # Return HTML for database listing
    return f"""
    <div data-block-id="{block_id}">
        <h2>{database_title}</h2>
        <ul>
            {''.join(entries_html)}
        </ul>
    </div>
    """

def get_page_title(properties: Dict[str, Any]) -> str:
    # Try to get title from 'Name' property first
    name_prop = properties.get('Name', {})
    if name_prop and name_prop.get('title'):
        return name_prop['title'][0]['plain_text']
    
    # If 'Name' doesn't exist, look for a property of type 'title'
    for prop in properties.values():
        if prop['type'] == 'title' and prop['title']:
            return prop['title'][0]['plain_text']
    
    # If no title found, return 'Untitled'
    return 'Untitled'

def get_safe_filename(title: str) -> str:
    return "".join([c for c in title if c.isalnum() or c in (' ', '-', '_')]).rstrip().replace(' ', '_')

def process_notion_page(notion: Client, page_id: str, output_dir: str = '.'):
    blocks = notion.blocks.children.list(block_id=page_id).get("results")
    
    # Get page title
    page = notion.pages.retrieve(page_id=page_id)
    page_title = get_page_title(page['properties'])
    
    # Generate HTML content
    html_output = f"<h1>{page_title}</h1>\n" + notion_to_html(blocks, notion, output_dir)
    
    # Write main page HTML to file
    safe_title = get_safe_filename(page_title)
    output_file = os.path.join(output_dir, f"{safe_title}_{page_id}.html")
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(f"""<!DOCTYPE html>
<html>
<head>
    <title>{page_title}</title>
</head>
<body>
    {html_output}
</body>
</html>""")
    
    print(f"Generated {output_file}")
    # 1/0



notion = Client(auth=NOTION_API_KEY)

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# process_notion_page(notion, DATABASE_NOTION_ID, OUTPUT_DIR)
process_notion_page(notion, FRONTPAGE_NOTION_ID, OUTPUT_DIR)

This is great.

It works if I feed in the DATABASE_NOTION_ID as the root, finding all the subpages recursively.

But I tried running it with the original FRONTPAGE_NOTION_ID - that is my homepage, and it contains the DATABASE_NOTION_ID page as a child. This time it didn't process recursively. It stopped after just a single page, and didn't even get to the database...

In [None]:
def notion_to_html(blocks: list, notion: Client, output_dir: str) -> str:
    html = []
    for block in blocks:
        block_id = block['id']
        block_type = block['type']

        if block_type == 'child_database':
            html.append(child_database_to_html(block, block_id))
        elif block_type == 'paragraph':
            html.append(paragraph_to_html(block, block_id))
        elif block_type == 'bulleted_list_item':
            html.append(bulleted_list_item_to_html(block, block_id))
        elif block_type == 'child_page':
            html.append(child_page_to_html(block, block_id))
        # ... (include other block type handlers)
        else:
            html.append(f'<div data-block-id="{block_id}">Unsupported block type: {block_type}</div>')

    return '\n'.join(html)

def child_database_to_html(block: Dict[str, Any], block_id: str) -> str:
    title = block['child_database']['title']
    return f'<div data-block-id="{block_id}"><h3>Database: {title}</h3><p>See separate files for database entries.</p></div>'

def paragraph_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['paragraph']['rich_text'])
    return f'<p data-block-id="{block_id}">{content}</p>'

def bulleted_list_item_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['bulleted_list_item']['rich_text'])
    return f'<ul><li data-block-id="{block_id}">{content}</li></ul>'

def child_page_to_html(block: Dict[str, Any], block_id: str) -> str:
    title = block['child_page']['title']
    return f'<div data-block-id="{block_id}"><h3>Child Page: {title}</h3><p>See separate file for this child page.</p></div>'

def rich_text_to_html(rich_text: List[Dict[str, Any]]) -> str:
    html = []
    for text in rich_text:
        content = text['plain_text']
        link = text.get('href')
        annotations = text['annotations']

        if annotations['bold']:
            content = f'<strong>{content}</strong>'
        if annotations['italic']:
            content = f'<em>{content}</em>'
        if annotations['strikethrough']:
            content = f'<del>{content}</del>'
        if annotations['underline']:
            content = f'<u>{content}</u>'
        if annotations['code']:
            content = f'<code>{content}</code>'

        if link:
            content = f'<a href="{link}">{content}</a>'

        html.append(content)

    return ''.join(html)

# ... (include other block type handler functions)

def process_page_recursively(notion: Client, page_id: str, output_dir: str):
    blocks = get_blocks(notion, page_id)
    
    # Get page title
    page = notion.pages.retrieve(page_id)
    title = get_page_title(page)
    
    # Add title as H1 at the top
    html_content = f'<h1>{title}</h1>\n' + notion_to_html(blocks, notion, output_dir)
    
    # Create a safe filename
    safe_title = "".join([c for c in title if c.isalnum() or c in (' ', '-', '_')]).rstrip()
    filename = f"{safe_title}_{page_id}.html"
    
    # Write the HTML file
    with open(os.path.join(output_dir, filename), 'w', encoding='utf-8') as f:
        f.write(f"<!DOCTYPE html><html><head><title>{title}</title></head><body>")
        f.write(html_content)
        f.write("</body></html>")
    
    print(f"Created file: {filename}")
    
    # Process child pages and databases
    for block in blocks:
        if block['type'] == 'child_page':
            child_page_id = block['id']
            process_page_recursively(notion, child_page_id, output_dir)
        elif block['type'] == 'child_database':
            process_database(notion, block['id'], output_dir)

def process_database(notion: Client, database_id: str, output_dir: str):
    database = notion.databases.retrieve(database_id)
    database_title = database['title'][0]['plain_text'] if database['title'] else 'Untitled Database'
    
    print(f"Processing database: {database_title}")
    
    # Query all items in the database with pagination
    has_more = True
    start_cursor = None
    while has_more:
        response = notion.databases.query(
            database_id=database_id,
            start_cursor=start_cursor,
            page_size=100  # You can adjust this value
        )
        results = response['results']
        
        for item in results:
            item_id = item['id']
            item_title = get_page_title(item)
            print(f"Processing database item: {item_title}")
            
            # Process each database item as a page
            process_page_recursively(notion, item_id, output_dir)
        
        has_more = response['has_more']
        start_cursor = response.get('next_cursor')

def get_blocks(notion: Client, page_id: str) -> List[Dict[str, Any]]:
    all_blocks = []
    start_cursor = None
    
    while True:
        response = notion.blocks.children.list(
            block_id=page_id,
            start_cursor=start_cursor,
            page_size=100  # You can adjust this value
        )
        
        blocks = response['results']
        all_blocks.extend(blocks)
        
        if not response['has_more']:
            break
        
        start_cursor = response['next_cursor']
    
    return all_blocks

def get_page_title(page: Dict[str, Any]) -> str:
    if 'properties' in page:
        title_property = next((prop for prop in page['properties'].values() if prop['type'] == 'title'), None)
        if title_property:
            return title_property['title'][0]['plain_text'] if title_property['title'] else 'Untitled'
    return page.get('child_page', {}).get('title', 'Untitled')

Good. That fixed it. Please can we:

- Instead of Child Page: Startups. See separate file for this child page., please can we include a link from the title.
- In the database, instead of See separate files for database entries., please include a list of the entries with titles as links.
- Also handle table of contents, instead of Unsupported block type: table_of_contents
Thank you! This is going really well.

In [None]:
def notion_to_html(blocks: List[Dict[str, Any]], notion: Client, output_dir: str) -> str:
    html = []

    for block in blocks:
        block_id = block['id']
        block_type = block['type']

        if block_type == 'paragraph':
            html.append(paragraph_to_html(block, block_id))
        elif block_type.startswith('heading_'):
            html.append(heading_to_html(block, block_id))
        elif block_type == 'bulleted_list_item':
            html.append(list_item_to_html(block, block_id, 'ul'))
        elif block_type == 'numbered_list_item':
            html.append(list_item_to_html(block, block_id, 'ol'))
        elif block_type == 'to_do':
            html.append(todo_to_html(block, block_id))
        elif block_type == 'toggle':
            html.append(toggle_to_html(block, block_id))
        elif block_type == 'child_page':
            html.append(child_page_to_html(block, block_id, output_dir))
        elif block_type == 'image':
            html.append(image_to_html(block, block_id))
        elif block_type == 'code':
            html.append(code_to_html(block, block_id))
        elif block_type == 'quote':
            html.append(quote_to_html(block, block_id))
        elif block_type == 'divider':
            html.append(f'<hr data-block-id="{block_id}">')
        elif block_type == 'child_database':
            html.append(child_database_to_html(block, block_id, notion, output_dir))
        elif block_type == 'table_of_contents':
            html.append(table_of_contents_to_html(block, block_id))
        else:
            html.append(f'<div data-block-id="{block_id}">Unsupported block type: {block_type}</div>')

    return '\n'.join(html)

# ... (other helper functions remain the same)

def child_page_to_html(block: Dict[str, Any], block_id: str, output_dir: str) -> str:
    title = block['child_page']['title']
    safe_title = get_safe_filename(title)
    link = f"{safe_title}_{block_id}.html"
    return f'<div data-block-id="{block_id}"><h3>Child Page: <a href="{link}">{title}</a></h3></div>'

def child_database_to_html(block: Dict[str, Any], block_id: str, notion: Client, output_dir: str) -> str:
    database_id = block['id']
    database_title = block['child_database']['title']
    
    # Query the database with pagination
    has_more = True
    start_cursor = None
    entries_html = []
    
    while has_more:
        response = notion.databases.query(
            database_id=database_id,
            start_cursor=start_cursor,
            page_size=100  # Adjust as needed
        )
        results = response.get("results", [])
        
        for page in results:
            page_id = page['id']
            page_properties = page['properties']
            page_title = get_page_title(page_properties)
            safe_title = get_safe_filename(page_title)
            page_url = f"{safe_title}_{page_id}.html"
            entries_html.append(f'<li><a href="{page_url}">{page_title}</a></li>')
            
            # Generate individual page HTML
            page_blocks = notion.blocks.children.list(block_id=page_id).get("results")
            page_html = notion_to_html(page_blocks, notion, output_dir)
            
            # Write individual page HTML to file
            with open(os.path.join(output_dir, page_url), 'w', encoding='utf-8') as f:
                f.write(f"""<!DOCTYPE html>
<html>
<head>
    <title>{page_title}</title>
</head>
<body>
    <h1>{page_title}</h1>
    {page_html}
</body>
</html>""")
        
        has_more = response.get("has_more", False)
        start_cursor = response.get("next_cursor")
    
    # Return HTML for database listing
    return f"""
    <div data-block-id="{block_id}">
        <h2>{database_title}</h2>
        <ul>
            {''.join(entries_html)}
        </ul>
    </div>
    """

def table_of_contents_to_html(block: Dict[str, Any], block_id: str) -> str:
    return f'<div data-block-id="{block_id}"><nav><h2>Table of Contents</h2><p>Table of contents will be generated automatically.</p></nav></div>'

There was a minor error where a call to notion_to_html was missing the 3rd output_dir argument, but I was able to fix that.

And unfortunately we've lost the titles again - everything is Untitled

In [None]:
import os
from typing import Dict, Any, List
from notion_client import Client

def notion_to_html(blocks: list, notion: Client, output_dir: str) -> str:
    html = []
    for block in blocks:
        block_id = block['id']
        block_type = block['type']

        if block_type == 'child_database':
            html.append(child_database_to_html(block, block_id, notion, output_dir))
        elif block_type == 'paragraph':
            html.append(paragraph_to_html(block, block_id))
        elif block_type == 'bulleted_list_item':
            html.append(bulleted_list_item_to_html(block, block_id))
        elif block_type == 'child_page':
            html.append(child_page_to_html(block, block_id, output_dir))
        elif block_type == 'table_of_contents':
            html.append(table_of_contents_to_html(block, block_id))
        # ... (include other block type handlers)
        else:
            html.append(f'<div data-block-id="{block_id}">Unsupported block type: {block_type}</div>')

    return '\n'.join(html)

def child_database_to_html(block: Dict[str, Any], block_id: str, notion: Client, output_dir: str) -> str:
    database_id = block['id']
    database_title = block['child_database']['title']
    
    # Query the database
    results = notion.databases.query(database_id=database_id).get("results")
    
    # Generate HTML for database entries
    entries_html = []
    for page in results:
        page_id = page['id']
        page_title = get_page_title(page)
        page_filename = f"{get_safe_filename(page_title)}_{page_id}.html"
        entries_html.append(f'<li><a href="{page_filename}">{page_title}</a></li>')
    
    # Return HTML for database listing
    return f'''
    <div data-block-id="{block_id}">
        <h3>Database: {database_title}</h3>
        <ul>
            {''.join(entries_html)}
        </ul>
    </div>
    '''

def paragraph_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['paragraph']['rich_text'])
    return f'<p data-block-id="{block_id}">{content}</p>'

def bulleted_list_item_to_html(block: Dict[str, Any], block_id: str) -> str:
    content = rich_text_to_html(block['bulleted_list_item']['rich_text'])
    return f'<ul><li data-block-id="{block_id}">{content}</li></ul>'

def child_page_to_html(block: Dict[str, Any], block_id: str, output_dir: str) -> str:
    title = block['child_page']['title']
    filename = f"{get_safe_filename(title)}_{block_id}.html"
    return f'<div data-block-id="{block_id}"><h3>Child Page: <a href="{filename}">{title}</a></h3></div>'

def table_of_contents_to_html(block: Dict[str, Any], block_id: str) -> str:
    return f'<div data-block-id="{block_id}"><nav><h4>Table of Contents</h4><ul id="table-of-contents"></ul></nav></div>'

def rich_text_to_html(rich_text: List[Dict[str, Any]]) -> str:
    html = []
    for text in rich_text:
        content = text['plain_text']
        link = text.get('href')
        annotations = text['annotations']

        if annotations['bold']:
            content = f'<strong>{content}</strong>'
        if annotations['italic']:
            content = f'<em>{content}</em>'
        if annotations['strikethrough']:
            content = f'<del>{content}</del>'
        if annotations['underline']:
            content = f'<u>{content}</u>'
        if annotations['code']:
            content = f'<code>{content}</code>'

        if link:
            content = f'<a href="{link}">{content}</a>'

        html.append(content)

    return ''.join(html)

def process_page_recursively(notion: Client, page_id: str, output_dir: str):
    blocks = get_blocks(notion, page_id)
    
    # Get page title
    page = notion.pages.retrieve(page_id)
    title = get_page_title(page)
    
    # Add title as H1 at the top
    html_content = f'<h1>{title}</h1>\n' + notion_to_html(blocks, notion, output_dir)
    
    # Create a safe filename
    filename = f"{get_safe_filename(title)}_{page_id}.html"
    
    # Write the HTML file
    with open(os.path.join(output_dir, filename), 'w', encoding='utf-8') as f:
        f.write(f"<!DOCTYPE html><html><head><title>{title}</title></head><body>")
        f.write(html_content)
        f.write("</body></html>")
    
    print(f"Created file: {filename}")
    
    # Process child pages and databases
    for block in blocks:
        if block['type'] == 'child_page':
            child_page_id = block['id']
            process_page_recursively(notion, child_page_id, output_dir)
        elif block['type'] == 'child_database':
            process_database(notion, block['id'], output_dir)

def process_database(notion: Client, database_id: str, output_dir: str):
    database = notion.databases.retrieve(database_id)
    database_title = database['title'][0]['plain_text'] if database['title'] else 'Untitled Database'
    
    print(f"Processing database: {database_title}")
    
    # Query all items in the database with pagination
    has_more = True
    start_cursor = None
    while has_more:
        response = notion.databases.query(
            database_id=database_id,
            start_cursor=start_cursor,
            page_size=100  # You can adjust this value
        )
        results = response['results']
        
        for item in results:
            item_id = item['id']
            item_title = get_page_title(item)
            print(f"Processing database item: {item_title}")
            
            # Process each database item as a page
            process_page_recursively(notion, item_id, output_dir)
        
        has_more = response['has_more']
        start_cursor = response.get('next_cursor')

def get_blocks(notion: Client, page_id: str) -> List[Dict[str, Any]]:
    all_blocks = []
    start_cursor = None
    
    while True:
        response = notion.blocks.children.list(
            block_id=page_id,
            start_cursor=start_cursor,
            page_size=100  # You can adjust this value
        )
        
        blocks = response['results']
        all_blocks.extend(blocks)
        
        if not response['has_more']:
            break
        
        start_cursor = response['next_cursor']
    
    return all_blocks

def get_page_title(page: Dict[str, Any]) -> str:
    if 'properties' in page:
        title_property = next((prop for prop in page['properties'].values() if prop['type'] == 'title'), None)
        if title_property and title_property['title']:
            return title_property['title'][0]['plain_text']
    elif 'child_page' in page:
        return page['child_page']['title']
    return 'Untitled'

def get_safe_filename(title: str) -> str:
    return "".join([c for c in title if c.isalnum() or c in (' ', '-', '_')]).rstrip().replace(' ', '_')

That is a big improvement.

Please fix:
* Even though it is outputting all of the files, the database page that links to them only includes half of them.
* Unsupported block type: divider

In [None]:
import os
from typing import List, Dict, Any
from notion_client import Client

def notion_to_html(blocks: List[Dict[str, Any]], notion: Client, output_dir: str) -> str:
    html = []

    for block in blocks:
        block_id = block['id']
        block_type = block['type']

        if block_type == 'paragraph':
            html.append(paragraph_to_html(block, block_id))
        elif block_type.startswith('heading_'):
            html.append(heading_to_html(block, block_id))
        elif block_type == 'bulleted_list_item':
            html.append(list_item_to_html(block, block_id, 'ul'))
        elif block_type == 'numbered_list_item':
            html.append(list_item_to_html(block, block_id, 'ol'))
        elif block_type == 'to_do':
            html.append(todo_to_html(block, block_id))
        elif block_type == 'toggle':
            html.append(toggle_to_html(block, block_id))
        elif block_type == 'child_page':
            html.append(child_page_to_html(block, block_id, output_dir))
        elif block_type == 'image':
            html.append(image_to_html(block, block_id))
        elif block_type == 'code':
            html.append(code_to_html(block, block_id))
        elif block_type == 'quote':
            html.append(quote_to_html(block, block_id))
        elif block_type == 'divider':
            html.append(divider_to_html(block_id))
        elif block_type == 'child_database':
            html.append(child_database_to_html(block, block_id, notion, output_dir))
        elif block_type == 'table_of_contents':
            html.append(table_of_contents_to_html(block, block_id))
        else:
            html.append(f'<div data-block-id="{block_id}">Unsupported block type: {block_type}</div>')

    return '\n'.join(html)

# ... (other helper functions remain the same)

def get_page_title(page: Dict[str, Any]) -> str:
    if 'properties' in page:
        for prop in page['properties'].values():
            if prop['type'] == 'title' and prop['title']:
                return prop['title'][0]['plain_text']
    elif 'child_page' in page:
        return page['child_page']['title']
    return 'Untitled'

def child_database_to_html(block: Dict[str, Any], block_id: str, notion: Client, output_dir: str) -> str:
    database_id = block['id']
    database_title = block['child_database']['title']
    
    # Query the database with pagination
    entries_html = []
    start_cursor = None
    
    while True:
        response = notion.databases.query(
            database_id=database_id,
            start_cursor=start_cursor,
            page_size=100  # Adjust as needed
        )
        results = response.get("results", [])
        
        for page in results:
            page_id = page['id']
            page_title = get_page_title(page)
            safe_title = get_safe_filename(page_title)
            page_url = f"{safe_title}_{page_id}.html"
            entries_html.append(f'<li><a href="{page_url}">{page_title}</a></li>')
            
            # Generate individual page HTML
            page_blocks = notion.blocks.children.list(block_id=page_id).get("results")
            page_html = notion_to_html(page_blocks, notion, output_dir)
            
            # Write individual page HTML to file
            with open(os.path.join(output_dir, page_url), 'w', encoding='utf-8') as f:
                f.write(f"""<!DOCTYPE html>
<html>
<head>
    <title>{page_title}</title>
</head>
<body>
    <h1>{page_title}</h1>
    {page_html}
</body>
</html>""")
        
        if not response.get("has_more"):
            break
        start_cursor = response.get("next_cursor")
    
    # Return HTML for database listing
    return f"""
    <div data-block-id="{block_id}">
        <h2>{database_title}</h2>
        <ul>
            {''.join(entries_html)}
        </ul>
    </div>
    """

def divider_to_html(block_id: str) -> str:
    return f'<hr data-block-id="{block_id}">'

# ... (rest of the code remains the same)

def process_page_recursively(notion: Client, page_id: str, output_dir: str):
    blocks = get_blocks(notion, page_id)
    
    # Get page title
    page = notion.pages.retrieve(page_id)
    title = get_page_title(page)
    
    # Add title as H1 at the top
    html_content = f'<h1>{title}</h1>\n' + notion_to_html(blocks, notion, output_dir)
    
    # Create a safe filename
    safe_title = get_safe_filename(title)
    filename = f"{safe_title}_{page_id}.html"
    
    # Write the HTML file
    with open(os.path.join(output_dir, filename), 'w', encoding='utf-8') as f:
        f.write(f"""<!DOCTYPE html>
<html>
<head>
    <title>{title}</title>
</head>
<body>
    {html_content}
</body>
</html>""")
    
    print(f"Created file: {filename}")
    
    # Process child pages and databases
    for block in blocks:
        if block['type'] == 'child_page':
            child_page_id = block['id']
            process_page_recursively(notion, child_page_id, output_dir)
        elif block['type'] == 'child_database':
            process_database(notion, block['id'], output_dir)

def get_safe_filename(title: str) -> str:
    return "".join([c for c in title if c.isalnum() or c in (' ', '-', '_')]).rstrip().replace(' ', '_')

# ... (rest of the code remains the same)

In [None]:
# that was 240809_0333. it worked pretty well. now i'm asking for database fields to be included, and testing anchor links

Please include the database fields at the top of each database child page.

In [None]:
import os
from typing import Dict, Any, List
from notion_client import Client

def notion_to_html(blocks: list, notion: Client, output_dir: str) -> str:
    html = []
    for block in blocks:
        block_id = block['id']
        block_type = block['type']

        if block_type == 'child_database':
            html.append(child_database_to_html(block, block_id, notion, output_dir))
        elif block_type == 'paragraph':
            html.append(paragraph_to_html(block, block_id))
        elif block_type == 'bulleted_list_item':
            html.append(bulleted_list_item_to_html(block, block_id))
        elif block_type == 'child_page':
            html.append(child_page_to_html(block, block_id, output_dir))
        elif block_type == 'table_of_contents':
            html.append(table_of_contents_to_html(block, block_id))
        elif block_type == 'divider':
            html.append(divider_to_html(block, block_id))
        # ... (include other block type handlers)
        else:
            html.append(f'<div data-block-id="{block_id}">Unsupported block type: {block_type}</div>')

    return '\n'.join(html)

def child_database_to_html(block: Dict[str, Any], block_id: str, notion: Client, output_dir: str) -> str:
    database_id = block['id']
    database_title = block['child_database']['title']
    
    # Query the database with pagination
    entries_html = []
    start_cursor = None
    
    while True:
        response = notion.databases.query(
            database_id=database_id,
            start_cursor=start_cursor,
            page_size=100  # Adjust as needed
        )
        results = response.get("results", [])
        
        for page in results:
            page_id = page['id']
            page_title = get_page_title(page)
            page_filename = f"{get_safe_filename(page_title)}_{page_id}.html"
            entries_html.append(f'<li><a href="{page_filename}">{page_title}</a></li>')
        
        if not response.get("has_more", False):
            break
        
        start_cursor = response.get("next_cursor")
    
    # Return HTML for database listing
    return f'''
    <div data-block-id="{block_id}">
        <h3>Database: {database_title}</h3>
        <ul>
            {''.join(entries_html)}
        </ul>
    </div>
    '''

# ... (other helper functions remain the same)

def divider_to_html(block: Dict[str, Any], block_id: str) -> str:
    return f'<hr data-block-id="{block_id}">'

def process_page_recursively(notion: Client, page_id: str, output_dir: str, database_properties=None):
    blocks = get_blocks(notion, page_id)
    
    # Get page title and properties
    page = notion.pages.retrieve(page_id)
    title = get_page_title(page)
    
    # Generate HTML content
    html_content = f'<h1>{title}</h1>\n'
    
    # Add database properties if available
    if database_properties:
        html_content += '<div class="database-properties">\n'
        html_content += '<h2>Properties</h2>\n'
        html_content += '<dl>\n'
        for prop_name, prop_value in database_properties.items():
            html_content += f'<dt>{prop_name}</dt><dd>{prop_value}</dd>\n'
        html_content += '</dl>\n'
        html_content += '</div>\n'
    
    html_content += notion_to_html(blocks, notion, output_dir)
    
    # Create a safe filename
    filename = f"{get_safe_filename(title)}_{page_id}.html"
    
    # Write the HTML file
    with open(os.path.join(output_dir, filename), 'w', encoding='utf-8') as f:
        f.write(f"<!DOCTYPE html><html><head><title>{title}</title></head><body>")
        f.write(html_content)
        f.write("</body></html>")
    
    print(f"Created file: {filename}")
    
    # Process child pages and databases
    for block in blocks:
        if block['type'] == 'child_page':
            child_page_id = block['id']
            process_page_recursively(notion, child_page_id, output_dir)
        elif block['type'] == 'child_database':
            process_database(notion, block['id'], output_dir)

def process_database(notion: Client, database_id: str, output_dir: str):
    database = notion.databases.retrieve(database_id)
    database_title = database['title'][0]['plain_text'] if database['title'] else 'Untitled Database'
    
    print(f"Processing database: {database_title}")
    
    # Query all items in the database with pagination
    start_cursor = None
    while True:
        response = notion.databases.query(
            database_id=database_id,
            start_cursor=start_cursor,
            page_size=100  # You can adjust this value
        )
        results = response['results']
        
        for item in results:
            item_id = item['id']
            item_title = get_page_title(item)
            print(f"Processing database item: {item_title}")
            
            # Extract database properties
            database_properties = {}
            for prop_name, prop_data in item['properties'].items():
                prop_type = prop_data['type']
                if prop_type == 'rich_text':
                    prop_value = ' '.join([text['plain_text'] for text in prop_data['rich_text']])
                elif prop_type == 'title':
                    prop_value = ' '.join([text['plain_text'] for text in prop_data['title']])
                elif prop_type == 'select':
                    prop_value = prop_data['select']['name'] if prop_data['select'] else ''
                elif prop_type == 'multi_select':
                    prop_value = ', '.join([select['name'] for select in prop_data['multi_select']])
                elif prop_type == 'date':
                    prop_value = prop_data['date']['start'] if prop_data['date'] else ''
                elif prop_type == 'checkbox':
                    prop_value = 'Yes' if prop_data['checkbox'] else 'No'
                else:
                    prop_value = str(prop_data.get(prop_type, ''))
                
                database_properties[prop_name] = prop_value
            
            # Process each database item as a page
            process_page_recursively(notion, item_id, output_dir, database_properties)
        
        if not response.get('has_more', False):
            break
        
        start_cursor = response.get('next_cursor')

# ... (rest of the code remains the same)

Please:
- Add support for all heading levels
- Add support for anchor links (i.e. to other blocks within the page, or on a different page)

In [None]:
import os
from typing import List, Dict, Any
from notion_client import Client

def notion_to_html(blocks: List[Dict[str, Any]], notion: Client, output_dir: str, page_id: str) -> str:
    html = []

    for block in blocks:
        block_id = block['id']
        block_type = block['type']

        if block_type == 'paragraph':
            html.append(paragraph_to_html(block, block_id))
        elif block_type.startswith('heading_'):
            html.append(heading_to_html(block, block_id))
        elif block_type == 'bulleted_list_item':
            html.append(list_item_to_html(block, block_id, 'ul'))
        elif block_type == 'numbered_list_item':
            html.append(list_item_to_html(block, block_id, 'ol'))
        elif block_type == 'to_do':
            html.append(todo_to_html(block, block_id))
        elif block_type == 'toggle':
            html.append(toggle_to_html(block, block_id))
        elif block_type == 'child_page':
            html.append(child_page_to_html(block, block_id, output_dir))
        elif block_type == 'image':
            html.append(image_to_html(block, block_id))
        elif block_type == 'code':
            html.append(code_to_html(block, block_id))
        elif block_type == 'quote':
            html.append(quote_to_html(block, block_id))
        elif block_type == 'divider':
            html.append(divider_to_html(block, block_id))
        elif block_type == 'child_database':
            html.append(child_database_to_html(block, block_id, notion, output_dir))
        elif block_type == 'table_of_contents':
            html.append(table_of_contents_to_html(block, block_id))
        else:
            html.append(f'<div data-block-id="{block_id}">Unsupported block type: {block_type}</div>')

    return '\n'.join(html)

def heading_to_html(block: Dict[str, Any], block_id: str) -> str:
    heading_level = int(block['type'][-1])
    content = rich_text_to_html(block[block['type']]['rich_text'])
    anchor = f"heading-{block_id}"
    return f'<h{heading_level} id="{anchor}" data-block-id="{block_id}">{content}</h{heading_level}>'

def rich_text_to_html(rich_text: List[Dict[str, Any]]) -> str:
    html = []
    for text in rich_text:
        content = text['plain_text']
        link = text.get('href')
        annotations = text['annotations']

        if annotations['bold']:
            content = f'<strong>{content}</strong>'
        if annotations['italic']:
            content = f'<em>{content}</em>'
        if annotations['strikethrough']:
            content = f'<del>{content}</del>'
        if annotations['underline']:
            content = f'<u>{content}</u>'
        if annotations['code']:
            content = f'<code>{content}</code>'

        if link:
            if link.startswith('/'):
                # Internal link to another page
                page_id = link.split('/')[-1]
                content = f'<a href="{get_safe_filename(content)}_{page_id}.html">{content}</a>'
            elif link.startswith('#'):
                # Anchor link within the same page
                block_id = link[1:]
                content = f'<a href="#heading-{block_id}">{content}</a>'
            else:
                # External link
                content = f'<a href="{link}">{content}</a>'

        html.append(content)

    return ''.join(html)

def child_database_to_html(block: Dict[str, Any], block_id: str, notion: Client, output_dir: str) -> str:
    database_id = block['id']
    database_title = block['child_database']['title']
    
    # Query the database with pagination
    entries_html = []
    start_cursor = None
    
    while True:
        response = notion.databases.query(
            database_id=database_id,
            start_cursor=start_cursor,
            page_size=100  # Adjust as needed
        )
        results = response.get("results", [])
        
        for page in results:
            page_id = page['id']
            page_title = get_page_title(page)
            safe_title = get_safe_filename(page_title)
            page_url = f"{safe_title}_{page_id}.html"
            entries_html.append(f'<li><a href="{page_url}">{page_title}</a></li>')
            
            # Generate individual page HTML
            page_blocks = notion.blocks.children.list(block_id=page_id).get("results")
            page_html = notion_to_html(page_blocks, notion, output_dir, page_id)
            
            # Get database fields
            fields_html = get_database_fields_html(page['properties'])
            
            # Write individual page HTML to file
            with open(os.path.join(output_dir, page_url), 'w', encoding='utf-8') as f:
                f.write(f"""<!DOCTYPE html>
<html>
<head>
    <title>{page_title}</title>
</head>
<body>
    <h1>{page_title}</h1>
    {fields_html}
    {page_html}
</body>
</html>""")
        
        if not response.get("has_more"):
            break
        start_cursor = response.get("next_cursor")
    
    # Return HTML for database listing
    return f"""
    <div data-block-id="{block_id}">
        <h2>{database_title}</h2>
        <ul>
            {''.join(entries_html)}
        </ul>
    </div>
    """

def get_database_fields_html(properties: Dict[str, Any]) -> str:
    fields_html = ['<div class="database-fields">']
    for prop_name, prop_value in properties.items():
        if prop_name.lower() != 'name':  # Exclude the 'Name' field as it's already used as the title
            field_value = format_property_value(prop_value)
            fields_html.append(f'<p><strong>{prop_name}:</strong> {field_value}</p>')
    fields_html.append('</div>')
    return '\n'.join(fields_html)

def format_property_value(prop: Dict[str, Any]) -> str:
    prop_type = prop['type']
    if prop_type == 'rich_text':
        return rich_text_to_html(prop['rich_text'])
    elif prop_type == 'number':
        return str(prop['number'])
    elif prop_type == 'select':
        return prop['select']['name'] if prop['select'] else ''
    elif prop_type == 'multi_select':
        return ', '.join([option['name'] for option in prop['multi_select']])
    elif prop_type == 'date':
        date = prop['date']
        if date:
            return f"{date['start']} - {date['end']}" if date.get('end') else date['start']
        return ''
    elif prop_type == 'people':
        return ', '.join([person['name'] for person in prop['people']])
    elif prop_type == 'files':
        return ', '.join([file['name'] for file in prop['files']])
    elif prop_type == 'checkbox':
        return 'Yes' if prop['checkbox'] else 'No'
    elif prop_type == 'url':
        return f'<a href="{prop["url"]}">{prop["url"]}</a>' if prop['url'] else ''
    elif prop_type == 'email':
        return f'<a href="mailto:{prop["email"]}">{prop["email"]}</a>' if prop['email'] else ''
    elif prop_type == 'phone_number':
        return prop['phone_number'] or ''
    elif prop_type == 'formula':
        return str(prop['formula'].get('string', ''))
    else:
        return 'Unsupported property type'

# ... (rest of the code remains the same)

def process_page_recursively(notion: Client, page_id: str, output_dir: str):
    blocks = get_blocks(notion, page_id)
    
    # Get page title
    page = notion.pages.retrieve(page_id)
    title = get_page_title(page)
    
    # Add title as H1 at the top
    html_content = f'<h1>{title}</h1>\n' + notion_to_html(blocks, notion, output_dir, page_id)
    
    # Create a safe filename
    safe_title = get_safe_filename(title)
    filename = f"{safe_title}_{page_id}.html"
    
    # Write the HTML file
    with open(os.path.join(output_dir, filename), 'w', encoding='utf-8') as f:
        f.write(f"""<!DOCTYPE html>
<html>
<head>
    <title>{title}</title>
</head>
<body>
    {html_content}
</body>
</html>""")
    
    print(f"Created file: {filename}")
    
    # Process child pages and databases
    for block in blocks:
        if block['type'] == 'child_page':
            child_page_id = block['id']
            process_page_recursively(notion, child_page_id, output_dir)
        elif block['type'] == 'child_database':
            process_database(notion, block['id'], output_dir)

# ... (rest of the code remains the same)

In [None]:
os.makedirs(OUTPUT_DIR, exist_ok=True)

process_page_recursively(notion, FRONTPAGE_NOTION_ID, OUTPUT_DIR)