In [None]:
# !pip install -e ..

In [None]:
import collections
import datetime
import pathlib
import tqdm

In [None]:
# Enable IPython's autoreload extension so that edits to imported modules
# (e.g. the `assistance` package imported below) are picked up live,
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from assistance.api.keys import get_notion_api_key

In [None]:
# Notion API key; get_all_sub_blocks sends it as the Bearer token on every request.
API_KEY = get_notion_api_key()
# ID of the Notion block whose children are listed below; each `child_page`
# found under it is exported as one transcript.
PARENT_PAGE_ID = "ad816892782d478d9998f700a5c783be"

In [None]:
import aiohttp

In [None]:
async def get_all_sub_blocks(block_id):
    """Fetch every child block of a Notion block, following pagination.

    Repeatedly requests ``GET /v1/blocks/{block_id}/children`` and feeds each
    response's ``next_cursor`` back as the ``start_cursor`` query parameter
    until the API reports ``has_more`` as false.

    Args:
        block_id: ID of the Notion block (or page) whose children to list.

    Returns:
        list: The ``results`` entries of every response page, concatenated
        in the order they were received.

    Raises:
        aiohttp.ClientResponseError: If any request comes back with an HTTP
            error status (4xx/5xx).
    """
    url = f"https://api.notion.com/v1/blocks/{block_id}/children"

    # The headers are identical for every page, so build them once.
    headers = {
        "accept": "application/json",
        "Notion-Version": "2022-06-28",
        "Authorization": f"Bearer {API_KEY}",
    }

    results = []
    params = None  # the first request carries no cursor

    async with aiohttp.ClientSession() as session:
        while True:
            async with session.get(url, headers=headers, params=params) as resp:
                # Fail loudly on an HTTP error instead of hitting a confusing
                # KeyError on 'results' when the body is an error object.
                resp.raise_for_status()
                payload = await resp.json()

            results.extend(payload['results'])

            if not payload['has_more']:
                return results

            # Let aiohttp encode the cursor as a query parameter.
            params = {"start_cursor": payload['next_cursor']}

In [None]:
async def get_transcript(block_id):
    """Build a plain-text transcript from the paragraph blocks of a page.

    Fetches all child blocks of ``block_id`` via ``get_all_sub_blocks`` and
    collects the ``plain_text`` of every rich-text entry found in paragraph
    blocks. Blocks without a ``paragraph`` payload (headings, dividers, ...)
    are skipped instead of raising ``KeyError``.

    Args:
        block_id: ID of the Notion page/block holding the transcript.

    Returns:
        str: The collected text entries joined by blank lines (``"\\n\\n"``);
        an empty string when there is no paragraph text.
    """
    blocks = await get_all_sub_blocks(block_id)

    transcript_items = []

    for block in blocks:
        # Only paragraph blocks carry a 'paragraph' key; anything else is
        # not transcript text and must not abort the export.
        paragraph = block.get('paragraph')
        if not paragraph:
            continue

        transcript_items.extend(
            rich_text['plain_text'] for rich_text in paragraph.get('rich_text', [])
        )

    return "\n\n".join(transcript_items)

In [None]:
# List every direct child block of the parent page (all pagination pages).
results = await get_all_sub_blocks(PARENT_PAGE_ID)

In [None]:
# Index the parent's child pages by their Notion ID, keeping only each page's
# title and creation timestamp; blocks of any other type are ignored.
pages = {
    block['id']: dict(
        title=block['child_page']['title'],
        created_time=block['created_time'],
    )
    for block in results
    if block['type'] == 'child_page'
}

In [None]:
# Root folder for saved transcripts, located under the current user's home directory.
transcripts_dir = pathlib.Path.home().joinpath(".assistance.chat", "transcripts")

In [None]:
for page_id, details in tqdm.tqdm(pages.items()):
    user_id = details['title']
    timestamp = details['created_time']
    
    user_dir = transcripts_dir / user_id
    user_dir.mkdir(exist_ok=True, parents=True)
    
    filename = timestamp.replace("-", "").replace(":", "").replace(".", "_")
    filepath = user_dir / filename
    
    if filepath.exists():
        continue
    
    transcript = await get_transcript(page_id)
    
    with open(filepath, "w") as f:
        f.write(transcript)