In [1]:
# Notion to Pandas
# https://www.pynotion.com/load-in-pandas/

# Notion Page to read
# https://ericriddoch.notion.site/ba1cb99f6e874dca88521bd0f46bea70?v=f18ec5e90d2e4bd7ad7612948ae61843

# Notion API
# https://developers.notion.com/

In [None]:
def format_string_to_uuid(input_string):
    """
    Converts a 32-character string into the hyphenated 8-4-4-4-12 UUID format.

    Hyphens already present in the input are ignored, so an ID that is
    already hyphenated is returned in the same 8-4-4-4-12 layout instead of
    being rejected.

    Parameters:
    input_string (str): A string containing exactly 32 characters, not
        counting hyphens.

    Returns:
    str: The formatted string in 8-4-4-4-12 UUID format.

    Raises:
    ValueError: If the input does not contain exactly 32 characters once
        hyphens are removed.
    """
    # Drop any existing separators so both compact and hyphenated IDs work.
    compact = input_string.replace("-", "")
    if len(compact) != 32:
        raise ValueError("Input string must be 32 characters long.")

    return f"{compact[:8]}-{compact[8:12]}-{compact[12:16]}-{compact[16:20]}-{compact[20:]}"

# Example usage
example_string = "11cfd102e3ea8010a07fe0b672f50f53"
formatted_string = format_string_to_uuid(example_string)
print(formatted_string)  # Output: 11cfd102-e3ea-8010-a07f-e0b672f50f53


11cfd102-e3ea-8010-a07f-e0b672f50f53


In [19]:
import os
import requests
import pandas as pd
from urllib.parse import urljoin

class NotionClient:
    """Small HTTP client for the Notion REST API.

    Holds a ``requests.Session`` whose default headers carry the bearer
    token, a JSON content type and the ``2022-06-28`` Notion-Version header.

    Parameters:
    notion_key (str): Integration token sent as ``Authorization: Bearer ...``.
    timeout (float or tuple): Timeout in seconds passed to every request so
        that a stalled connection cannot block forever. Defaults to 30.
    """

    # The trailing slash matters: without it urljoin would replace the "v1"
    # path segment instead of appending to it.
    NOTION_BASE_URL = "https://api.notion.com/v1/"

    def __init__(self, notion_key, timeout=30):
        self.notion_key = notion_key
        self.timeout = timeout
        self.default_headers = {
            'Authorization': f"Bearer {self.notion_key}",
            'Content-Type': 'application/json',
            'Notion-Version': '2022-06-28'
        }
        self.session = requests.Session()
        self.session.headers.update(self.default_headers)

    def retrieve_page_content(self, page_id, start_cursor=None):
        """GET ``blocks/{page_id}/children`` and return the raw response.

        Parameters:
        page_id (str): ID interpolated into the request path.
        start_cursor (str, optional): Pagination cursor; only sent as a
            query parameter when it is not None.

        Returns:
        The response object returned by ``self.session.get`` (not parsed).
        """
        page_url = urljoin(self.NOTION_BASE_URL, f"blocks/{page_id}/children")
        params = {}
        if start_cursor is not None:
            params["start_cursor"] = start_cursor
        return self.session.get(page_url, params=params, timeout=self.timeout)

class PandasConverter:
    """Turns a Notion block-list response into a list of flat record dicts."""

    def response_to_records(self, page_response):
        """Return the non-empty records for ``page_response["results"]``.

        Each entry is passed through ``get_record``; entries that yield an
        empty dict are left out of the returned list.
        """
        candidates = (self.get_record(block) for block in page_response["results"])
        return [rec for rec in candidates if rec]

    def get_record(self, result):
        """Build the record for one block.

        Paragraph and heading (level 1-3) blocks give ``{"text": ...}``;
        any other block type gives an empty dict.
        """
        kind = result.get("type")
        # Add more block types as needed
        if kind in ("paragraph", "heading_1", "heading_2", "heading_3"):
            return {"text": self.get_text(result[kind])}
        return {}

    def get_text(self, text_object):
        """Concatenate ``plain_text`` across the ``rich_text`` entries.

        Entries without ``plain_text`` contribute nothing; a missing
        ``rich_text`` key gives an empty string.
        """
        return "".join(
            fragment.get("plain_text", "")
            for fragment in text_object.get("rich_text", [])
        )

class PandasLoader:
    """Fetches every page of a Notion block listing into one DataFrame.

    Parameters:
    notion_client: Object exposing
        ``retrieve_page_content(page_id, start_cursor=...)`` whose return
        value has ``ok``, ``status_code`` and ``json()``.
    pandas_converter: Object exposing ``response_to_records(response_dict)``
        that returns a list of record dicts.
    """

    def __init__(self, notion_client, pandas_converter):
        self.notion_client = notion_client
        self.converter = pandas_converter

    def load_page(self, page_id):
        """Load all result pages for ``page_id`` and return them as a DataFrame.

        Pagination follows ``has_more`` / ``next_cursor`` from each response.
        Loading stops as soon as a request is not ``ok`` or the response
        claims more data without supplying a cursor; the records collected
        up to that point are still returned (an empty DataFrame if the very
        first request fails).

        Returns:
        pandas.DataFrame: One row per record produced by the converter.
        """
        records = []
        start_cursor = None
        page_count = 0
        while True:
            page_count += 1
            print(f"Loading page {page_count}")
            page_response = self.notion_client.retrieve_page_content(
                page_id, start_cursor=start_cursor
            )
            if not page_response.ok:
                # Stop here: retrying with the same cursor would repeat the
                # same failing request indefinitely.
                print(
                    f"Request for page {page_count} failed "
                    f"(status {page_response.status_code}); stopping."
                )
                break

            page_response_obj = page_response.json()
            records.extend(self.converter.response_to_records(page_response_obj))

            if not page_response_obj.get("has_more"):
                break
            start_cursor = page_response_obj.get("next_cursor")
            if not start_cursor:
                # Without a cursor the next request would restart from the
                # first page and never terminate.
                break
        return pd.DataFrame(records)

# Example usage
def notion_page_to_dataframe(notion_key, page_id):
    """Load the blocks under a Notion page ID into a DataFrame.

    Wires a ``NotionClient`` built from ``notion_key`` and a fresh
    ``PandasConverter`` into a ``PandasLoader``, then returns whatever its
    ``load_page(page_id)`` call returns.
    """
    return PandasLoader(NotionClient(notion_key), PandasConverter()).load_page(page_id)

In [20]:
# Usage
# The API key is read from the NOTION_KEY environment variable;
# os.environ.get yields None when that variable is not set.
notion_key = os.environ.get("NOTION_KEY")
# Hyphenated form of the page ID, as printed by format_string_to_uuid earlier in this notebook.
page_id = "11cfd102-e3ea-8010-a07f-e0b672f50f53"
df = notion_page_to_dataframe(notion_key, page_id)
print(df)

Loading page 1
Loading page 2
                                                 text
0                      Section 1: Course Introduction
1                        Section 2: Environment Setup
2                                           Untitled 
3          Section 3: Improving the terminal with ZSH
4        Section 4: Managing multiple Python versions
5   Section 5: VS Code: shortcuts, auto-completion...
6                          Section 6: Git and VS Code
7                   Section 7: GitHub and Code Review
8   Section 8: Continuous Integration — clean code...
9   Link to notes in Github https://github.com/phi...
10                                          Untitled 
11                                          Untitled 
12                                          Untitled 
13                                          Untitled 
14                                          Untitled 
15  Section 9: Continuous Integration - The pre-co...
16                         Section 10: GitHub Action

[Taking-Python-to-Production-A-Professional-Onboarding-Guide](https://www.notion.so/Taking-Python-to-Production-A-Professional-Onboarding-Guide-11cfd102e3ea8010a07fe0b672f50f53)

In [21]:
# Display the DataFrame loaded in the previous cell.
df

Unnamed: 0,text
0,Section 1: Course Introduction
1,Section 2: Environment Setup
2,Untitled
3,Section 3: Improving the terminal with ZSH
4,Section 4: Managing multiple Python versions
5,"Section 5: VS Code: shortcuts, auto-completion..."
6,Section 6: Git and VS Code
7,Section 7: GitHub and Code Review
8,Section 8: Continuous Integration — clean code...
9,Link to notes in Github https://github.com/phi...


Course videos: I was not able to grab these because they are views (I believe).

* [Py2Prod](https://ericriddoch.notion.site/22cff7ecfb92442fb317341fe5c7299b?v=c80f0354ebf1436a815782e6ec70d423)
* [Cloud Engineering](https://ericriddoch.notion.site/ba1cb99f6e874dca88521bd0f46bea70?v=f18ec5e90d2e4bd7ad7612948ae61843)

In [26]:
# Convert the ID that follows "?v=" in the Py2Prod link above to hyphenated form.
print(format_string_to_uuid(
    "c80f0354ebf1436a815782e6ec70d423"
    ))

c80f0354-ebf1-436a-8157-82e6ec70d423


In [28]:
# Try loading the hyphenated ID printed by the previous cell.
# NOTE(review): this rebinds `df`, replacing the DataFrame loaded earlier.
df = notion_page_to_dataframe(
    notion_key,
    "c80f0354-ebf1-436a-8157-82e6ec70d423"
    )
df

Loading page 1
