In [None]:
import requests
import json
from datetime import datetime

class NotionExtractor:
    def __init__(self, integration_token):
        """
        Inicializa el extractor de Notion
        
        Args:
            integration_token (str): Token de integración de Notion
        """
        self.token = integration_token
        self.headers = {
            "Authorization": f"Bearer {integration_token}",
            "Content-Type": "application/json",
            "Notion-Version": "2022-06-28"
        }
        self.base_url = "https://api.notion.com/v1"
    
    def get_databases(self):
        """
        Obtiene todas las bases de datos accesibles
        """
        url = f"{self.base_url}/search"
        data = {
            "filter": {
                "property": "object",
                "value": "database"
            }
        }
        
        response = requests.post(url, headers=self.headers, json=data)
        if response.status_code == 200:
            return response.json()
        else:
            print(f"Error al obtener databases: {response.status_code}")
            print(response.text)
            return None
    
    def get_database_info(self, database_id):
        """
        Obtiene información detallada de una base de datos específica
        
        Args:
            database_id (str): ID de la base de datos
        """
        url = f"{self.base_url}/databases/{database_id}"
        
        response = requests.get(url, headers=self.headers)
        if response.status_code == 200:
            return response.json()
        else:
            print(f"Error al obtener info de database: {response.status_code}")
            print(response.text)
            return None
    
    def get_database_pages(self, database_id, page_size=100):
        """
        Obtiene todas las páginas de una base de datos
        
        Args:
            database_id (str): ID de la base de datos
            page_size (int): Número de páginas por solicitud
        """
        url = f"{self.base_url}/databases/{database_id}/query"
        
        all_pages = []
        has_more = True
        start_cursor = None
        
        while has_more:
            data = {"page_size": page_size}
            if start_cursor:
                data["start_cursor"] = start_cursor
            
            response = requests.post(url, headers=self.headers, json=data)
            
            if response.status_code == 200:
                result = response.json()
                all_pages.extend(result.get("results", []))
                has_more = result.get("has_more", False)
                start_cursor = result.get("next_cursor")
            else:
                print(f"Error al obtener páginas: {response.status_code}")
                print(response.text)
                break
        
        return all_pages
    
    def get_page_details(self, page_id):
        """
        Obtiene detalles completos de una página específica
        
        Args:
            page_id (str): ID de la página
        """
        url = f"{self.base_url}/pages/{page_id}"
        
        response = requests.get(url, headers=self.headers)
        if response.status_code == 200:
            return response.json()
        else:
            print(f"Error al obtener detalles de página: {response.status_code}")
            print(response.text)
            return None
    
    def extract_properties(self, page_data):
        """
        Extrae y formatea las propiedades de una página
        
        Args:
            page_data (dict): Datos de la página de Notion
        """
        properties = {}
        
        if "properties" in page_data:
            for prop_name, prop_data in page_data["properties"].items():
                prop_type = prop_data.get("type")
                
                if prop_type == "title":
                    properties[prop_name] = self._extract_title(prop_data)
                elif prop_type == "rich_text":
                    properties[prop_name] = self._extract_rich_text(prop_data)
                elif prop_type == "number":
                    properties[prop_name] = prop_data.get("number")
                elif prop_type == "select":
                    properties[prop_name] = self._extract_select(prop_data)
                elif prop_type == "multi_select":
                    properties[prop_name] = self._extract_multi_select(prop_data)
                elif prop_type == "date":
                    properties[prop_name] = self._extract_date(prop_data)
                elif prop_type == "checkbox":
                    properties[prop_name] = prop_data.get("checkbox")
                elif prop_type == "url":
                    properties[prop_name] = prop_data.get("url")
                elif prop_type == "email":
                    properties[prop_name] = prop_data.get("email")
                elif prop_type == "phone_number":
                    properties[prop_name] = prop_data.get("phone_number")
                elif prop_type == "formula":
                    properties[prop_name] = self._extract_formula(prop_data)
                elif prop_type == "relation":
                    properties[prop_name] = self._extract_relation(prop_data)
                elif prop_type == "rollup":
                    properties[prop_name] = self._extract_rollup(prop_data)
                elif prop_type == "people":
                    properties[prop_name] = self._extract_people(prop_data)
                elif prop_type == "files":
                    properties[prop_name] = self._extract_files(prop_data)
                elif prop_type == "created_time":
                    properties[prop_name] = prop_data.get("created_time")
                elif prop_type == "last_edited_time":
                    properties[prop_name] = prop_data.get("last_edited_time")
                elif prop_type == "created_by":
                    properties[prop_name] = self._extract_user(prop_data.get("created_by"))
                elif prop_type == "last_edited_by":
                    properties[prop_name] = self._extract_user(prop_data.get("last_edited_by"))
                else:
                    properties[prop_name] = f"Tipo no soportado: {prop_type}"
        
        return properties
    
    def _extract_title(self, prop_data):
        """Extrae texto de propiedades title"""
        if prop_data.get("title"):
            return "".join([text.get("plain_text", "") for text in prop_data["title"]])
        return ""
    
    def _extract_rich_text(self, prop_data):
        """Extrae texto de propiedades rich_text"""
        if prop_data.get("rich_text"):
            return "".join([text.get("plain_text", "") for text in prop_data["rich_text"]])
        return ""
    
    def _extract_select(self, prop_data):
        """Extrae valor de propiedades select"""
        select_data = prop_data.get("select")
        return select_data.get("name") if select_data else None
    
    def _extract_multi_select(self, prop_data):
        """Extrae valores de propiedades multi_select"""
        multi_select_data = prop_data.get("multi_select", [])
        return [item.get("name") for item in multi_select_data]
    
    def _extract_date(self, prop_data):
        """Extrae fecha de propiedades date"""
        date_data = prop_data.get("date")
        if date_data:
            return {
                "start": date_data.get("start"),
                "end": date_data.get("end")
            }
        return None
    
    def _extract_formula(self, prop_data):
        """Extrae resultado de propiedades formula"""
        formula_data = prop_data.get("formula")
        if formula_data:
            formula_type = formula_data.get("type")
            return formula_data.get(formula_type)
        return None
    
    def _extract_relation(self, prop_data):
        """Extrae IDs de propiedades relation"""
        relation_data = prop_data.get("relation", [])
        return [item.get("id") for item in relation_data]
    
    def _extract_rollup(self, prop_data):
        """Extrae datos de propiedades rollup"""
        rollup_data = prop_data.get("rollup")
        if rollup_data:
            rollup_type = rollup_data.get("type")
            return rollup_data.get(rollup_type)
        return None
    
    def _extract_people(self, prop_data):
        """Extrae información de propiedades people"""
        people_data = prop_data.get("people", [])
        return [self._extract_user(person) for person in people_data]
    
    def _extract_files(self, prop_data):
        """Extrae archivos de propiedades files"""
        files_data = prop_data.get("files", [])
        return [{"name": file.get("name"), "url": file.get("file", {}).get("url")} for file in files_data]
    
    def _extract_user(self, user_data):
        """Extrae información de usuario"""
        if user_data:
            return {
                "id": user_data.get("id"),
                "name": user_data.get("name"),
                "avatar_url": user_data.get("avatar_url")
            }
        return None
    
    def find_database_by_name(self, name):
        """
        Busca una base de datos por nombre
        
        Args:
            name (str): Nombre de la base de datos a buscar
        """
        databases = self.get_databases()
        if databases:
            for db in databases.get("results", []):
                db_title = ""
                if db.get("title"):
                    db_title = "".join([text.get("plain_text", "") for text in db["title"]])
                
                if name.lower() in db_title.lower():
                    return db
        return None
    
    def export_to_json(self, data, filename):
        """
        Exporta datos a archivo JSON
        
        Args:
            data: Datos a exportar
            filename (str): Nombre del archivo
        """
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"Datos exportados a {filename}")

# Ejemplo de uso
def main():
    # Reemplaza con tu token de integración
    NOTION_TOKEN = "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"
    
    # Inicializar extractor
    extractor = NotionExtractor(NOTION_TOKEN)
    
    # Buscar la base de datos "Archivo"
    archivo_db = extractor.find_database_by_name("Pagos")
    
    if archivo_db:
        print(f"Base de datos encontrada: {archivo_db['id']}")
        
        # Obtener información detallada de la base de datos
        db_info = extractor.get_database_info(archivo_db['id'])
        print(f"Propiedades de la base de datos:")
        for prop_name, prop_info in db_info.get('properties', {}).items():
            print(f"  - {prop_name}: {prop_info['type']}")
        
        # Obtener todas las páginas
        pages = extractor.get_database_pages(archivo_db['id'])
        print(f"Páginas encontradas: {len(pages)}")
        
        # Extraer propiedades de cada página
        extracted_data = []
        for page in pages:
            page_properties = extractor.extract_properties(page)
            extracted_data.append({
                'id': page['id'],
                'created_time': page.get('created_time'),
                'last_edited_time': page.get('last_edited_time'),
                'properties': page_properties
            })
        
        # Exportar a JSON
        extractor.export_to_json(extracted_data, 'archivo_pages.json')
        
        # Mostrar ejemplo de datos extraídos
        if extracted_data:
            print("\nEjemplo de datos extraídos:")
            print(json.dumps(extracted_data[0], indent=2, ensure_ascii=False))
    
    else:
        print("Base de datos 'Archivo' no encontrada")

if __name__ == "__main__":
    main()

Base de datos encontrada: 232cc9fd-ffca-80b5-b7ba-e80a8e5fd74c
Propiedades de la base de datos:
  - Created at: date
  - Updated at: date
  - Actualizar Archivo: button
  - Archivo: title
Páginas encontradas: 2
Datos exportados a archivo_pages.json

Ejemplo de datos extraídos:
{
  "id": "233cc9fd-ffca-80e6-8b0e-c9d3f16ab9f0",
  "created_time": "2025-07-17T00:05:00.000Z",
  "last_edited_time": "2025-07-17T00:08:00.000Z",
  "properties": {
    "Created at": {
      "start": "2025-07-13",
      "end": null
    },
    "Updated at": {
      "start": "2025-07-13",
      "end": null
    },
    "Actualizar Archivo": "Tipo no soportado: button",
    "Archivo": "[GUIA] Como reportar un pago"
  }
}


In [20]:
from notion_client import Client
import os

notion = Client(auth="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")

page_id = "232cc9fd-ffca-806e-817e-e0d0f84a3b7e"  # ← ID correcto

# Obtener todos los bloques hijos
blocks = []
cursor = None


response = notion.blocks.children.list(block_id=page_id)

response

{'object': 'list',
 'results': [{'object': 'block',
   'id': '232cc9fd-ffca-8002-9b08-d58ed641ef25',
   'parent': {'type': 'page_id',
    'page_id': '232cc9fd-ffca-806e-817e-e0d0f84a3b7e'},
   'created_time': '2025-07-16T18:12:00.000Z',
   'last_edited_time': '2025-07-16T21:20:00.000Z',
   'created_by': {'object': 'user',
    'id': '152d872b-594c-81d7-947d-0002c201748e'},
   'last_edited_by': {'object': 'user',
    'id': '152d872b-594c-81d7-947d-0002c201748e'},
   'has_children': False,
   'archived': False,
   'in_trash': False,
   'type': 'heading_1',
   'heading_1': {'rich_text': [{'type': 'text',
      'text': {'content': 'Pregunta 1', 'link': None},
      'annotations': {'bold': False,
       'italic': False,
       'strikethrough': False,
       'underline': False,
       'code': False,
       'color': 'default'},
      'plain_text': 'Pregunta 1',
      'href': None}],
    'is_toggleable': False,
    'color': 'default'}},
  {'object': 'block',
   'id': '232cc9fd-ffca-808b-8a1c-cd

In [18]:
from notion_client import Client
import os

notion = Client(auth="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")

page_id = "232cc9fd-ffca-806e-817e-e0d0f84a3b7e"  # ← ID correcto

# Obtener todos los bloques hijos
blocks = []
cursor = None

while True:
    response = notion.blocks.children.list(block_id=page_id, start_cursor=cursor)
    blocks.extend(response["results"])
    if response.get("has_more"):
        cursor = response["next_cursor"]
    else:
        break

# Convertir a Markdown
def block_to_markdown(block):
    t = block["type"]
    rich_text = block[t].get("rich_text", [])
    text = "".join([r.get("plain_text", "") for r in rich_text])

    if t == "heading_1":
        return f"# {text}"
    elif t == "heading_2":
        return f"## {text}"
    elif t == "heading_3":
        return f"### {text}"
    elif t == "paragraph":
        return text
    else:
        return f"<!-- tipo de bloque no soportado: {t} -->"

markdown = "\n\n".join([block_to_markdown(b) for b in blocks])
print(markdown)


# Pregunta 1

<!-- tipo de bloque no soportado: divider -->

Respuesta a la pregunta 1

# Pregunta 2

<!-- tipo de bloque no soportado: divider -->

Respuest a la pregunta 2




