In [1]:
from pathlib import Path

from mcp import ClientSession, StdioServerParameters
from mcp.client.sse import sse_client
from mcp.client.stdio import stdio_client

from autogen import LLMConfig
from autogen.agentchat import AssistantAgent
from autogen.mcp import create_toolkit
import json
import anyio
import asyncio

# Only needed for Jupyter notebooks
import nest_asyncio
nest_asyncio.apply()

from autogen.agentchat.group import (
    AgentNameTarget,
    AgentTarget,
    AskUserTarget,
    ContextExpression,
    ContextStr,
    ContextStrLLMCondition,
    ContextVariables,
    ExpressionAvailableCondition,
    ExpressionContextCondition,
    GroupChatConfig,
    GroupChatTarget,
    Handoffs,
    NestedChatTarget,
    OnCondition,
    OnContextCondition,
    ReplyResult,
    RevertToUserTarget,
    SpeakerSelectionResult,
    StayTarget,
    StringAvailableCondition,
    StringContextCondition,
    StringLLMCondition,
    TerminateTarget,
)

from autogen.agentchat.group.patterns import (
    DefaultPattern,
    ManualPattern,
    AutoPattern,
    RandomPattern,
    RoundRobinPattern,
)


from autogen import ConversableAgent, UpdateSystemMessage
from autogen.agents.experimental import DocAgent
import os
import copy
from typing import Any, Dict, List
from pydantic import BaseModel, Field


from autogen.agentchat import initiate_group_chat, a_initiate_group_chat

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#check the current file path
print("Current working directory:", os.getcwd())
#change to the directory where the source located



Current working directory: /Users/apple/Documents/GitHub/CMB-Agent/mcp_hackathon


# Load From Notion

In [3]:
# Path to the arxiv MCP server
mcp_server_path = Path("src/mcp_notion.py")

In [4]:
from google_auth_oauthlib.flow import InstalledAppFlow
import json

In [5]:
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

import os

# Access tokens
NOTION_TOKEN = os.getenv("NOTION_TOKEN")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [7]:
from src.mcp_notion import NotionReader

In [8]:
# Set up the Notion MCP server
notion_server_path = Path("mcp_notion.py")

In [9]:
from mcp import ClientSession, StdioServerParameters
from mcp.client.sse import sse_client
from mcp.client.stdio import stdio_client


In [10]:
PAGE_ID = "160ab8e5212680239210c6501484cd90"

In [12]:
async def read_notion_page():
    """Read content from a specific Notion page using the NotionReader."""
    try:
        # Create a NotionReader instance
        reader = NotionReader()
        
        print(f"Reading Notion page with ID: {PAGE_ID}")
        
        # Use the reader as a context manager to ensure proper cleanup
        async with reader:
            # Get page content
            page = await reader.get_page(PAGE_ID)
            
            # Get page blocks
            blocks_response = await reader.get_block_children(PAGE_ID)
            blocks = blocks_response.get("results", [])
            
            # Format the results for easier consumption
            formatted_result = format_page_content(page, blocks)
            
            # Print a summary of what we found
            print(f"Page title: {formatted_result.get('title', 'Untitled')}")
            print(f"Found {len(formatted_result.get('content', []))} content blocks")
            
            # Save the full content to a JSON file for inspection
            with open("notion_page_content.json", "w") as f:
                json.dump(formatted_result, f, indent=2)
            print("Full content saved to notion_page_content.json")
            
            # Also print the first few blocks
            print("\nPreview of content:")
            for i, block in enumerate(formatted_result.get('content', [])[:5]):  # First 5 blocks
                block_text = block.get('text', '')[:100]  # First 100 chars
                print(f"{i+1}. [{block.get('type')}]: {block_text}..." if len(block_text) > 100 else block_text)
            
            return formatted_result
            
    except Exception as e:
        print(f"Error reading Notion page: {str(e)}")
        raise


In [13]:
def format_page_content(page, blocks):
    """Format page and blocks into a more user-friendly structure."""
    # Extract title if available
    title = ""
    for prop_name, prop_value in page.get("properties", {}).items():
        if prop_value.get("type") == "title" and prop_value.get("title"):
            title = "".join([text.get("plain_text", "") for text in prop_value.get("title", [])])
            break
    
    # Format blocks
    formatted_blocks = []
    for block in blocks:
        block_type = block.get("type")
        
        # Default structure for a formatted block
        formatted_block = {
            "id": block.get("id"),
            "type": block_type,
            "text": ""
        }
        
        # Extract text based on block type
        if block_type in ["paragraph", "heading_1", "heading_2", "heading_3", 
                         "bulleted_list_item", "numbered_list_item"]:
            # These block types have a similar structure
            rich_text = block.get(block_type, {}).get("rich_text", [])
            formatted_block["text"] = "".join([text.get("plain_text", "") for text in rich_text])
        
        elif block_type == "code":
            rich_text = block.get("code", {}).get("rich_text", [])
            formatted_block["text"] = "".join([text.get("plain_text", "") for text in rich_text])
            formatted_block["language"] = block.get("code", {}).get("language")
        
        # Add more block types as needed
        
        formatted_blocks.append(formatted_block)
    
    return {
        "id": page.get("id"),
        "created_time": page.get("created_time"),
        "last_edited_time": page.get("last_edited_time"),
        "title": title,
        "content": formatted_blocks
    }

In [14]:
result = await read_notion_page()

Reading Notion page with ID: 160ab8e5212680239210c6501484cd90
Page title: Interesting papers
Found 100 content blocks
Full content saved to notion_page_content.json

Preview of content:

Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
Here's how RAG works:
Document Processing: Your documents are processed and converted into embeddings (vector representati
Storage: These embeddings are stored in a vector database or similar storage system


# Start the Agent

In [17]:
import json
import os
from typing import Dict, List, Any
from pathlib import Path

import autogen
from autogen import LLMConfig
from autogen.agentchat import AssistantAgent, UserProxyAgent
from autogen.agentchat.group import (
    GroupChatConfig,
    NestedChatTarget,
    Handoffs,
)
from autogen.agentchat.groupchat import GroupChat, GroupChatManager
from autogen.agentchat.group.patterns import AutoPattern, DefaultPattern

# For Jupyter notebooks
import nest_asyncio
nest_asyncio.apply()


In [19]:

nest_asyncio.apply()


config_list = [
    {
        "model": "gpt-4o-mini",
        "api_key": os.environ.get("OPENAI_API_KEY")
    }
]


In [20]:
llm_config = LLMConfig(
    config_list=config_list,
    timeout=120,
    cache_seed=42
)

In [21]:
def load_notion_content(file_path: str) -> Dict[str, Any]:
    """Load the Notion page content from JSON file."""
    with open(file_path, 'r') as f:
        data = json.load(f)
    return data


In [22]:
from autogen.agentchat.group.patterns import AutoPattern
from autogen.agentchat.group import Handoffs

In [23]:

def create_notion_agents():
    """Create specialized agents for different aspects of the Notion content."""
    
    # Reader Agent - Identifies structure and papers
    reader_agent = AssistantAgent(
        name="Reader",
        llm_config=llm_config,
        system_message="""You are the Reader Agent. Your role is to:
1. Read and parse the Notion page content
2. Identify the different research papers in the content
3. Organize the content into logical sections by paper
4. Report the structure clearly to the Analyzer
Always start your response with "@Analyzer" to ensure direct communication.
"""
    )
    
    # Analyzer Agent - Extracts key information from each paper
    analyzer_agent = AssistantAgent(
        name="Analyzer",
        llm_config=llm_config,
        system_message="""You are the Analyzer Agent. Your role is to:
1. Analyze each research paper identified by the Reader
2. Extract key contributions, methodologies, and findings
3. Identify connections between different papers
4. Highlight important technical concepts
Always start your response with "@Summarizer" to ensure direct communication.
"""
    )
    
    # Summarizer Agent - Creates concise summaries 
    summarizer_agent = AssistantAgent(
        name="Summarizer",
        llm_config=llm_config,
        system_message="""You are the Summarizer Agent. Your role is to:
1. Create concise summaries of each research paper
2. Highlight the most important innovations and findings
3. Connect related concepts across papers
4. Produce a final integrated summary
Always start your response with "@Editor" to ensure direct communication.
"""
    )
    
    # Editor Agent - Polishes the final output
    editor_agent = AssistantAgent(
        name="Editor",
        llm_config=llm_config,
        system_message="""You are the Editor Agent. Your role is to:
1. Review the content produced by the Summarizer
2. Improve clarity, organization, and flow
3. Correct any errors or inconsistencies
4. Format the final output in a professional, readable manner
5. Ensure the summary is complete and captures all key points
Your output is the final product that will be presented to the user.
"""
    )
    
    # User Proxy - Initiates the task
    user_proxy = UserProxyAgent(
        name="User",
        code_execution_config={"work_dir": ".", "use_docker": False},
        human_input_mode="NEVER"
    )
    
    return user_proxy, reader_agent, analyzer_agent, summarizer_agent, editor_agent


In [25]:
def format_notion_content(notion_data):
    """Format the Notion content for easier consumption by agents."""
    page_title = notion_data.get('title', 'Untitled Page')
    content_blocks = notion_data.get('content', [])
    
    formatted_texts = []
    
    # Process each block based on its type
    for block in content_blocks:
        block_type = block.get('type', '')
        block_text = block.get('text', '')
        
        if not block_text.strip():
            continue
            
        if block_type.startswith('heading_'):
            level = block_type[-1]
            prefix = '#' * int(level)
            formatted_texts.append(f"{prefix} {block_text}")
        elif block_type == 'bulleted_list_item':
            formatted_texts.append(f"* {block_text}")
        elif block_type == 'numbered_list_item':
            formatted_texts.append(f"- {block_text}")
        else:
            formatted_texts.append(block_text)
    
    return {
        "title": page_title,
        "formatted_content": "\n\n".join(formatted_texts)
    }

In [28]:
def run_notion_summarization(file_path):
    """Main function to run the notion summarization pipeline."""
    
    # Load and format notion content
    notion_data = load_notion_content(file_path)
    formatted_data = format_notion_content(notion_data)
    
    # Create agents
    user_proxy, reader_agent, analyzer_agent, summarizer_agent, editor_agent = create_notion_agents()
    
    # Set up the group chat
    group_chat = GroupChat(
        agents=[user_proxy, reader_agent, analyzer_agent, summarizer_agent, editor_agent],
        messages=[],
        max_round=15
    )
    
    # Configure the chat manager with direct handoffs
    manager = GroupChatManager(
        groupchat=group_chat,
        llm_config=llm_config,
        pattern=AutoPattern(
            starter=user_proxy,
            handoffs=Handoffs(mapping={
                "User": "Reader",
                "Reader": "Analyzer",
                "Analyzer": "Summarizer",
                "Summarizer": "Editor",
                "Editor": "User"  # Back to user with final result
            })
        )
    )
    
    # Create the initial prompt
    prompt = f"""I need a comprehensive summary of the research papers in this Notion page titled "{formatted_data['title']}".

Here is the content:

{formatted_data['formatted_content']}

Please analyze each paper, extract key findings and innovations, and create a well-structured summary that connects related concepts across papers.
"""
    
    # Run the group chat
    result = manager.groupchat.run(prompt)
    
    # Save the result
    with open("notion_summary_result.md", "w") as f:
        f.write(result)
    
    return result

In [29]:
file_path = "data/notion_page_content.json"
summary = run_notion_summarization(file_path)
print("Summary completed and saved to notion_summary_result.md")

TypeError: AutoPattern.__init__() got an unexpected keyword argument 'starter'