# Set Up the Environment

In [None]:
# Set up configuration.
import os
import sys

# Make the project root importable so that ``src`` resolves.
# The original hard-coded location stays the default; set AI_ME_ROOT to run the
# notebook against a different checkout. The membership check keeps sys.path
# from gaining a duplicate entry every time this cell is re-run.
_project_root = os.environ.get("AI_ME_ROOT", "/Users/benyoung/projects/ai-me")
if _project_root not in sys.path:
    sys.path.append(_project_root)

from src.config import config
from IPython.display import Markdown
from agents import trace, Runner, Agent, Tool

# Echo the configured model so it is obvious which one the notebook will use.
print(f"Using model: {config.model}")



# Download, Load, Chunk, Vectorize, and Store Markdown Files in Chroma

In [None]:
from importlib import reload
import src.data as _data_module

# Reload src.data before importing from it, so the DataManager name bound below
# comes from the freshly reloaded module.
reload(_data_module)
from src.data import DataManager

# A single DataManager drives loading, chunking and storage.
# The local glob is spelled out here because, per the original note, the
# pattern does not follow symlinks properly.
data_manager = DataManager(github_repos=config.github_repos, doc_load_local=["me/**/*.md"])

# Process every repo listed in config.github_repos; local documents are
# skipped for this run (include_local=False).
chunks = data_manager.load_and_process_all(
    github_repos=config.github_repos,
    include_github=True,
    include_local=False,
)
print(f"Total chunks created: {len(chunks)}")

# Build the vector store from the chunks and expose a retriever over it.
# NOTE(review): reset=True presumably discards any previously stored data -- confirm in DataManager.
vectorstore = data_manager.create_vectorstore(chunks, reset=True)
retriever = vectorstore.as_retriever()

# Show what was stored for one known file.
data_manager.show_docs_for_file("faq.md")


In [None]:
# Setup MCP servers
from agents.mcp import MCPServerStdio
import shutil
import asyncio

# Verify Docker is available
if not shutil.which("docker"):
    raise RuntimeError("""Docker is not available in this environment. Run this on a machine with 
                        Docker or use an IDE MCP integration.""")

# Use MCP params from config
# Override to enable MCP servers in notebook (disabled by default in config)
all_params = [config.mcp_github_params, config.mcp_time_params]

# Create MCP servers for each parameter set
mcp_servers = []
for i, params in enumerate(all_params):
    try:
        print(f"Initializing MCP server {i+1}...")
        async with MCPServerStdio(params=params, client_session_timeout_seconds=30) as server:
            tools = await server.list_tools()
            print(f"Server {i+1} initialized successfully with {len(tools)} tools")
        # Create a new instance for actual use
        mcp_servers.append(MCPServerStdio(params))
    except Exception as e:
        print(f"Error initializing MCP server {i+1}: {e}")
        # Continue with other servers even if one fails
        continue

print(f"Created {len(mcp_servers)} MCP servers. Starting now...")

# Start MCP Servers
for i, server in enumerate(mcp_servers):
    try:
        await server.connect()
        print(f"Connected to MCP server {i+1}")
    except Exception as e:
        print(f"Error connecting to MCP server {i+1}: {e}")
        continue


In [None]:
from agents import function_tool

@function_tool
async def get_local_info(query: str) -> str:
    """get more context based on the subject of the question.
    Our vector store will contain information about our personal and professional experience
    in all things technology."""
    # NOTE: the docstring above is deliberately left untouched -- function_tool
    # exposes it to the model as the tool description, so it is runtime text.
    #
    # Returns the concatenated text of the accepted hits, each prefixed with a
    # "Source: <github link>" line when the hit carries GitHub metadata, or an
    # empty string when nothing scores below the cutoff.
    top_k = 3
    # Hits scoring at or above this are logged but not returned (lower scores
    # are the ones kept, so the score is presumably a distance).
    max_score = 1.5

    print("QUERY:", query)
    retrieved_docs = vectorstore.similarity_search_with_score(query, k=top_k)
    print(f"Retrieved {len(retrieved_docs)} documents from vector store.")

    sections = []
    for doc, score in retrieved_docs:
        # Use .get() so a document without GitHub metadata no longer raises
        # KeyError in the middle of a tool call.
        repo = doc.metadata.get("github_repo")
        file_path = doc.metadata.get("file_path")

        print(f" --------------- {file_path} ({score}) ---------------")
        print(f"{doc.page_content[:2000]}")

        if score >= max_score:
            continue

        if repo and file_path:
            source_link = f"https://github.com/{repo}/tree/main/{file_path}\n"
            sections.append(f"Source: {source_link}{doc.page_content}\n\n")
        else:
            # No link can be built; still return the content itself.
            sections.append(f"{doc.page_content}\n\n")

    # join() instead of repeated += while building the result
    return "".join(sections)


# Set Up Agents

In [None]:
from openai import AsyncOpenAI

# Agent client already set up by config, so no client is constructed in this cell
# (NOTE(review): confirm -- the AsyncOpenAI import above is not used here).
# Echo the configured model name as a quick sanity check.
print(f"Using model: {config.model}")

async def get_researcher(mcp_servers) -> Agent:
    """Build the "Source Code Researcher" agent.

    Args:
        mcp_servers: MCP servers passed straight through to the agent's
            ``mcp_servers`` argument.

    Returns:
        An ``Agent`` that uses ``config.model`` and is instructed to research
        GitHub source code, filtered to commits by a given username.
    """
    prompt = """
            You're a source code researcher that uses your tools to gather information from github.
            When searching source code, filter to only commits by the given GitHub username.
            """
    return Agent(
        mcp_servers=mcp_servers,
        model=config.model,
        instructions=prompt,
        name="Source Code Researcher",
    )


async def get_researcher_tool(mcp_servers) -> Tool:
    """Wrap the source-code researcher agent as a tool.

    Args:
        mcp_servers: MCP servers forwarded to ``get_researcher``.

    Returns:
        The result of ``as_tool`` on the researcher agent, registered under the
        tool name "SourceCodeResearcher".
    """
    description = """
                This tool is for searching through source code repositories. 
                Use this tool if you have a github username and repo to filter on"""
    agent = await get_researcher(mcp_servers)
    tool = agent.as_tool(tool_description=description, tool_name="SourceCodeResearcher")
    return tool

# Build the researcher tool once, up front, from the MCP servers prepared earlier.
# The ai_me agent defined in a later cell currently lists only get_local_info
# in its tools, so this tool is created but not attached there.
researcher_tool = await get_researcher_tool(mcp_servers)


In [None]:
ins = f"""
You're acting as somebody who personifying Ben Young and must follow these rules:
 * If the user asks a question, use the get_local_info tool to gather more info but don't include my name in the query
 * Say you don't know if the get local info tool returns weak or no relevant info
 * don't offer follow up questions, just answer the question
 * Add inline references using shorthand links like '[1](link)' if they contain https://github.com
"""
print(ins)
ai_me = Agent(
    model=config.model,
    name="ai-me",
    instructions=ins,
    tools=[get_local_info],
    # Turn off our github researcher tool until we can optimize the response time
    # The researcher tool gives a much better response when used, but it's very slow (and expensive)
    #tools=[researcher_tool, get_local_info],
)

with trace("test-1"):
    result = await Runner.run(ai_me, "What do you know about LUKS?")

display(Markdown(result.final_output))


In [None]:
# Smoke test 2: open-ended personal question, traced as "test-2".
# The agent's final output is rendered as Markdown in the cell output.
with trace("test-2"):
    result = await Runner.run(ai_me, "What are you most proud of?")
display(Markdown(result.final_output))


In [None]:
# Smoke test 3: summarization request spanning the blog posts, traced as "test-3".
# The agent's final output is rendered as Markdown in the cell output.
with trace("test-3"):
    result = await Runner.run(ai_me, "Can you summarize all your blog posts into a 5-7 sentence paragraph?")
display(Markdown(result.final_output))


In [None]:
# Smoke test 4: traced as "test-4".
# NOTE(review): presumably an out-of-scope question meant to exercise the
# "say you don't know" rule in the agent instructions -- confirm intent.
with trace("test-4"):
    result = await Runner.run(ai_me, "who is slartibartfast?")
display(Markdown(result.final_output))

In [None]:
import gradio

async def chat(user_input: str, history):
    """Answer one chat message by running the ``ai_me`` agent.

    Both the incoming message and the agent's reply are echoed to stdout.

    Args:
        user_input: The message to send to the agent.
        history: Accepted to fit the chat-callback signature but never read
            here, so each message is answered without earlier turns.

    Returns:
        The ``final_output`` of the agent run for this message.
    """
    print("================== USER ===================", user_input, sep="\n")

    run = await Runner.run(ai_me, user_input)
    reply = run.final_output

    print("================== AGENT ==================", reply, sep="\n")
    return reply

# Assemble the web UI: a Blocks container using the Ocean theme that holds a
# Markdown welcome banner and a chat interface.
with gradio.Blocks(theme=gradio.themes.Ocean()) as ui:

    # Welcome banner shown above the chat box.
    gradio.Markdown("""# Welcome to Ben-Bot
                    The digital version of Ben Young, software engineer, architect, leader and technology enthusiast.
                    The digital assistant that you never knew you needed ;)
                    Feel free to ask me anything about my experience, skills, projects, and interests.
                    """)
    # Chat widget wired to the chat() callback defined above.
    gradio.ChatInterface(chat, type="messages")

# Start serving the UI.
ui.launch()

# The End