# Setup Environment

In [None]:
# Setup configuration
import sys
# NOTE(review): hard-coded absolute path to one developer's checkout; presumably
# needed so the `src` package imported below resolves. Will not work on other
# machines as-is — confirm and consider deriving it from the notebook location.
sys.path.append('/Users/benyoung/projects/ai-me')

# Shared settings object; the rest of the notebook reads config.model,
# config.github_repos, config.agent_prompt and the MCP parameter sets from it.
from src.config import config
from IPython.display import Markdown
from agents import trace, Runner, Agent, Tool

# Echo the configured model so it is visible in the notebook output.
print(f"Using model: {config.model}")



# Download, Load, Chunk, Vectorize and Store md files in Chroma

In [None]:
from src.data import DataManager

# Use consolidated data manager
# For some reason, the glob pattern does not follow symlinks properly, so specify directly here
# NOTE(review): the local glob is configured here, but the load call below passes
# include_local=False, so presumably no local files are loaded — confirm intent.
data_manager = DataManager(
    doc_load_local=["me/**/*.md"],
    github_repos=config.github_repos
)

# Load all repos configured in config.github_repos
chunks = data_manager.load_and_process_all(
    include_local=False,
    include_github=True,
    github_repos=config.github_repos  # Process all repos
)

print(f"Total chunks created: {len(chunks)}")

# Create the vectorstore using DataManager
# NOTE(review): reset=True presumably rebuilds the store from scratch on every run — confirm.
vectorstore = data_manager.create_vectorstore(chunks, reset=True)
# `vectorstore` is queried directly by later cells; `retriever` is not referenced
# anywhere else in this notebook.
retriever = vectorstore.as_retriever()


# Setup MCP Servers and Tools

In [None]:
# Setup MCP servers
from agents.mcp import MCPServerStdio
import shutil
import asyncio

# Verify Docker is available
if not shutil.which("docker"):
    raise RuntimeError("""Docker is not available in this environment. Run this on a machine with 
                        Docker or use an IDE MCP integration.""")

# Use MCP params from config
# Override to enable MCP servers in notebook (disabled by default in config)
all_params = [config.mcp_github_params, config.mcp_time_params]

# Create MCP servers for each parameter set
mcp_servers = []
for i, params in enumerate(all_params):
    try:
        print(f"Initializing MCP server {i+1}...")
        async with MCPServerStdio(params=params, client_session_timeout_seconds=30) as server:
            tools = await server.list_tools()
            print(f"Server {i+1} initialized successfully with {len(tools)} tools")
        # Create a new instance for actual use
        mcp_servers.append(MCPServerStdio(params))
    except Exception as e:
        print(f"Error initializing MCP server {i+1}: {e}")
        # Continue with other servers even if one fails
        continue

print(f"Created {len(mcp_servers)} MCP servers. Starting now...")

# Start MCP Servers
for i, server in enumerate(mcp_servers):
    try:
        await server.connect()
        print(f"Connected to MCP server {i+1}")
    except Exception as e:
        print(f"Error connecting to MCP server {i+1}: {e}")
        continue


In [None]:
from agents import function_tool

@function_tool
async def get_local_info(query: str) -> str:
    """get more context based on the subject of the question.
    Our vector store will contain information about our personal and professional experience
    in all things technology."""
    # NOTE: the docstring above is kept verbatim; it is presumably what
    # function_tool exposes to the model as the tool description.
    print("QUERY:", query)

    # Look up the closest chunks in the notebook-level vector store and hand
    # their text back as one blank-line-separated string.
    matches = vectorstore.similarity_search(query)
    page_texts = [match.page_content for match in matches]
    return "\n\n".join(page_texts)

Non-LangChain example of getting Markdown files from a repo.
I don't like this solution because it crawls the entire repository,
but it may come in handy later.

```python
import os

from github import Github, Auth
#
# Get MD files from remote repos
#
# Walks every directory of each repo breadth-first and reports the Markdown
# files it finds. Requires GITHUB_PERSONAL_ACCESS_TOKEN in the environment.
REPOS = ["Neosofia/corporate"]
g = Github(auth=Auth.Token(os.environ["GITHUB_PERSONAL_ACCESS_TOKEN"]))
# NOTE(review): nothing is appended to `documents` below; the example stops at
# printing the matching files.
documents = []

for repo_name in REPOS:
    repo = g.get_repo(repo_name)
    contents = repo.get_contents("")
    while contents:
        file_content = contents.pop(0)
        if file_content.type == "dir":
            # Queue the directory's entries so they are visited later.
            contents.extend(repo.get_contents(file_content.path))
        elif os.path.splitext(file_content.path)[1] == ".md":
            print("ADDING FILE:", file_content)
```

# Setup Agents

In [None]:
from openai import AsyncOpenAI

# Agent client already set up by config
print(f"Using model: {config.model}")

async def get_researcher(mcp_servers) -> Agent:
    """Build the agent that researches source code on GitHub.

    Args:
        mcp_servers: MCP servers passed through to the agent unchanged.

    Returns:
        An Agent named "Source Code Researcher" that runs on config.model.
    """
    # Plain literal: the text contains no placeholders, so no f-string is needed.
    researcher_instructions = """
            You're a source code researcher that uses your tools to gather information from github.
            When searching source code, filter to only commits by the given GitHub username.
            """
    return Agent(
        name="Source Code Researcher",
        instructions=researcher_instructions,
        model=config.model,
        mcp_servers=mcp_servers,
    )


async def get_researcher_tool(mcp_servers) -> Tool:
    researcher = await get_researcher(mcp_servers)
    return researcher.as_tool(
            tool_name="SourceCodeResearcher",
            tool_description="""
                This tool is for searching through source code repositories. 
                Use this tool if you have a github username and repo to filter on"""
        )

researcher_tool = await get_researcher_tool(mcp_servers)


In [None]:
print(config.agent_prompt)
ai_me = Agent(
    model=config.model,
    name="ai-me",
    instructions=config.agent_prompt,
    tools=[get_local_info],
    # Turn off our github researcher tool until we can optimize the response time
    # The researcher tool gives a much better response when used, but it's very slow (and expensive)
    #tools=[researcher_tool, get_local_info],
)

with trace("test-1"):
    result = await Runner.run(ai_me, "Do you have experience with ReaR?")

display(Markdown(result.final_output))


In [None]:
# Second smoke test: ask the ai_me agent about SQL experience under the
# "test-2" trace, then render its final output as Markdown.
with trace("test-2"):
    result = await Runner.run(ai_me, "What kind of experience do you have with SQL?")
display(Markdown(result.final_output))


In [None]:
# Third smoke test: ask the ai_me agent for a short summary of the blog posts
# under the "test-3" trace, then render its final output as Markdown.
with trace("test-3"):
    result = await Runner.run(ai_me, "Can you summarize all your blog posts into a 5-7 sentence paragraph?")
display(Markdown(result.final_output))


In [None]:
import gradio

async def chat(user_input: str, history):
    """Gradio chat handler: send one user message to the ai_me agent.

    Args:
        user_input: The text the user just typed.
        history: Prior conversation supplied by Gradio.
            NOTE(review): not used, so each message is answered without
            earlier turns — confirm this is intended.

    Returns:
        The agent's final output for this message.
    """
    print("================== USER ===================")
    print(user_input)

    run_result = await Runner.run(ai_me, user_input)
    reply = run_result.final_output

    print("================== AGENT ==================")
    print(reply)
    return reply

# Start the chat UI with the handler above.
view = gradio.ChatInterface(chat, type="messages").launch()


# LangGraph Testing (WIP)

In [None]:
from langchain_mcp_adapters.client import MultiServerMCPClient

client = MultiServerMCPClient(
    {
        "github": {
            "command": "docker",
            "args": [
                "run","-i","--rm",
                "-e","GITHUB_PERSONAL_ACCESS_TOKEN",
                "ghcr.io/github/github-mcp-server"
            ],
            "env": {
                "GITHUB_PERSONAL_ACCESS_TOKEN": os.environ["GITHUB_PERSONAL_ACCESS_TOKEN"],
            },
            "transport": "stdio",
        },
        "time": {
            "command": "uvx",
            "args": [
              "mcp-server-time",
              "--local-timezone=America/New_York"
            ],
            "transport": "stdio",
        }
    }
)
tools = await client.get_tools()


In [None]:
from langchain_core.documents import Document
from typing_extensions import List, TypedDict
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, StateGraph
from typing import Annotated
from langgraph.graph.message import add_messages
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate,MessagesPlaceholder
from langchain_mcp_adapters.tools import load_mcp_tools


# Use config.model, like the agents earlier in this notebook; the previous
# `MODEL` name is not defined anywhere here and raised NameError.
# NOTE(review): assumes config.model is a model-name string that ChatOpenAI
# accepts — confirm.
llm = ChatOpenAI(model=config.model).bind_tools(tools, parallel_tool_calls=False)

# Prompt layout: one system message filled from `system_prompt`, followed by
# the running conversation passed in as `messages`.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "{system_prompt}"),
        MessagesPlaceholder("messages"),
    ]
)

# Pipeline: fill the prompt template, then call the tool-bound model.
llm_model = prompt_template | llm

class State(TypedDict):
    """State schema passed to StateGraph for the LangGraph experiment."""

    # Conversation messages; annotated with the add_messages reducer.
    messages: Annotated[list, add_messages]
    # The user's question; the chatbot node uses it for the similarity search.
    question: str
    # Documents associated with the question.
    # NOTE(review): no visible code writes this field — confirm intended use.
    context: List[Document]

# Resolved once, when this cell runs, so the node does not look up the global
# name `config` at call time (a later cell rebinds `config` to a plain dict).
system_prompt = config.agent_prompt


def chatbot(state: State):
    """Answer the current turn with the tool-bound LLM pipeline.

    Runs a similarity search for ``state["question"]`` and invokes
    ``llm_model`` with the system prompt and the conversation so far.

    Args:
        state: Current graph state; ``question`` and ``messages`` are read.

    Returns:
        A partial state update: the model's response under ``messages``
        (merged by the reducer declared on ``State``) and the retrieved
        documents under ``context``.
    """
    print(state)
    retrieved_docs = vectorstore.similarity_search(state["question"])

    # The node is synchronous because the graph is driven with graph.invoke,
    # so nothing is awaited here. The earlier `await Runner.run(researcher, ...)`
    # call could not compile in a plain `def`, referenced names that are not
    # defined at notebook level, and its result was discarded.
    # TODO: feed the retrieved documents into the prompt once the graph is
    # past the WIP stage; for now they are only recorded in the state.
    response = llm_model.invoke(
        {"system_prompt": system_prompt, "messages": state["messages"]}
    )
    return {"messages": [response], "context": retrieved_docs}

# Wire a single-node graph: START -> "chatbot".
graph_builder = StateGraph(State)
graph_builder.add_node("chatbot", chatbot)
graph_builder.add_edge(START, "chatbot")

# Earlier two-step (retrieve -> generate) layout, kept for reference:
#graph_builder = StateGraph(State).add_sequence([retrieve, generate])
#graph_builder.add_edge(START, "retrieve")

# Compile with an in-memory checkpointer; callers select a conversation through
# the "thread_id" entry of the config they pass to invoke.
memory = MemorySaver()
graph = graph_builder.compile(checkpointer=memory)

In [None]:
import gradio

# Named `graph_config` rather than `config`, so the project-wide `config`
# settings object (config.model, config.agent_prompt, ...) is not shadowed and
# earlier cells can still be re-run after this one.
graph_config = {"configurable": {"thread_id": "3"}}

def chat(user_input: str, history):
    """Gradio chat handler backed by the compiled LangGraph graph.

    Args:
        user_input: The text the user just typed.
        history: Prior conversation supplied by Gradio; unused here, since
            the graph is compiled with a checkpointer and invoked with a
            fixed thread_id.

    Returns:
        The content of the last message in the graph's resulting state.
    """
    result = graph.invoke(
        {"messages": [{"role": "user", "content": user_input}], "question": user_input},
        config=graph_config,
    )
    return result["messages"][-1].content

view = gradio.ChatInterface(chat, type="messages").launch()

# The End