# Setup Environment

In [None]:
# Setup configuration
import sys

# Make the project checkout importable so the `src.*` imports below resolve.
# The membership check keeps re-runs of this cell from stacking duplicate
# entries onto sys.path.
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
PROJECT_ROOT = '/Users/benyoung/projects/ai-me'
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.config import Config
from IPython.display import Markdown
from agents import trace, Runner

# Instantiate the project's Config; later cells read bot_full_name, model,
# github_token, github_repos and app_name from this object.
config = Config()

print(config)

# Download, Load, Chunk, Vectorize and Store md files in Chroma

In [None]:
from importlib import reload
import src.data as _data_module
# Re-import src.data so edits made to the module on disk take effect without
# restarting the kernel; the from-import below then binds the fresh classes.
reload(_data_module)
from src.data import DataManager, DataManagerConfig


# Use consolidated data manager
# For some reason, the glob pattern does not follow symlinks properly, so specify directly here
data_config = DataManagerConfig(
    doc_load_local=["me/**/*.md"],  # recursive glob for markdown files under me/
    github_repos=config.github_repos
)
data_manager = DataManager(config=data_config)

# Load all repos configured in config.github_repos (automatically loads based on list presence)
# The result supports len(); each element is treated as one chunk in the count printed below.
chunks = data_manager.load_and_process_all(github_repos=config.github_repos)

print(f"Total chunks created: {len(chunks)}")

# Create the vectorstore using DataManager
# NOTE(review): reset=True presumably rebuilds the store from scratch on every run - confirm in DataManager.
vectorstore = data_manager.create_vectorstore(chunks, reset=True)
# Retriever view over the vectorstore; not referenced again in the visible cells.
retriever = vectorstore.as_retriever()


# Spot-check: presumably shows the documents stored for faq.md - confirm in DataManager.
data_manager.show_docs_for_file("faq.md")

# Setup Agents

In [None]:
from src.agent import AIMeAgent

# Initialize agent config with vectorstore
# (despite the name, agent_config is an AIMeAgent instance; it is the object
# that create_ai_me_agent() is called on below)
agent_config = AIMeAgent(
    bot_full_name=config.bot_full_name, 
    model=config.model,
    vectorstore=vectorstore,
    github_token=config.github_token
)

# Top-level await: only valid where the notebook provides a running event loop.
# Called with no arguments here; a later cell rebuilds ai_me with a custom
# prompt and MCP params.
ai_me = await agent_config.create_ai_me_agent()


In [None]:
# Check GitHub Rate Limits
from github import Github, Auth
import time

# Print the current GitHub API quota before any tool is created.
banner = "=" * 80
print(banner)
print("CHECKING GITHUB API RATE LIMITS BEFORE TOOL CREATION")
print(banner)

try:
    auth = Auth.Token(config.github_token.get_secret_value())
    g = Github(auth=auth)
    rate_limit = g.get_rate_limit()

    # (heading label, attribute on rate_limit.resources, print minutes-until-reset?)
    # GraphQL is reported without the countdown line.
    sections = (
        ("Core", "core", True),
        ("Search", "search", True),
        ("GraphQL", "graphql", False),
    )
    for label, attr, show_countdown in sections:
        bucket = getattr(rate_limit.resources, attr)
        print(f"\n📊 {label} API Rate Limit Status:")
        print(f"   Limit: {bucket.limit}")
        print(f"   Remaining: {bucket.remaining}")
        print(f"   Reset time: {bucket.reset}")
        if show_countdown:
            minutes_left = (bucket.reset.timestamp() - time.time()) / 60
            print(f"   Time until reset: {minutes_left:.1f} minutes")

except Exception as e:
    # Diagnostic cell: report the failure and its traceback, do not re-raise.
    print(f"❌ Error checking rate limits: {e}")
    import traceback
    traceback.print_exc()


## Workaround: Use PyGithub Directly

Since GithubSearchTool appears to have a bug, let's create our own search function using PyGithub directly.

In [None]:
from github import Github, Auth
from agents import function_tool
import traceback
import re

from itertools import islice

# Exclude 'byoung/me' from configured repos
# NOTE(review): presumably because that repo's markdown is already indexed for
# RAG - confirm.
repos = [r for r in config.github_repos if r != "byoung/me"]

# Cap on hits listed per repository, and on characters shown per file preview.
_MAX_RESULTS_PER_REPO = 3
_PREVIEW_CHARS = 200
# Commit-pinned segment of a blob URL (/blob/<40-hex sha>/); compiled once and
# reused for every hit.
_BLOB_SHA_RE = re.compile(r'/blob/[0-9a-f]{40}/')


def _format_code_hit(code_file, default_branch: str) -> list:
    """Return the output lines (path, URL, preview) for one code-search hit.

    Args:
        code_file: A hit yielded by ``Github.search_code``; its ``path``,
            ``html_url`` and ``decoded_content`` attributes are read.
        default_branch: Branch name substituted for the commit SHA in the URL.

    Returns:
        Three strings: the file line, the URL line, and either a preview line
        or a "could not decode" marker.
    """
    # Rewrite blob/<sha> to blob/<default_branch> so the link is not pinned to
    # a single commit.
    file_url = _BLOB_SHA_RE.sub(f'/blob/{default_branch}/', code_file.html_url)
    lines = [
        f"\n📄 File: {code_file.path}",
        f"   URL: {file_url}",
    ]
    try:
        content = code_file.decoded_content.decode('utf-8')[:_PREVIEW_CHARS]
        lines.append(f"   Preview: {content}...")
    except Exception:
        # Best-effort preview: content that cannot be fetched or decoded must
        # not abort the search. Catching Exception (rather than a bare except)
        # lets KeyboardInterrupt/SystemExit propagate.
        lines.append("   (Could not decode content)")
    return lines


@function_tool
def search_github_repo(search_query: str) -> str:
    """Search for code, files, and content across all configured GitHub repositories.

    Args:
        search_query: The search term to look for (e.g., 'python', 'bash', 'docker', 'ReaR')

    Returns:
        A formatted string containing search results from all repos with file paths, URLs, and content previews
    """
    # The docstring wording is kept unchanged: it sits under @function_tool and
    # may be surfaced to the model as the tool description.
    #
    # This function does not raise: per-repository failures are reported inline
    # in the returned text, and any other failure is returned as an error
    # string including the traceback.

    # An empty term would send a qualifiers-only query to every repository;
    # reject it up front instead of spending one request per repo.
    if not search_query or not search_query.strip():
        return "Error searching GitHub: search_query must not be empty"

    try:
        # Use the token from config with new Auth.Token method
        auth = Auth.Token(config.github_token.get_secret_value())
        g = Github(auth=auth)

        all_results = [
            f"Searching for '{search_query}' across {len(repos)} repositories\n",
            "=" * 80 + "\n",
        ]
        total_results_across_repos = 0

        # Search each configured repository
        for repo_full_name in repos:
            all_results.append(f"\n## Repository: {repo_full_name}\n")

            try:
                # The repository object is only needed for its default branch.
                default_branch = g.get_repo(repo_full_name).default_branch

                # Exclude markdown files (covered by RAG)
                query = f"{search_query} repo:{repo_full_name} -extension:md"
                code_results = g.search_code(query=query)

                # Check the total first so an empty repo gets an explicit line.
                total_count = code_results.totalCount
                if total_count == 0:
                    all_results.append(f"No results found in {repo_full_name}\n")
                    continue

                result_count = 0
                # islice stops after the cap without requesting further hits.
                for code_file in islice(code_results, _MAX_RESULTS_PER_REPO):
                    all_results.extend(_format_code_hit(code_file, default_branch))
                    result_count += 1
                    total_results_across_repos += 1

                all_results.append(f"\n→ Showing {result_count} of {total_count} results from {repo_full_name}")

            except Exception as repo_error:
                # One failing repository must not hide results from the others.
                all_results.append(f"⚠️  Error searching {repo_full_name}: {repo_error}")

        all_results.append("\n\n" + "=" * 80)
        all_results.append(f"\nTotal results shown: {total_results_across_repos} across {len(repos)} repositories")
        all_results.append("\n(Note: Markdown files excluded - covered by RAG system)")

        result_text = "\n".join(all_results)
        # Echo to the notebook output so each tool call is visible while debugging.
        print(f"SEARCH RESULTS:\n{result_text}")

        return result_text

    except Exception as e:
        return f"Error searching GitHub: {str(e)}\n{traceback.format_exc()}"



# Display configured repositories
# Assemble the whole summary first, then emit it with a single print call.
_summary = [f"GitHub search tool configured for {len(repos)} repositories:"]
_summary.extend(f"  • {name}" for name in repos)
_summary.append("\nThe agent will search all repos automatically for each query.")
_summary.append("(Markdown files excluded - already covered by RAG system)")
print("\n".join(_summary))


# Run Agent Tests

In [None]:
# Debug: Test if Docker works and check the exact command
import subprocess

# Test 1: Can we run docker at all?
print("Test 1: Docker version check")
try:
    result = subprocess.run(["docker", "--version"], capture_output=True, text=True, timeout=5)
    if result.returncode == 0:
        print(f"✓ Docker available: {result.stdout.strip()}")
    else:
        # The binary ran but reported failure; do not claim success on empty stdout.
        print(f"✗ Docker not available: exit code {result.returncode}: {result.stderr.strip()}")
except Exception as e:
    print(f"✗ Docker not available: {e}")

# Test 2: What does the MCP params look like?
print("\nTest 2: Check mcp_github_params")
test_config = AIMeAgent(
    bot_full_name=config.bot_full_name,
    model=config.model,
    vectorstore=vectorstore,
    github_token=config.github_token
)
params = test_config.mcp_github_params
print(f"Command: {params.command}")
print(f"Args: {params.args}")
# NOTE(review): params.env may carry the GitHub token in clear text - confirm
# and mask it before sharing this notebook's output.
print(f"Env: {params.env}")

# Test 3: Build (without running) the equivalent docker command with inline env vars
print("\nTest 3: Try running docker with env vars directly")
token = config.github_token.get_secret_value()
token_arg = f"GITHUB_PERSONAL_ACCESS_TOKEN={token}"
cmd = [
    "docker", "run", "-i", "--rm",
    "-e", token_arg,
    "-e", "GITHUB_TOOLSETS=repo",
    "ghcr.io/github/github-mcp-server"
]
# Mask the secret before printing. Slicing the list (cmd[:7]) still included
# the element holding the token, so the argument is replaced explicitly.
printable_cmd = ["GITHUB_PERSONAL_ACCESS_TOKEN=***" if arg == token_arg else arg for arg in cmd]
print(f"Command: {' '.join(printable_cmd)} (token hidden)")

# Don't actually run it interactively, just test if docker accepts the command format
print("\nTest 4: Validate docker accepts the command")
test_cmd = ["docker", "run", "--help"]
try:
    result = subprocess.run(test_cmd, capture_output=True, text=True, timeout=5)
except (OSError, subprocess.SubprocessError) as e:
    # Missing docker binary (OSError) or a timeout (SubprocessError): report it
    # like Test 1 does instead of aborting the cell.
    print(f"✗ Docker issue: {e}")
else:
    if result.returncode == 0:
        print("✓ Docker run command format is valid")
    else:
        print(f"✗ Docker issue: {result.stderr}")

In [None]:
# Test 5: Try running the Docker container to see what it outputs
print("\nTest 5: Check if Docker container starts and what it outputs")
import subprocess
import os

# Set the env vars
# NOTE(review): these assignments modify the kernel's own process environment
# and stay set after this cell finishes (the token included) - confirm that is
# intended, or pass env= to subprocess.run instead.
os.environ['GITHUB_PERSONAL_ACCESS_TOKEN'] = config.github_token.get_secret_value()
os.environ['GITHUB_TOOLSETS'] = 'repo'

# Try to run the container and see what happens
# "-e NAME" without "=value" asks docker to take the value from the calling
# environment, so the token never appears in the argument list.
cmd = [
    "docker", "run", "--rm",
    "-e", "GITHUB_PERSONAL_ACCESS_TOKEN",
    "-e", "GITHUB_TOOLSETS",
    "ghcr.io/github/github-mcp-server",
    "--help"  # Try with help flag
]

try:
    # Captures both streams as text; gives up after 10 seconds.
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
    print(f"Return code: {result.returncode}")
    print(f"STDOUT:\n{result.stdout}")
    print(f"STDERR:\n{result.stderr}")
except subprocess.TimeoutExpired:
    print("Command timed out")
except Exception as e:
    # Any other failure (e.g. docker not installed) is reported, not raised.
    print(f"Error: {e}")

In [None]:
# Reload agent module to pick up latest changes
import src.agent as _agent_module
reload(_agent_module)
from src.agent import AIMeAgent

# Recreate agent config with updated module
agent_config = AIMeAgent(
    bot_full_name=config.bot_full_name, 
    model=config.model,
    vectorstore=vectorstore,
    github_token=config.github_token
)

# Create agent with custom prompt and the unified GitHub search tool
ai_me = await agent_config.create_ai_me_agent(
    agent_prompt="""
You are acting as somebody who personifying {self.bot_full_name} and must follow these rules:
 * If the user asks a question, use the get_local_info tool ONCE to gather more info
 * Answer based on the information given to you by the tool calls
 * do not offer follow up questions, just answer the question
 * NEVER use the search_code tool
 * Use normal utf8 chars, not things like 】
 * Add reference links at the end of the output if they contain https://github.com
 """,
    mcp_params=[agent_config.mcp_github_params,agent_config.mcp_time_params],
    #additional_tools=[search_github_repo]
)

with trace("test-1"):
    result = await Runner.run(ai_me, "What do you know about ReaR?")

display(Markdown(result.final_output))


In [None]:
# Second probe, traced as "test-2": a broad topic question sent to the ai_me
# agent built above; the final answer is rendered as Markdown.
with trace("test-2"):
    result = await Runner.run(ai_me, "What do you know about python?")
display(Markdown(result.final_output))


In [None]:
# Third probe, traced as "test-3": asks for a summary of last week's commits.
# NOTE(review): presumably exercises the GitHub MCP tools rather than local
# documents - confirm in the trace.
with trace("test-3"):
    result = await Runner.run(ai_me, "Give me a summary of all the commits you've made in the last week")
display(Markdown(result.final_output))


In [None]:
# Fourth probe, traced as "test-4".
# NOTE(review): presumably a subject absent from the indexed documents, to see
# how the agent answers when it has no information - confirm intent.
with trace("test-4"):
    result = await Runner.run(ai_me, "who is slartibartfast?")
display(Markdown(result.final_output))

In [None]:
import gradio

async def chat(user_input: str, history):
    """Answer one chat turn by running the ai_me agent on the user's message.

    Args:
        user_input: The message the user just submitted.
        history: Earlier turns supplied by the chat UI; not used, so each
            message is answered on its own.

    Returns:
        The agent's final output for this message.
    """
    # Log both sides of the exchange to the notebook output.
    print("================== USER ===================", user_input, sep="\n")

    run_result = await Runner.run(ai_me, user_input)
    reply = run_result.final_output

    print("================== AGENT ==================", reply, sep="\n")
    return reply

# Welcome text shown above the chat box. The literal is kept exactly as
# written, continuation-line indentation included.
welcome_md = f"""# Welcome to {config.app_name}
                    The digital version of {config.bot_full_name}
                    The digital assistant that you never knew you needed ;)
                    Feel free to ask me anything about my experience, skills, projects, and interests.
                    """

# Page layout: the welcome text followed by a chat interface driven by chat().
with gradio.Blocks(theme=gradio.themes.Ocean()) as ui:
    gradio.Markdown(welcome_md)
    gradio.ChatInterface(chat, type="messages")

# Start serving the UI.
ui.launch()

# The End