In [19]:
import os
from openai import OpenAI
import requests
import json
import re

In [37]:
def get_article(title: str) -> str:
    """
    Fetch the introduction of an English Wikipedia article as cleaned wikitext.

    The raw wikitext is requested from ``index.php`` with ``action=raw``. The
    title is passed as a query parameter so that ``requests`` percent-encodes
    it; titles containing ``?``, ``#`` or ``&`` are therefore sent intact
    instead of being parsed as URL syntax.

    Args:
        title: Wikipedia page title, e.g. ``"Quantum mechanics"``.

    Returns:
        On success, a string of the form
        ``"Title: <title>\\n\\nIntroduction:\\n<cleaned intro>\\n"``.
        On any failure (network error, HTTP error status, ...), a string
        starting with ``"Error:"`` -- this function never raises.

    Note:
        No redirect handling is done here: whatever wikitext the server
        returns for ``title`` is what gets cleaned and returned.
    """
    try:
        res = requests.get(
            "https://en.wikipedia.org/w/index.php",
            params={"title": title, "action": "raw"},
            timeout=10,
        )
        res.raise_for_status()

        # The introduction is everything before the first section header,
        # i.e. before the first line that starts with "==". If there is no
        # header, partition() yields the whole text.
        intro_content = res.text.partition("\n==")[0]

        cleaned_intro = clean_wikitext(intro_content)

        return f"Title: {title}\n\nIntroduction:\n{cleaned_intro}\n"
    except Exception as e:
        # Deliberately broad: failures are reported in-band as an "Error:" string.
        return f"Error: Could not retrieve article '{title}': {str(e)}"

def clean_wikitext(content: str) -> str:
    """
    Strip noise from raw wikitext while keeping links, infoboxes and prose.

    Removes HTML comments, ``<ref>`` footnotes (paired and self-closing),
    a fixed list of administrative/cleanup templates, and the caption/options
    part of ``[[File:...]]`` links. Finally normalises whitespace: trailing
    and leading whitespace on each line is dropped and blank lines are
    removed entirely.

    Args:
        content: Raw wikitext.

    Returns:
        The cleaned wikitext, stripped of leading/trailing whitespace.

    Limitations:
        Template patterns use ``[^}]*`` and therefore do not match templates
        that contain nested ``{{...}}``; file links whose caption contains a
        nested ``[[...]]`` link are only partially simplified.
    """

    # Remove HTML comments
    content = re.sub(r'<!--.*?-->', '', content, flags=re.DOTALL)

    # Remove reference tags and their content.
    # Self-closing refs go first: otherwise `<ref name="x" />` is taken as an
    # opening tag and everything up to the next `</ref>` is deleted with it.
    # `\b` keeps `<references />` from being treated as a `<ref>` tag.
    content = re.sub(r'<ref\b[^>]*/>', '', content)
    content = re.sub(r'<ref\b[^>]*>.*?</ref>', '', content, flags=re.DOTALL)

    # Remove administrative templates
    admin_templates = [
        r'\{\{Short description\|[^}]*\}\}',
        r'\{\{About\|[^}]*\}\}',
        r'\{\{Redirect\|[^}]*\}\}',
        r'\{\{For\|[^}]*\}\}',
        r'\{\{protection padlock[^}]*\}\}',
        r'\{\{Use [^}]*\}\}',
    ]

    for pattern in admin_templates:
        content = re.sub(pattern, '', content, flags=re.IGNORECASE)

    # Keep infoboxes but simplify file descriptions to just the file name
    content = re.sub(r'\[\[File:([^|\]]+)[^]]*\]\]', r'[[File:\1]]', content)

    # Remove cleanup templates, with or without parameters such as `|date=...`
    content = re.sub(r'\{\{citation needed[^}]*\}\}', '', content, flags=re.IGNORECASE)
    content = re.sub(r'\{\{unreliable source[^}]*\}\}', '', content, flags=re.IGNORECASE)

    # Normalise whitespace around newlines. The last two substitutions
    # collapse every whitespace run that touches a newline into a single
    # newline, so the result contains no blank lines.
    content = re.sub(r'\n\s*\n\s*\n+', '\n\n', content)
    content = re.sub(r'\s+\n', '\n', content)
    content = re.sub(r'\n\s+', '\n', content)

    return content.strip()

In [44]:
# System prompt for the navigation game. It defines the reply protocol that the
# parse_*_from_response helpers rely on: reasoning inside <think>...</think>,
# a JSON tool call inside <tool>...</tool>, and a final <success>...</success>
# block once the target is reached. Changing a tag name here requires the
# matching regex in the parser to change too.
system_prompt = """You are participating in a Wikipedia navigation game. Your objective is to go from a given START article to a TARGET article by following internal Wikipedia links.

Available tool:
- get_article(title: str) -> str  
  Returns the introduction section (snippet) of the specified Wikipedia article in raw wikitext format.

Turn structure:
1. You receive the raw wikitext of the current article's introduction section only.
2. Identify all internal links that are visible in this introduction snippet in the forms:
   - `[[Page Title|Display Text]]`
   - `[[Page Title]]`
3. From the links visible in this snippet, pick the single most strategic link that advances you toward the TARGET while minimizing total steps.
4. Extract the exact page title (everything before the `|`, or the entire content if there is no `|`).
5. Respond with:
   <think>
   [Your reasoning: why this link from the available options, how it brings you closer to the target, etc.]
   </think>
   <tool>
   {"name": "get_article", "args": {"title": "<Exact Page Title>"}}
   </tool>
6. Repeat until you reach the TARGET.

Important constraints:
- You can only see and use links that appear in the introduction snippet provided
- You cannot access the full article content - only what's shown in the snippet
- Choose wisely from the limited links available in each snippet

When you reach the TARGET, output:
<success>
Reached target article: <TARGET>
</success>
"""

def parse_thinking_from_response(response: str) -> str | None:
    thinking = re.search(r'<think>(.*?)</think>', response, re.DOTALL)
    return thinking.group(1).strip() if thinking else None

def parse_tool_from_response(response: str) -> dict | None:
    tool_call = re.search(r'<tool>(.*?)</tool>', response, re.DOTALL)
    return json.loads(tool_call.group(1)) if tool_call else None

def parse_success_from_response(response: str) -> str | None:
    success = re.search(r'<success>(.*?)</success>', response, re.DOTALL)
    return success.group(1).strip() if success else None

def call_tool(tool_call: dict) -> str:
    """
    Dispatch a parsed tool call to its implementation.

    Args:
        tool_call: Dict of the form
            ``{"name": "get_article", "args": {"title": "<page title>"}}``.

    Returns:
        The tool's result string. For an unknown tool name, or a
        ``get_article`` call without a non-empty string ``title``, a string
        starting with ``"Error:"`` is returned instead of raising, so that
        malformed model output is reported in-band.
    """
    name = tool_call.get('name') if isinstance(tool_call, dict) else None
    if name != 'get_article':
        return f"Error: Tool {name} not found"

    args = tool_call.get('args')
    title = args.get('title') if isinstance(args, dict) else None
    if not isinstance(title, str) or not title.strip():
        return "Error: Tool get_article requires a non-empty string 'title' argument"

    return get_article(title)

In [None]:
# Endpoints of the navigation game: the article to begin from and the article to reach.
start, target = "Quantum Mechanics", "Isaac Newton"

In [46]:
def build_context(system_prompt: str, start: str, target: str, path_taken: list, current_article_content: str = None): # type: ignore
    """Build context with only current relevant information"""
    messages = [
        {"role": "system", "content": system_prompt}
    ]
    
    # Context message
    if path_taken:
        path_str = " -> ".join(path_taken)
        context_msg = f"Navigate from '{start}' to '{target}'.\n\nPath taken so far: {path_str}\n\nCurrent article content:"
    else:
        context_msg = f"Navigate from '{start}' to '{target}'.\n\nStart by getting the initial article."
    
    messages.append({"role": "user", "content": context_msg})
    
    # Add current article if we have it
    if current_article_content:
        messages.append({"role": "user", "content": current_article_content})
    
    return messages

# Game configuration and mutable state for one run of the navigation loop.
model = "gpt-4.1-mini"
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
max_steps = 10
step = 0
path_taken = []
current_article_content = None
success = False  # set only on the two success paths below

print(f"Starting WikiGame: {start} -> {target}\n")

# NOTE(review): success is accepted on the model's word -- either a <success>
# block or a tool call naming the target. Neither is checked against the links
# actually present in the current article.
while step < max_steps:
    # Build fresh context for each step
    messages = build_context(system_prompt, start, target, path_taken, current_article_content)  # type: ignore

    response = client.chat.completions.create(
        model=model,
        messages=messages, # type: ignore
        max_tokens=1500,
        temperature=0.7
    )

    # The message content may be None; normalise so the regex parsers
    # always receive a string.
    response_text = response.choices[0].message.content or ""
    print(f"=== Step {step + 1} ===")

    thinking = parse_thinking_from_response(response_text)
    try:
        tool_call = parse_tool_from_response(response_text)
    except ValueError:
        # json.JSONDecodeError is a ValueError: malformed JSON in <tool>
        # is treated the same as a missing tool call.
        tool_call = None
    success_message = parse_success_from_response(response_text)

    if success_message:
        success = True
        print(f"SUCCESS: {success_message}")
        print(f"Final path: {' -> '.join(path_taken)}")
        print(f"Total steps: {len(path_taken)}")
        break

    if thinking:
        print(f"Thinking: {thinking}")

    # Pull the requested title out defensively: the tool call is
    # model-generated and may lack "args" or "title".
    article_title = None
    if isinstance(tool_call, dict):
        tool_args = tool_call.get('args')
        if isinstance(tool_args, dict):
            article_title = tool_args.get('title')

    if not isinstance(article_title, str) or not article_title:
        print("No valid tool call found, exiting.")
        break

    # Check if we've reached the target (case- and underscore-insensitive)
    if article_title.lower().replace('_', ' ') == target.lower().replace('_', ' '):
        success = True
        print(f"SUCCESS: Reached target: {target}")
        print(f"Final path: {' -> '.join(path_taken + [article_title])}")
        print(f"Total steps: {len(path_taken)}")
        break

    print(f"Navigating to: {article_title}")

    tool_response = call_tool(tool_call)

    # Tools report failures in-band as strings starting with "Error:"
    if tool_response.startswith("Error:"):
        print(f"ERROR: {tool_response}")
        break

    current_article_content = tool_response
    path_taken.append(article_title)

    print(f"Article loaded: {article_title} ({len(tool_response)} chars)")

    step += 1
    print()

if not success and step >= max_steps:
    print(f"TIMEOUT: Reached maximum steps ({max_steps}) without success.")
    print(f"Path taken: {' -> '.join(path_taken)}")

# Print final stats. Success reflects the explicit flag, so a run that ended
# on an error or an unparseable reply is not reported as a win.
print("\nGame Stats:")
print(f"Steps taken: {step}")
print(f"Success: {'Yes' if success else 'No'}")

Starting WikiGame: Quantum Mechanics -> Photoelectric Effect

=== Step 1 ===
Navigating to: Quantum Mechanics
Article loaded: Quantum Mechanics (118 chars)

=== Step 2 ===
Thinking: The current page "Quantum Mechanics" is a redirect to "Quantum mechanics". To proceed correctly, I should follow the redirect and get the article "Quantum mechanics" which will have the actual introduction and links to navigate from.
Navigating to: Quantum mechanics
Article loaded: Quantum mechanics (2946 chars)

=== Step 3 ===
Thinking: The introduction contains a direct link to the "Photoelectric effect" within the mention of Albert Einstein's 1905 paper explaining the photoelectric effect. Since the target article is "Photoelectric Effect", clicking this link will lead directly to the target or get me very close to it.
SUCCESS: Reached target: Photoelectric Effect
Final path: Quantum Mechanics -> Quantum mechanics -> Photoelectric effect
Total steps: 2

Game Stats:
Steps taken: 2
Success: Yes
