In [4]:
import yaml
import os
import re 
import random
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate

with open('config.yml', 'r') as file:
    config = yaml.safe_load(file)
openai_key = config["openai_key"]
anthropic_key = config["anthropic_key"]

In [8]:
os.environ["ANTHROPIC_API_KEY"] = anthropic_key

chat = ChatAnthropic(temperature=0, model_name="claude-3-haiku-20240307")
crawler_system = (
    """You are an AI language model tasked to beat the Wiki Game. 
    Your goal is, from a wikipedia page, to get the next link to click to go to the next page."""
)

In [9]:
from wikipedia_functions import get_random_page, get_page_links, get_page_content, check_wikipedia_pages_existence

start_page = get_random_page(1)[0]
final_page = "United States"
print(start_page)
final_page_content = get_page_content(final_page)

2006 Spa 24 Hours


In [10]:
from prompt_functions import get_crawler_template, get_summarize_template

crawler_template = get_crawler_template()
summarize_template = get_summarize_template() 

In [11]:
summarize_system = (
    """You are an AI language model tasked to summarize the Wikipedia content of the end page of a Wiki Race Game.
    Through this summary, you are supposed to help a crawler agent to get to the end page."""
)
human = summarize_template
prompt = ChatPromptTemplate.from_messages([("system", crawler_system), ("human", human)])
chain = prompt | chat

In [12]:
end_summary = chain.invoke(
        {
            "page_content": final_page_content
        }
    )

In [14]:
model_output = end_summary.content
pattern = r'<summary>(.*?)</summary>'
matches = re.findall(pattern, model_output)
end_content = matches[0]

In [15]:
start_links = get_page_links(start_page)
print(start_links)
checked_links = check_wikipedia_pages_existence(start_links)
valid_links = [link for link in start_links if checked_links[link]]

['Category:2006 in FIA GT', 'Manthey Racing', 'Porsche 911 GT3', 'Category:2006 in Belgian motorsport', 'Horst Felbermayr', 'Paul Belmondo', 'Jamie Davies', 'Dodge Viper', 'Marcel Fässler (racing driver)', 'Ferrari F430', 'Chrysler Viper GTS-R', 'Aston Martin DBR9', 'Marcel Tiemann', 'Eric van de Poele', 'Kurt Mollekens', 'Fabrizio Gollin', 'File:Circuit de Spa-Francorchamps 2004-2006.png', 'Iradj Alexander', 'Proximus', 'Scuderia Italia', '2006 FIA GT Oschersleben 500km', 'Sascha Maassen', 'Fabio Babini', 'Jonny Kane', 'Maxime Soulet', 'Andrew Kirkaldy (racing driver)', 'Audi', 'Spa 24 Hours', 'Romain Dumas', 'François Duval', 'Karl Wendlinger', 'Johnny Mowlem', 'Jeroen Bleekemolen', 'Matteo Bobbi', 'Vitaphone Racing Team', 'Lucas Luhr', 'JetAlliance Racing', 'Allan Simonsen (racing driver)', 'Christian Pescatori', 'BMW M3', 'Jean-Philippe Belloc', 'Gillet', 'Philippe Haezebrouck', 'Michael Bartels', 'Mika Salo', 'FIA GT Championship', 'Gabriele Lancieri', 'Ford Motor Company', 'Vince

In [16]:
human = crawler_template
prompt = ChatPromptTemplate.from_messages([("system", crawler_system), ("human", human)])
chain = prompt | chat
current_page = start_page
current_links = valid_links
for i in range(10):
    text = chain.invoke(
        {
            "current_page": current_page,
            "current_links": current_links,
            "end_page": final_page,
            "end_page_content": end_content
        }
    )
    model_output = text.content
    print(model_output)
    pattern = r'<output>(.*?)</output>'
    matches = re.findall(pattern, model_output)
    current_page = matches[0]
    if current_page == "Random":
        current_page = random.choice(current_links)
    if current_page != final_page:
        found_links = get_page_links(current_page)
        checked_links = check_wikipedia_pages_existence(start_links)
        current_links = [link for link in start_links if checked_links[link]]
    else:
        print(f"Page reached in {i} iterations!")
        break

Here is my reasoning and link choice:

<reasoning>
The goal page is "United States", and the current page is "2006 Spa 24 Hours". None of the links directly mention the United States, so I will need to find a link that could potentially lead me to the goal page through a series of connections.

The link that seems most promising is "Chevrolet Corvette C6.R", as Chevrolet is an American car manufacturer, and the Corvette is a well-known American sports car. This could potentially lead to a page about the United States or American automotive history.
</reasoning>

<output>
<output>Chevrolet Corvette C6.R</output>
</output>
Here is my reasoning and link choice:

<reasoning>
The goal page is "United States", and the current page is "Chevrolet Corvette C6.R". The available links do not seem to have any direct connection to the United States. The links are mostly related to motorsports, car manufacturers, and racing events. None of the links appear to be closely related to the United States.