In [64]:
import requests
from getpass import getpass
from lib.scraper import WikiSearch
from lib.wikienv import WikiEnv
from lib.wrappers import HotPotQAWrapper, LoggingWrapper
from lib.bot_interface import Bot

# Read the OpenAI API key interactively via getpass instead of hard-coding it
# in the notebook.
API_KEY = getpass('Enter your OpenAI key: ')

In [69]:
# Agent selector: the value is used as the key into ENVS and as the stem of
# the prompt file ('./prompts/<AGENT>.txt'). Leave exactly one line active.
# AGENT = 'teaLLM'
AGENT = 'ReAct'
# AGENT = 'CoT'

In [70]:
# One base environment per supported agent name. All three are instantiated
# here; only the entry matching AGENT is used below.
ENVS = {
    'teaLLM': WikiSearch(api_key=API_KEY),
    'ReAct': WikiEnv(),
    'CoT': WikiEnv(),
}

# Wrap the selected environment with HotPotQAWrapper (split='dev') and then
# with LoggingWrapper, in that order.
env = LoggingWrapper(HotPotQAWrapper(ENVS[AGENT], split='dev'))

# LLM interface used by webthink().
bot = Bot(api_key=API_KEY)

def step(env, action, max_attempts=10):
    """Call ``env.step(action)``, retrying when the request times out.

    Args:
        env: Object exposing a ``step(action)`` method.
        action: Action passed unchanged to ``env.step``.
        max_attempts: Maximum number of calls to ``env.step`` before giving
            up. Must be at least 1.

    Returns:
        Whatever ``env.step(action)`` returns on the first call that does not
        time out.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        requests.exceptions.Timeout: If every attempt timed out. The last
            timeout is re-raised so callers never receive an implicit ``None``
            (which would only fail later, when the result is unpacked).
    """
    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')

    last_timeout = None
    for _ in range(max_attempts):
        try:
            return env.step(action)
        except requests.exceptions.Timeout as exc:
            # Remember the failure and try again; other exceptions propagate.
            last_timeout = exc
    raise last_timeout

In [71]:
# Load the few-shot prompt that matches the selected agent.
folder = './prompts/'
prompt_file = folder + f'{AGENT}.txt'
with open(prompt_file, 'r') as f:
    # Read the file verbatim. readlines() keeps each line's trailing '\n', so
    # joining those lines with '\n' would double every line break in the
    # prompt that is sent to the model.
    webthink_prompt = f.read()
    
def webthink(question=None, idx=None, bot=bot, prompt=webthink_prompt, to_print=True):
    """Run one question-answering episode with the agent selected by AGENT.

    Args:
        question: Question text. Only used when ``idx`` is None.
        idx: Index passed to ``env.reset``. When given, the value returned by
            ``env.reset(idx=idx)`` replaces ``question``.
        bot: Callable used to query the model, invoked as ``bot(prompt)`` or
            ``bot(prompt, stop=[...])`` and expected to return a string.
        prompt: Prompt prefix; the question and every step of the episode are
            appended to it.
        to_print: Whether to print the trajectory while running.

    Returns:
        Tuple ``(r, info)`` taken from the final environment step. Outside
        'CoT' mode ``info`` is extended with ``'n_calls'``, ``'n_badcalls'``
        and ``'traj'`` (the full prompt including all steps).

    Raises:
        ValueError: If neither ``question`` nor ``idx`` is provided.
    """
    if idx is not None:
        question = env.reset(idx=idx)
    else:
        if question is None:
            # Fail with a clear message instead of a TypeError on the string
            # concatenation below.
            raise ValueError('webthink() needs either a question or an idx')
        env.reset()
    if to_print and idx is not None: print(idx, question)
    prompt += question + "\n"

    if AGENT == 'CoT':
        # Chain-of-thought mode: keep querying the model (at most 10 times)
        # until the accumulated replies contain a 'finish[' marker.
        replies = ''
        for _ in range(10):
            reply = bot(prompt)
            prompt += reply
            replies += reply
            if 'finish[' in replies.lower(): break
        if to_print: print(replies)

        answer = ''
        lowered = replies.lower()
        if 'finish[' in lowered:
            # Take the text after the last 'finish[' up to the first closing
            # bracket. Cutting at ']' (rather than dropping the final
            # character) keeps the answer intact when the reply has trailing
            # text or lacks the closing bracket.
            answer = lowered.split('finish[')[-1].split(']')[0]
        obs, r, done, info = env.step(f'finish[{answer}]')
        assert done
        if idx is not None and to_print: print(info, '\n')
        return r, info

    # Thought/Action/Observation loop, limited to 7 steps.
    n_calls, n_badcalls = 0, 0
    for i in range(1, 8):
        n_calls += 1
        thought_action = bot(prompt + f"Thought {i}:", stop=[f"\nObservation {i}:"])
        try:
            thought, action = thought_action.strip().split(f"\nAction {i}: ")
        except ValueError:
            # The reply did not split into exactly one thought and one action.
            # Keep its first line as the thought and ask for the action alone.
            print('ohh...', thought_action)
            n_badcalls += 1
            n_calls += 1
            thought = thought_action.strip().split('\n')[0]
            action = bot(prompt + f"Thought {i}: {thought}\nAction {i}:", stop=[f"\n"]).strip()

        # Lower-case the first character of the action before sending it to
        # the environment. Slicing (instead of indexing) tolerates an empty
        # action string.
        obs, r, done, info = step(env, action[:1].lower() + action[1:])
        # Removes literal backslash-n sequences (two characters), not real
        # newline characters.
        obs = obs.replace('\\n', '')
        step_str = f"Thought {i}: {thought}\nAction {i}: {action}\nObservation {i}: {obs}\n"
        prompt += step_str
        if to_print: print(step_str)
        if done: break
    if not done:
        # Step budget exhausted: close the episode with an explicit
        # "no answer" in the action syntax of the active agent.
        finish_str = 'finish | NO ANSWER' if AGENT == 'teaLLM' else 'finish[NO ANSWER]'
        obs, r, done, info = step(env, finish_str)

    if idx is not None and to_print: print(info, '\n')
    info.update({'n_calls': n_calls, 'n_badcalls': n_badcalls, 'traj': prompt})
    return r, info

## Custom questions

In [72]:
# Free-form question: no idx is passed, so webthink() uses this text as the question.
# teaLLM should generally provide more comprehensive and informative answers
# answer: Ada Lovelace architecture, third-generation RT cores, fourth-generation Tensor Cores
_ = webthink(question='Why is the RTX 4090 so fast?',
             to_print=True)

Thought 1: I need to search for information on the RTX 4090 to understand why it is so fast.

Action 1: Search[RTX 4090]
Observation 1: The GeForce 40 series is the latest family of consumer-level graphics processing units developed by Nvidia, succeeding the GeForce 30 series. The series was announced on September 20, 2022, at the GPU Technology Conference (GTC) 2022 event.. The RTX 4090 was released as the first model of the series on October 12, 2022, launched for $1,599 US,[1] and the 16GB RTX 4080 was released on November 16, 2022 for $1,199 US. An RTX 4080 12GB was announced in September 2022, originally to be priced at $899 US, however following some controversy in the media it was "unlaunched" by Nvidia. On January 5, 2023, that model would be released as the RTX 4070 Ti for $799 US.

Thought 2: The passage does not directly explain why the RTX 4090 is so fast. I need to look up the specifications or reviews of the RTX 4090 to find the answer.

Action 2: Lookup[RTX 4090 speed]
O

## HotPotQA

In [63]:
import os
import time, json
from tqdm import tqdm

# Evaluate webthink() on the question indices idxs[BEGIN:END] and store the
# per-episode info dicts as JSON.
idxs = list(range(7405))
rs = []
infos = []
old_time = time.time()
BEGIN = 0
END = 14

# Create the output directory before starting, so a missing folder fails
# immediately rather than after all episodes have been run.
results_file = f'results/{AGENT}_temp.json'
os.makedirs(os.path.dirname(results_file), exist_ok=True)

run_completed = False
try:
    for i in tqdm(idxs[BEGIN:END]):
        print(f'INDEX = {i}')
        r, info = webthink(idx=i, to_print=True)
        rs.append(info['em'])
        infos.append(info)
        # Running totals: #exact matches, #episodes, EM rate, seconds per episode.
        print(sum(rs), len(rs), sum(rs) / len(rs), (time.time() - old_time) / len(rs))
        print('-----------')
        print()
    run_completed = True
finally:
    # Persist finished episodes even if the run is interrupted or an episode
    # raises. If nothing finished, the existing file is left untouched.
    if run_completed or infos:
        with open(results_file, 'w') as f:
            json.dump(infos, f)

  0%|          | 0/14 [00:00<?, ?it/s]

INDEX = 0
0 Question: Were Scott Derrickson and Ed Wood of the same nationality?
Thought 1: I need to find out the nationalities of Scott Derrickson and Ed Wood to see if they are the same.

Action 1: search | Scott Derrickson | nationality | nationality
Observation 1: There is no information related to nationality in the provided text. Scott Derrickson is an American filmmaker known for his work in horror films and the superhero film Doctor Strange.

Thought 2: Since there is no information about Scott Derrickson's nationality, I should search for Ed Wood's nationality and compare it to Scott Derrickson's.

Action 2: search | Ed Wood | nationality | nationality
Observation 2: There is no information related to nationality in the provided text. 

Summary: Edward Davis Wood Jr. was an American filmmaker, actor, screenwriter, and pulp novelist known for directing low-budget cult classic films in the 1950s. He had a fascination with the performing arts and pulp fiction from a young age, a

  7%|▋         | 1/14 [00:23<05:09, 23.79s/it]

Thought 7: Since I couldn't find direct information about the nationalities of Scott Derrickson and Ed Wood, I should try to infer their nationalities based on the information provided. Ed Wood is described as an American filmmaker, so I can conclude that he is American. Now, I need to search for Scott Derrickson's nationality to see if they are the same.

Action 7: search | Scott Derrickson | nationality | nationality
Observation 7: There is no information related to nationality in the provided text. Scott Derrickson is an American filmmaker known for his work in horror films and the superhero movie Doctor Strange.

{'steps': 8, 'answer': ' NO ANSWER', 'gt_answer': 'yes', 'question_idx': 0, 'reward': False, 'em': False, 'f1': 0} 

0 1 0.0 23.791435956954956
-----------

INDEX = 1
1 Question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?
Thought 1: I need to find out what government position the woman who portrayed Corliss Archer

 14%|█▍        | 2/14 [00:38<03:38, 18.20s/it]

Thought 6: None of the actresses who played Corliss Archer in the film Kiss and Tell held a government position. The original question remains unanswered. I should finish the session.

Action 6: finish | No government position held
Observation 6: Episode finished, reward = 0


{'steps': 6, 'answer': ' No government position held', 'gt_answer': 'Chief of Protocol', 'question_idx': 1, 'reward': False, 'em': False, 'f1': 0} 

0 2 0.0 19.040143489837646
-----------

INDEX = 2
2 Question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?
Thought 1: I need to search for a science fantasy young adult series that is told in first person and has companion books about enslaved worlds and alien species. I should start by searching for science fantasy young adult series.

Action 1: search | science fantasy young adult series | what series is told in first person and has companion books about ensla

 21%|██▏       | 3/14 [01:06<04:12, 22.98s/it]

Thought 7: The search for Maximum Ride did not provide the information I needed. I should continue searching through the similar articles to find a series that fits the criteria.

Action 7: search | Great Plains | is this series told in first person and has companion books about enslaved worlds and alien species? | enslaved worlds, alien species
Observation 7: There is no information in the text related to a series told in first person with companion books about enslaved worlds and alien species. The text mainly discusses the history of the Great Plains in North America, including the arrival of the first Peoples, interactions with Europeans, the impact of diseases on the Plains Indians, and the development of ranching operations in the region.

{'steps': 8, 'answer': ' NO ANSWER', 'gt_answer': 'Animorphs', 'question_idx': 2, 'reward': False, 'em': False, 'f1': 0} 

0 3 0.0 22.25062171618144
-----------

INDEX = 3
3 Question: Are the Laleli Mosque and Esma Sultan Mansion located in the

 29%|██▊       | 4/14 [01:14<02:49, 16.96s/it]

Thought 3: The Esma Sultan Mansion is located in the Ortaköy neighborhood of Istanbul. Since the Laleli Mosque is in Laleli, Fatih, they are not in the same neighborhood. I can now answer the question.

Action 3: finish | No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood.
Observation 3: Episode finished, reward = 0


{'steps': 3, 'answer': ' No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood.', 'gt_answer': 'no', 'question_idx': 3, 'reward': False, 'em': False, 'f1': 0} 

0 4 0.0 18.619451701641083
-----------

INDEX = 4
4 Question: The director of the romantic comedy "Big Stone Gap" is based in what New York city?
Thought 1: I need to find the director of the movie "Big Stone Gap" and the New York city they are based in. I should start by searching for the director of the movie.

Action 1: search | Big Stone Gap movie director | who directed Big Stone Gap? | director
Observation 1: The article Big Stone Gap (film

 36%|███▌      | 5/14 [01:23<02:07, 14.17s/it]

Thought 4: Adriana Trigiani, the director of the movie "Big Stone Gap," is based in New York City. This answers the original question. I should finish the session.

Action 4: finish | New York City
Observation 4: Episode finished, reward = 0


{'steps': 4, 'answer': ' New York City', 'gt_answer': 'Greenwich Village, New York City', 'question_idx': 4, 'reward': False, 'em': False, 'f1': 0.7499999999999999} 

0 5 0.0 16.739107608795166
-----------

INDEX = 5
5 Question: 2014 S/S is the debut album of a South Korean boy group that was formed by who?
Thought 1: I need to find out who formed the South Korean boy group that released the debut album 2014 S/S. I should start by searching for the album and the group.

Action 1: search | 2014 S/S album | who formed the group that released 2014 S/S? | formed by
Observation 1: Winner, a South Korean group, released the debut album "2014 S/S" on August 12, 2014, under their record label YG Entertainment. The members of Winner were credited for writ

 43%|████▎     | 6/14 [01:40<01:59, 14.99s/it]

Thought 5: The group Winner was formed by YG Entertainment in 2013 through Mnet's survival program WIN: Who Is Next. The original question asked who formed the South Korean boy group that released the debut album 2014 S/S, and the answer is YG Entertainment.

Action 5: finish | YG Entertainment
Observation 5: Episode finished, reward = 1


{'steps': 5, 'answer': ' YG Entertainment', 'gt_answer': 'YG Entertainment', 'question_idx': 5, 'reward': True, 'em': True, 'f1': 1.0} 

1 6 0.16666666666666666 16.714098811149597
-----------

INDEX = 6
6 Question: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?
Thought 1: I need to search for a person known by the stage name Aladin who worked as a consultant to help organizations improve their performance.

Action 1: search | Aladin | who was known by the stage name Aladin and helped organizations improve their performance as a consultant? | consultant
Observation 1: There is no information 

 50%|█████     | 7/14 [02:16<02:34, 22.04s/it]

Thought 7: I should refine my search query to focus on finding a person known by the stage name Aladin who worked as a consultant to help organizations improve their performance.

Action 7: search | Aladin consultant | who was known by his stage name Aladin and helped organizations improve their performance as a consultant? | consultant
Observation 7: There is no information related to the question in the provided text. 

Summary: The text discusses various films released in 2009, including genres, notable movies, and performances.

{'steps': 8, 'answer': ' NO ANSWER', 'gt_answer': 'Eenasul Fateh', 'question_idx': 6, 'reward': False, 'em': False, 'f1': 0} 

1 7 0.14285714285714285 19.5489855834416
-----------

INDEX = 7
7 Question: The arena where the Lewiston Maineiacs played their home games can seat how many people?
Thought 1: I need to find out the seating capacity of the arena where the Lewiston Maineiacs played their home games. I should start by searching for information about t

 57%|█████▋    | 8/14 [02:30<01:56, 19.49s/it]

Thought 4: The seating capacity of The Colisée in Lewiston, Maine, where the Lewiston Maineiacs played their home games, is 3,677 with the ability to accommodate up to 4,000 patrons. This answers the original question. I should finish the task.

Action 4: finish | 3,677
Observation 4: Episode finished, reward = 0


{'steps': 4, 'answer': ' 3,677', 'gt_answer': '3,677 seated', 'question_idx': 7, 'reward': False, 'em': False, 'f1': 0.6666666666666666} 

1 8 0.125 18.858589500188828
-----------

INDEX = 8
8 Question: Who is older, Annie Morton or Terry Richardson?
Thought 1: I need to find out the ages of Annie Morton and Terry Richardson to determine who is older. I should start by searching for information on both individuals.

Action 1: search | Annie Morton | age | age
Observation 1: There is no information related to age in the provided text. Annie Morton, an American model born in Pennsylvania, has appeared on numerous magazine covers, worked with renowned photographers, and modeled

 64%|██████▍   | 9/14 [02:56<01:47, 21.53s/it]

Thought 7: I have exhausted all possible avenues to find the ages of Annie Morton and Terry Richardson. Since there is no definitive answer available, I cannot determine who is older between the two. I should finish the task with this conclusion.

Action 7: finish | Unable to determine who is older between Annie Morton and Terry Richardson
Observation 7: Episode finished, reward = 0


{'steps': 7, 'answer': ' Unable to determine who is older between Annie Morton and Terry Richardson', 'gt_answer': 'Terry Richardson', 'question_idx': 8, 'reward': False, 'em': False, 'f1': 0.2857142857142857} 

1 9 0.1111111111111111 19.65335578388638
-----------

INDEX = 9
9 Question: Are Local H and For Against both from the United States?
Thought 1: I need to determine the origins of the bands Local H and For Against. I should start by searching for information about both bands to see where they are from.

Action 1: search | Local H | where is Local H from? | origin
Observation 1: Local H is from Zion

 71%|███████▏  | 10/14 [03:09<01:15, 18.80s/it]

Thought 3: Local H is from Zion, Illinois, and For Against is from Lincoln, Nebraska, United States. Both bands are indeed from the United States. I can now answer the question.

Action 3: finish | Yes, both Local H and For Against are from the United States.
Observation 3: Episode finished, reward = 0


{'steps': 3, 'answer': ' Yes, both Local H and For Against are from the United States.', 'gt_answer': 'yes', 'question_idx': 9, 'reward': False, 'em': False, 'f1': 0} 

1 10 0.1 18.95745470523834
-----------

INDEX = 10
10 Question: What is the name of the fight song of the university whose main campus is in Lawrence, Kansas and whose branch campuses are in the Kansas City metropolitan area?
Thought 1: I need to find the university with the main campus in Lawrence, Kansas and branch campuses in the Kansas City metropolitan area, then search for the name of their fight song.

Action 1: search | University in Lawrence, Kansas with branch campuses in Kansas City metropolitan area | what i

 79%|███████▊  | 11/14 [03:32<01:00, 20.19s/it]

Thought 6: The fight song of the university with the main campus in Lawrence, Kansas and branch campuses in the Kansas City metropolitan area is called "Kansas Song (We’re From Kansas)." This answers the original question. I should finish the session.

Action 6: finish | "Kansas Song (We’re From Kansas)"
Observation 6: Episode finished, reward = 0


{'steps': 6, 'answer': ' "Kansas Song (We’re From Kansas)"', 'gt_answer': 'Kansas Song', 'question_idx': 10, 'reward': False, 'em': False, 'f1': 0.5714285714285715} 

1 11 0.09090909090909091 19.35657444867221
-----------

INDEX = 11
11 Question: What screenwriter with credits for "Evolution" co-wrote a film starring Nicolas Cage and Téa Leoni?
Thought 1: I need to find a screenwriter who has credits for "Evolution" and co-wrote a film starring Nicolas Cage and Téa Leoni. I should start by searching for the screenwriter of "Evolution" and then look for a film they co-wrote with Nicolas Cage and Téa Leoni.

Action 1: search | Evolution (film

 86%|████████▌ | 12/14 [04:09<00:50, 25.23s/it]

Thought 7: "Blue Thunder" was co-written by Don Jakoby, but it does not star Nicolas Cage and Téa Leoni. I will continue searching through the similar articles to find a film that meets all the criteria.

Action 7: search | Double Team (film) | co-written by Don Jakoby? | co-written
Observation 7: There is no information in the text provided about whether the film "Double Team" was co-written by Don Jakoby. 

Double Team is a 1997 action comedy film directed by Tsui Hark and written by Don Jakoby and Paul Mones. It stars Jean-Claude Van Damme as counter-terrorist agent Jack Quinn, who is assigned to bring an elusive terrorist known as Stavros (Mickey Rourke) to justice. The film received negative reviews and was nominated for three Golden Raspberry Awards.

{'steps': 8, 'answer': ' NO ANSWER', 'gt_answer': 'David Weissman', 'question_idx': 11, 'reward': False, 'em': False, 'f1': 0} 

1 12 0.08333333333333333 20.807194749514263
-----------

INDEX = 12
12 Question: What year did Guns N R

 93%|█████████▎| 13/14 [04:39<00:26, 26.65s/it]

Thought 7: Since I couldn't find any information about Guns N Roses performing a promo for "Raw Deal" either, I should try to think of another movie starring Arnold Schwarzenegger as a former New York Police detective and search for that movie to find the year of the promo.

Thought 8: I should search for Arnold Schwarzenegger movies where he plays a former New York Police detective and look for any promos involving Guns N Roses.

Action 7: search | Arnold Schwarzenegger
Observation 7: Invalid input. Please try again with correct syntax.

{'steps': 8, 'answer': ' NO ANSWER', 'gt_answer': '1999', 'question_idx': 12, 'reward': False, 'em': False, 'f1': 0} 

1 13 0.07692307692307693 21.506892461043137
-----------

INDEX = 13
13 Question: Are Random House Tower and 888 7th Avenue both used for real estate?
Thought 1: I need to find out if Random House Tower and 888 7th Avenue are both used for real estate. I should start by searching for information on both buildings.

Action 1: search | R

100%|██████████| 14/14 [04:49<00:00, 20.71s/it]

Thought 3: I have found that Random House Tower is used for real estate, but there is no information about 888 7th Avenue being used for real estate. Since the question asked if both buildings are used for real estate, I cannot confirm for 888 7th Avenue based on the information available. I should finish the session.

Action 3: finish | Cannot confirm for 888 7th Avenue
Observation 3: Episode finished, reward = 0


{'steps': 3, 'answer': ' Cannot confirm for 888 7th Avenue', 'gt_answer': 'no', 'question_idx': 13, 'reward': False, 'em': False, 'f1': 0} 

1 14 0.07142857142857142 20.70925828388759
-----------




