In [6]:
import os
from getpass import getpass

import requests

from lib.scraper import WikiSearch
from lib.wikienv import WikiEnv
from lib.wrappers import HotPotQAWrapper, LoggingWrapper
from lib.bot_interface import Bot

# OpenAI key used by the bot and the teaLLM environment below.
# Prefer the OPENAI_API_KEY environment variable so the notebook can be run
# non-interactively (Restart & Run All, nbconvert); otherwise prompt without
# echoing. Surrounding whitespace from a copy/paste is stripped.
API_KEY = (os.environ.get('OPENAI_API_KEY') or getpass('Enter your OpenAI key: ')).strip()

In [23]:
# Agent to evaluate. The value is used as a key into ENVS and as the base name
# of the prompt file (./prompts/<AGENT>.txt); webthink() also branches on it.
# Leave exactly one of the assignments below uncommented.
# AGENT = 'teaLLM'
# AGENT = 'ReAct'
AGENT = 'CoT'

In [24]:
# One base environment per supported agent name (all are built up front).
ENVS = dict(
    teaLLM=WikiSearch(api_key=API_KEY),
    ReAct=WikiEnv(),
    CoT=WikiEnv(),
)

# Environment for the selected agent: the HotPotQA wrapper (split='dev') is
# applied first, then the logging wrapper around it.
env = LoggingWrapper(HotPotQAWrapper(ENVS[AGENT], split='dev'))

# Language-model client used by webthink().
bot = Bot(api_key=API_KEY)

def step(env, action, max_attempts=10):
    """Call ``env.step(action)``, retrying when the request times out.

    Args:
        env: Object exposing ``step(action)``.
        action: Action forwarded unchanged to ``env.step``.
        max_attempts: Maximum number of calls to make before giving up.

    Returns:
        Whatever ``env.step(action)`` returns on the first call that succeeds.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        requests.exceptions.Timeout: If every attempt timed out. (The function
            used to fall off the end and return ``None`` here, which made
            callers fail with an unrelated tuple-unpacking ``TypeError``.)
    """
    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')
    last_timeout = None
    for _ in range(max_attempts):
        try:
            return env.step(action)
        except requests.exceptions.Timeout as exc:
            # Only timeouts are retried; any other error propagates immediately.
            last_timeout = exc
    raise last_timeout

In [52]:
# Load the few-shot prompt that belongs to the selected agent.
folder = './prompts/'
prompt_file = folder + f'{AGENT}.txt'
# read() keeps the file text verbatim. The previous '\n'.join(f.readlines())
# inserted an extra newline after every line, because readlines() already
# keeps each line's terminator. The encoding is pinned so non-ASCII prompt
# text loads the same way on every platform.
with open(prompt_file, 'r', encoding='utf-8') as f:
    webthink_prompt = f.read()
    
def _extract_finish_answer(text):
    """Return the lowercased text inside the last ``finish[...]`` of `text`.

    Matching is case-insensitive. The answer ends at the first ``]`` after the
    marker, or at the end of `text` when no closing bracket follows. Returns
    ``''`` when `text` contains no ``finish[`` marker.
    """
    lowered = text.lower()
    marker = 'finish['
    start = lowered.rfind(marker)
    if start == -1:
        return ''
    tail = lowered[start + len(marker):]
    end = tail.find(']')
    return tail if end == -1 else tail[:end]


def webthink(question=None, idx=None, bot=bot, prompt=webthink_prompt, to_print=True):
    """Run the configured agent on one question and return the environment's verdict.

    With ``AGENT == 'CoT'`` the bot is called on the growing prompt (at most 10
    times) until its output contains ``finish[``; the extracted answer is then
    submitted to the environment. Otherwise up to 7 Thought/Action/Observation
    rounds are executed against the environment, followed by a forced
    "no answer" finish action if the episode is still running.

    Args:
        question: Question text; only used when `idx` is None.
        idx: Index passed to ``env.reset(idx=idx)``; when given, the question
            is whatever that call returns.
        bot: Callable invoked as ``bot(text)`` and ``bot(text, stop=[...])``
            that returns the model's text.
        prompt: Prompt prefix to which the question and trajectory are appended.
        to_print: Whether to print the trajectory (and, when `idx` is given,
            the question and the final info dict).

    Returns:
        ``(r, info)`` as returned by the environment's last step. Outside the
        CoT branch, `info` additionally gets ``n_calls``, ``n_badcalls`` and
        ``traj`` (the full prompt).

    Raises:
        ValueError: If neither `question` nor `idx` is provided.
    """
    if idx is None and question is None:
        # Fail early with a clear message instead of a TypeError on `None + str`.
        raise ValueError('webthink() needs either a question or a dataset idx')
    if idx is not None:
        question = env.reset(idx=idx)
    else:
        # Custom question: the environment is still reset so it can be stepped.
        # NOTE(review): reward/info for a custom question presumably refer to
        # whatever episode reset() selected - confirm against the wrappers.
        env.reset()
    if to_print and idx is not None:
        print(idx, question)
    prompt += question + "\n"

    if AGENT == 'CoT':
        replies = ''
        for _ in range(10):
            reply = bot(prompt)
            prompt += reply
            replies += reply
            if 'finish[' in replies.lower():
                break
        if to_print:
            print(prompt)
        # Take the text between 'finish[' and its closing ']' rather than
        # blindly dropping the reply's last character, which went wrong
        # whenever anything followed the bracket or the bracket was missing.
        answer = _extract_finish_answer(replies)
        obs, r, done, info = env.step(f'finish[{answer}]')
        assert done
        if idx is not None and to_print:
            print(info, '\n')
        return r, info

    n_calls, n_badcalls = 0, 0
    for i in range(1, 8):
        n_calls += 1
        thought_action = bot(prompt + f"Thought {i}:", stop=[f"\nObservation {i}:"])
        try:
            thought, action = thought_action.strip().split(f"\nAction {i}: ")
        except ValueError:
            # The reply did not contain exactly one "Action i:" marker: keep its
            # first line as the thought and ask the bot for the action alone.
            print('ohh...', thought_action)
            n_badcalls += 1
            n_calls += 1
            thought = thought_action.strip().split('\n')[0]
            action = bot(prompt + f"Thought {i}: {thought}\nAction {i}:", stop=[f"\n"]).strip()

        # Lower-case only the first character; slicing keeps an empty action
        # from raising IndexError.
        obs, r, done, info = step(env, action[:1].lower() + action[1:])
        obs = obs.replace('\\n', '')
        step_str = f"Thought {i}: {thought}\nAction {i}: {action}\nObservation {i}: {obs}\n"
        prompt += step_str
        if to_print:
            print(step_str)
        if done:
            break
    if not done:
        # Step budget exhausted: close the episode with an explicit non-answer.
        finish_str = 'finish | NO ANSWER' if AGENT == 'teaLLM' else 'finish[NO ANSWER]'
        obs, r, done, info = step(env, finish_str)

    if idx is not None and to_print:
        print(info, '\n')
    info.update({'n_calls': n_calls, 'n_badcalls': n_badcalls, 'traj': prompt})
    return r, info

## Custom questions

In [53]:
# Ask a free-form question (no dataset idx) with printing enabled; assigning
# the result to `_` discards the returned (r, info) pair.
# teaLLM should generally provide more comprehensive and informative answers
# answer: Ada Lovelace architecture, third-generation RT cores, fourth-generation Tensor Cores
_ = webthink(question='Why is the RTX 4090 so fast?',
             to_print=True)

Solve a question answering task with intermediate reasoning steps. When done, do Finish[answer].

Here are some examples.



Question: Rainforest Ecological Train runs through a national park with an area of how many Kilometers ?

Reasoning Step 1: I need to figure out the national park containing Rainforest Ecological Train.

Reasoning Step 2: The train runs through the Iguazu National Park in Argentina. I need to figure out the area of this park.

Reasoning Step 3: The Iguazu National Park has an area of 677 km squared.

Answer: Finish[677]

Question: What is the population of the town that gets it's water supply from Canobie Lake?

Reasoning Step 1: I need to first figure out the town that gets its water supply from Canobie Lake.

Reasoning Step 2: The town of Salem, New Hampshire gets its water supply from the lake. I need to figure out the population of this town.

Reasoning Step 3: The population of Salem, New Hampshire is 30,089.

Answer: Finish[30,089]

Question: In between The

## HotPotQA

In [37]:
import time, json
from tqdm import tqdm

# Evaluate the agent on questions idxs[BEGIN:END], printing running statistics.
idxs = list(range(7405))  # presumably the number of questions in the split - confirm
rs = []      # per-question exact-match flags (info['em'])
infos = []   # per-question info dicts, dumped to JSON below
old_time = time.time()
BEGIN = 0
END = 14     # adjust BEGIN/END to evaluate or resume a different slice
try:
    for i in tqdm(idxs[BEGIN:END]):
        print(f'INDEX = {i}')
        r, info = webthink(idx=i, to_print=True)
        rs.append(info['em'])
        infos.append(info)
        # correct so far, questions so far, running accuracy, seconds per question
        print(sum(rs), len(rs), sum(rs) / len(rs), (time.time() - old_time) / len(rs))
        print('-----------')
        print()
finally:
    # Save whatever was collected even if a question raised or the run was
    # interrupted, so a long evaluation is not lost to one failure.
    with open(f'{AGENT}_temp.json', 'w') as f:
        json.dump(infos, f)

  0%|          | 0/14 [00:00<?, ?it/s]

INDEX = 0
0 Question: Were Scott Derrickson and Ed Wood of the same nationality?


  7%|▋         | 1/14 [00:00<00:08,  1.61it/s]

Reasoning Step 1: I need to figure out the nationality of Scott Derrickson.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out the nationality of scott derrickson.', 'gt_answer': 'yes', 'question_idx': 0, 'reward': False, 'em': False, 'f1': 0} 

0 1 0.0 0.6229798793792725
-----------

INDEX = 1
1 Question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?


 14%|█▍        | 2/14 [00:01<00:07,  1.51it/s]

Reasoning Step 1: I need to figure out who portrayed Corliss Archer in the film Kiss and Tell.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out who portrayed corliss archer in the film kiss and tell.', 'gt_answer': 'Chief of Protocol', 'question_idx': 1, 'reward': False, 'em': False, 'f1': 0} 

0 2 0.0 0.6564979553222656
-----------

INDEX = 2
2 Question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?


 21%|██▏       | 3/14 [00:01<00:07,  1.54it/s]

Reasoning Step 1: I need to figure out a science fantasy young adult series told in first person.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out a science fantasy young adult series told in first person.', 'gt_answer': 'Animorphs', 'question_idx': 2, 'reward': False, 'em': False, 'f1': 0} 

0 3 0.0 0.6477382977803549
-----------

INDEX = 3
3 Question: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?


 29%|██▊       | 4/14 [00:02<00:06,  1.47it/s]

Reasoning Step 1: I need to figure out the neighborhoods where the Laleli Mosque and Esma Sultan Mansion are located.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out the neighborhoods where the laleli mosque and esma sultan mansion are located.', 'gt_answer': 'no', 'question_idx': 3, 'reward': False, 'em': False, 'f1': 0} 

0 4 0.0 0.6676702499389648
-----------

INDEX = 4
4 Question: The director of the romantic comedy "Big Stone Gap" is based in what New York city?


 36%|███▌      | 5/14 [00:03<00:05,  1.58it/s]

Reasoning Step 1: I need to figure out who directed the romantic comedy "Big Stone Gap."
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out who directed the romantic comedy "big stone gap."', 'gt_answer': 'Greenwich Village, New York City', 'question_idx': 4, 'reward': False, 'em': False, 'f1': 0} 

0 5 0.0 0.6437180042266846
-----------

INDEX = 5
5 Question: 2014 S/S is the debut album of a South Korean boy group that was formed by who?


 43%|████▎     | 6/14 [00:03<00:05,  1.54it/s]

Reasoning Step 1: I need to figure out the South Korean boy group that released the debut album 2014 S/S.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out the south korean boy group that released the debut album 2014 s/s.', 'gt_answer': 'YG Entertainment', 'question_idx': 5, 'reward': False, 'em': False, 'f1': 0} 

0 6 0.0 0.650302012761434
-----------

INDEX = 6
6 Question: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?


 50%|█████     | 7/14 [00:04<00:04,  1.59it/s]

Reasoning Step 1: I need to figure out the real name of the person known by the stage name Aladin.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out the real name of the person known by the stage name aladin.', 'gt_answer': 'Eenasul Fateh', 'question_idx': 6, 'reward': False, 'em': False, 'f1': 0} 

0 7 0.0 0.641059296471732
-----------

INDEX = 7
7 Question: The arena where the Lewiston Maineiacs played their home games can seat how many people?


 57%|█████▋    | 8/14 [00:05<00:03,  1.58it/s]

Reasoning Step 1: I need to figure out the arena where the Lewiston Maineiacs played their home games.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out the arena where the lewiston maineiacs played their home games.', 'gt_answer': '3,677 seated', 'question_idx': 7, 'reward': False, 'em': False, 'f1': 0} 

0 8 0.0 0.6416639983654022
-----------

INDEX = 8
8 Question: Who is older, Annie Morton or Terry Richardson?


 64%|██████▍   | 9/14 [00:05<00:03,  1.59it/s]

Reasoning Step 1: I need to figure out the birthdate of Annie Morton.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out the birthdate of annie morton.', 'gt_answer': 'Terry Richardson', 'question_idx': 8, 'reward': False, 'em': False, 'f1': 0} 

0 9 0.0 0.6383335590362549
-----------

INDEX = 9
9 Question: Are Local H and For Against both from the United States?


 71%|███████▏  | 10/14 [00:06<00:02,  1.60it/s]

Reasoning Step 1: I need to figure out the origin of Local H.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out the origin of local h.', 'gt_answer': 'yes', 'question_idx': 9, 'reward': False, 'em': False, 'f1': 0} 

0 10 0.0 0.6360283851623535
-----------

INDEX = 10
10 Question: What is the name of the fight song of the university whose main campus is in Lawrence, Kansas and whose branch campuses are in the Kansas City metropolitan area?


 79%|███████▊  | 11/14 [00:07<00:02,  1.46it/s]

Reasoning Step 1: I need to figure out the university with a main campus in Lawrence, Kansas and branch campuses in the Kansas City metropolitan area.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out the university with a main campus in lawrence, kansas and branch campuses in the kansas city metropolitan area.', 'gt_answer': 'Kansas Song', 'question_idx': 10, 'reward': False, 'em': False, 'f1': 0.08} 

0 11 0.0 0.6525004560297186
-----------

INDEX = 11
11 Question: What screenwriter with credits for "Evolution" co-wrote a film starring Nicolas Cage and Téa Leoni?


 86%|████████▌ | 12/14 [00:07<00:01,  1.44it/s]

Reasoning Step 1: I need to figure out the screenwriter who co-wrote a film starring Nicolas Cage and Téa Leoni.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out the screenwriter who co-wrote a film starring nicolas cage and téa leoni.', 'gt_answer': 'David Weissman', 'question_idx': 11, 'reward': False, 'em': False, 'f1': 0} 

0 12 0.0 0.6578000982602438
-----------

INDEX = 12
12 Question: What year did Guns N Roses perform a promo for a movie starring Arnold Schwarzenegger as a former New York Police detective?


 93%|█████████▎| 13/14 [00:08<00:00,  1.43it/s]

Reasoning Step 1: I need to figure out the movie starring Arnold Schwarzenegger as a former New York Police detective.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out the movie starring arnold schwarzenegger as a former new york police detective.', 'gt_answer': '1999', 'question_idx': 12, 'reward': False, 'em': False, 'f1': 0} 

0 13 0.0 0.6626546199505146
-----------

INDEX = 13
13 Question: Are Random House Tower and 888 7th Avenue both used for real estate?


100%|██████████| 14/14 [00:09<00:00,  1.48it/s]

Reasoning Step 1: I need to figure out if Random House Tower and 888 7th Avenue are both used for real estate.
{'steps': 1, 'answer': 'reasoning step 1: i need to figure out if random house tower and 888 7th avenue are both used for real estate.', 'gt_answer': 'no', 'question_idx': 13, 'reward': False, 'em': False, 'f1': 0} 

0 14 0.0 0.6738048621586391
-----------




