In [20]:
import os
import sys

# Base directory for run artefacts; the trial log is written beneath it later on.
root = '../root/'

# Put the parent directory on the import path (presumably so the project's
# sibling modules resolve when the notebook is run from its own folder).
sys.path.append('..')

In [21]:
import joblib
from util import summarize_react_trial, log_react_trial, save_agents
from agents import ReactReflectAgent, ReactAgent, ReflexionStrategy
from llm import ClaudeLLM

In [None]:
# Never hardcode the credential in the notebook: reuse ANTHROPIC_API_KEY when it
# is already exported, and otherwise prompt for it without echoing the input.
# (The previous unconditional assignment also clobbered a real key that was
# already present in the environment.)
from getpass import getpass

if not os.environ.get("ANTHROPIC_API_KEY"):
    os.environ["ANTHROPIC_API_KEY"] = getpass("ANTHROPIC_API_KEY: ")

# One shared client; the agents built later receive this same object.
llm = ClaudeLLM(model_name="claude-3-haiku-20240307", temperature=0.0, max_tokens=250)

✓ Claude model initialized: claude-3-haiku-20240307


#### Load the HotpotQA Sample

In [23]:
# Read the serialized HotpotQA sample and renumber its rows from zero.
sample_path = '../data/hotpot-qa-distractor-sample.joblib'
hotpot = joblib.load(sample_path)
hotpot = hotpot.reset_index(drop=True)
# For a quick smoke test, truncate the sample, e.g. hotpot = hotpot.head(2)
print(f"Loaded {len(hotpot)} questions")

Loaded 100 questions


#### Define the Reflexion Strategy

In [24]:
# Show the docstring that describes each available reflection strategy.
strategy_help = ReflexionStrategy.__doc__
print(strategy_help)


    NONE: No reflection
    LAST_ATTEMPT: Use last reasoning trace in context 
    REFLEXION: Apply reflexion to the next reasoning trace 
    LAST_ATTEMPT_AND_REFLEXION: Use last reasoning trace in context and apply reflexion to the next reasoning trace 
    


In [25]:
# Reflection strategy used for this run: it selects the agent class, is passed to
# agent.run(...), and names the output directory via strategy.value.
# Options listed by ReflexionStrategy.__doc__: NONE, LAST_ATTEMPT, REFLEXION,
# LAST_ATTEMPT_AND_REFLEXION.
strategy: ReflexionStrategy = ReflexionStrategy.REFLEXION

#### Initialize a ReAct agent for each question

In [26]:
# Choose the agent class and its LLM arguments once, based on the strategy:
# reflecting agents also receive a reflection LLM, plain ReAct agents do not.
if strategy != ReflexionStrategy.NONE:
    agent_cls = ReactReflectAgent
    llm_kwargs = {'react_llm': llm, 'reflect_llm': llm}
else:
    agent_cls = ReactAgent
    llm_kwargs = {'react_llm': llm}

# One agent per row, built from that row's question and reference answer.
agents = [
    agent_cls(row['question'], row['answer'], **llm_kwargs)
    for _, row in hotpot.iterrows()
]

#### Run `n` trials

In [None]:
# Trial bookkeeping: `n` trials are requested, `trial` counts the trials
# completed so far, and `log` accumulates the per-trial log text.
n, trial, log = 5, 0, ''

In [28]:
# `strategy` does not change during the run, so decide once which keyword
# arguments every run() call receives.
run_kwargs = {'reflect_strategy': strategy} if strategy != ReflexionStrategy.NONE else {}

for i in range(n):
    # Snapshot the agents that are not yet correct before the trial starts;
    # agents that already succeeded are not run again.
    pending = [candidate for candidate in agents if not candidate.is_correct()]
    for agent in pending:
        agent.run(**run_kwargs)
        print(f'Answer: {agent.key}')

    # Record the finished trial and report the tallies over all agents.
    trial = trial + 1
    log = log + log_react_trial(agents, trial)
    correct, incorrect, halted = summarize_react_trial(agents)
    print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}, Halted: {len(halted)}')

Thought 1: Search[VIVA Media AG]
Action 1: Search[VIVA Media AG]
Observation 1: Could not find [VIVA Media AG]. Similar: ['MTV Music (Poland)', 'Paramount International Networks', 'VIVA Plus', 'VIVA Germany', 'Viacom (1952–2005)', 'Vauxhall Viva', 'Sartorius AG', 'Mirai (2025 film)', 'Kimberly-Clark', 'Spellbound Entertainment']
Thought 2: Search[VIVA Media]
Action 2: Search[VIVA Media]
Observation 2: Could not find [VIVA Media]. The search returned a different page ("Via media"). Try searching for a related topic or more specific terms.
Thought 3: Search[VIVA Media name change]
Action 3: Search[VIVA Media name change]




  lis = BeautifulSoup(html).find_all('li')


Observation 3: Could not find [VIVA Media name change]. Similar: ['Viva', 'Viva Hate', 'Viva La Dirt League', 'Viva (airline)', 'Viva Hot Babes', 'Viva Brother', 'Vauxhall Viva', 'Media conglomerate', 'Globoplay Novelas', 'Viva la Vida or Death and All His Friends']
Thought 4: I'm sorry, I could not find any information about VIVA Media AG changing its name in 2004 or what the new acronym stands for. The searches I performed did not return any relevant results about this company or a name change. Without more specific information, I am unable to determine the answer to this question. Please let me know if you have any other details that could help me research this further.
[ClaudeLLM] Response too long (413 chars), using Finish
Action 4: Finish[Unable to determine from available information]
Answer: Gesellschaft mit beschränkter Haftung
Thought 1: Search[Jonny Craig]
Action 1: Search[Jonny Craig]
Observation 1: Could not find [Jonny Craig]. The search returned a different page ("Jenny 

KeyboardInterrupt: 

#### Save the result log

In [None]:
# Persist the accumulated trial log and the agents under one results tree:
#   <root>/ReAct/claude/<strategy value>/
results_dir = os.path.join(root, 'ReAct', 'claude', strategy.value)

# open(..., 'w') does not create missing parent directories, so make sure the
# target exists before writing (a no-op when it is already there).
os.makedirs(results_dir, exist_ok=True)

log_path = os.path.join(results_dir, f'{len(agents)}_questions_{trial}_trials.txt')
# The traces can contain non-ASCII text (accented words, en dashes), so pin the
# encoding instead of relying on the platform default.
with open(log_path, 'w', encoding='utf-8') as f:
    f.write(log)

# NOTE(review): the agents path used to omit `root`, which put the agents in a
# different tree (relative to the working directory) than the log. They are now
# saved next to the log; confirm no downstream loader expects the old location.
save_agents(agents, os.path.join(results_dir, 'agents'))