## 1. Run Combo Lock Interaction (verl)

In [3]:
# Load variables from the .env file one directory up into the process environment.
# The call is the cell's last expression, so its return value is displayed as the cell output.
import dotenv
dotenv.load_dotenv('../.env')

True

In [1]:
from verl.interactions.combolock_interaction import ComboLockInteraction

interaction = ComboLockInteraction(config={})

instance_id = await interaction.start_interaction(
    instance_id=None,
    combination_length=3,
    max_attempts=8,
    vocab='0123456789',
    ground_truth='835',
    format='interaction_think',
)
env = interaction._instance_dict[instance_id]['env']

In [2]:
guesses = ['123', '456', '789', '835']  # example guesses
attempts = 0
game_history = []

while attempts < env.max_attempts:

    print(f"\nAttempt {attempts + 1}/{env.max_attempts}:")

    attempts += 1
    guess = guesses[attempts - 1]
    messages = [
        {"role": "user", "content": f"Make guess for {env.combination_length}-digit \combination"},
        {"role": "assistant", "content": f"<action>{guess}</action>"}
    ]
    
    # Get response from the interaction
    done, response, score, additional_data = await interaction.generate_response(
        instance_id=instance_id,
        messages=messages
    )
    
    print(f"Response: {response}")
    print(f"Score: {score}")
    
    # Record the attempt
    game_history.append({
        "attempt": attempts,
        "guess": guess,
        "response": response,
        "score": score
    })
    
    if done:
        print(f"\n🎉 Game completed! Goal reached: {done}")
        break


Attempt 1/8:
Response: 1 is not in the lock
2 is not in the lock
3 is not in Position 3, but is in the lock
Score: 0.0

Attempt 2/8:
Response: 4 is not in the lock
5 is not in Position 2, but is in the lock
6 is not in the lock
Score: 0.0

Attempt 3/8:
Response: 7 is not in the lock
8 is not in Position 2, but is in the lock
9 is not in the lock
Score: 0.0

Attempt 4/8:
Response: 8 is in Position 1!
3 is in Position 2!
5 is in Position 3!
Score: 0.625

🎉 Game completed! Goal reached: True


## 2. Run Paprika Interaction (verl) in the same way

In [1]:
# Imports and configuration for the Paprika demo.
# The sys.path tweak must run before the llm_exploration import below; it adds the
# sibling ../paprika directory to the import path (presumably where llm_exploration
# lives — confirm).
import sys
sys.path.append('../paprika') # Temporary workaround: remove once verl and paprika are installed in the same env
from llm_exploration.paprika_config_helper import PaprikaConfigHelper
from verl.interactions.paprika_interaction import PaprikaInteraction
import dotenv
dotenv.load_dotenv('../.env')
from pprint import pprint as pp

# Build the keyword arguments later unpacked into start_interaction(**config).
GAME_NAME = "twenty_questions"  # game to play; other names such as "mastermind" can be substituted
config = PaprikaConfigHelper.create_config(GAME_NAME)
config['belief_config']['style'] = 'none'  # belief style set to 'none' (no belief used for now)

Could not import JerichoInferenceEngine, so cannot use it!
Could not import VLLMInferenceEngine, so cannot use it!


In [2]:
# config

In [2]:
sample_actions = [
    'Is it alive and judging you silently?',
    'Could you eat it without alarming your neighbors?',
    'Is it so big that it would crush a shoe in revenge?',
    'Is it always blue, even on its days off?',
    'Would it be suspicious to find it in your pocket?',
    'Could it survive being yeeted off a skyscraper?',
    'Would NASA high-five you if you found it in space?',
    'Does it make noise when nobody’s listening?',
    'Would it be comfy to sit on for exactly 7 hours?',
    'Could it vanish just because you blinked too hard?'
]

interaction = PaprikaInteraction(config={})

instance_id = await interaction.start_interaction(
        instance_id=None,
        **config
    )

The inference engine has been reset to the start state!
The inference engine has been reset to the start state!
The inference engine has been reset to the start state!


In [3]:
# Fresh bookkeeping for the Paprika run: turns played so far and a per-turn log.
attempts, game_history = 0, []

# Hard cap on turns for this demo. The instance's own limit appears to be stored
# under interaction._instance_dict[instance_id]['max_turns'] — TODO confirm
# before switching to it.
max_attempts = 6

In [4]:
# Display the scenario stored for this game instance (read from the interaction's
# private per-instance state, for debugging only).
interaction._instance_dict[instance_id]['scenario']

{'env': 'The Space Shuttle Challenger Disaster', 'agent': 'historical event'}

In [5]:
while attempts < max_attempts:
        
    print(f"\nAttempt {attempts + 1}/{max_attempts}:")
    attempts += 1
    action = sample_actions[attempts - 1]
    messages = [
        {"role": "user", "content": f"Output the next action."},
        {"role": "assistant", "content": f"<action>{action}</action>"}
    ]

    done, response, score, additional_data = await interaction.generate_response(
        instance_id=instance_id,
        messages=messages
    )
    
    print(f"Question: {action}")
    print(f"Response: {response}\n")
    
    game_history.append({
        "attempt": attempts,
        "guess": action,
        "response": response,
        "score": score
    })


Attempt 1/6:


Question: Is it alive and judging you silently?
Response: No


Attempt 2/6:
Question: Could you eat it without alarming your neighbors?
Response: No


Attempt 3/6:
Question: Is it so big that it would crush a shoe in revenge?
Response: No


Attempt 4/6:
Question: Is it always blue, even on its days off?
Response: No


Attempt 5/6:
Question: Would it be suspicious to find it in your pocket?
Response: Yes


Attempt 6/6:
Question: Could it survive being yeeted off a skyscraper?
Response: No



In [6]:
# Pretty-print the conversation returned alongside the most recent generate_response
# call; width=120 keeps long message contents readable.
pp(additional_data['conversation'], width=120)

[{'content': 'You are a helpful assistant.', 'role': 'system'},
 {'content': 'You are playing a game of 20 Questions. Your goal is to guess the name of a thing or person by asking up '
             "to 20 yes-or-no questions. After each question, you will receive an answer: 'Yes' or 'No.' Use the "
             'answers provided to refine your guesses.\n'
             '\n'
             'Here are your instructions:\n'
             '- You can ask only yes-or-no questions.\n'
             '- After receiving each answer, you should adapt your questions based on the new information.\n'
             '- Your goal is to guess the topic in as few questions as possible.\n'
             "- If you're confident, you can make a guess before reaching 20 questions.\n"
             '\n'
             'The game starts now. You are trying to guess a historical event. Ask your first question!',
  'role': 'user'},
 {'content': 'is it alive and judging you silently?', 'role': 'assistant'},
 {'content': 'No',