## 1. Run Combo Lock Interaction (verl)

In [3]:
# Load environment variables from the .env file one directory above the
# kernel's working directory, before any interaction is created.
import dotenv
dotenv.load_dotenv('../.env')

True

In [1]:
from verl.interactions.combolock_interaction import ComboLockInteraction

interaction = ComboLockInteraction(config={})

instance_id = await interaction.start_interaction(
    instance_id=None,
    combination_length=3,
    max_attempts=8,
    vocab='0123456789',
    ground_truth='835',
    format='interaction_think',
)
env = interaction._instance_dict[instance_id]['env']

In [2]:
guesses = ['123', '456', '789', '835']  # example guesses
attempts = 0
game_history = []

while attempts < env.max_attempts:

    print(f"\nAttempt {attempts + 1}/{env.max_attempts}:")

    attempts += 1
    guess = guesses[attempts - 1]
    messages = [
        {"role": "user", "content": f"Make guess for {env.combination_length}-digit \combination"},
        {"role": "assistant", "content": f"<action>{guess}</action>"}
    ]
    
    # Get response from the interaction
    done, response, score, additional_data = await interaction.generate_response(
        instance_id=instance_id,
        messages=messages
    )
    
    print(f"Response: {response}")
    print(f"Score: {score}")
    
    # Record the attempt
    game_history.append({
        "attempt": attempts,
        "guess": guess,
        "response": response,
        "score": score
    })
    
    if done:
        print(f"\n🎉 Game completed! Goal reached: {done}")
        break


Attempt 1/8:
Response: 1 is not in the lock
2 is not in the lock
3 is not in Position 3, but is in the lock
Score: 0.0

Attempt 2/8:
Response: 4 is not in the lock
5 is not in Position 2, but is in the lock
6 is not in the lock
Score: 0.0

Attempt 3/8:
Response: 7 is not in the lock
8 is not in Position 2, but is in the lock
9 is not in the lock
Score: 0.0

Attempt 4/8:
Response: 8 is in Position 1!
3 is in Position 2!
5 is in Position 3!
Score: 0.625

🎉 Game completed! Goal reached: True


## 2. Run Paprika Interaction (verl) in the same way

In [None]:
# Satvik: I've tested that the following envs work with this notebook:
# twenty_questions
# guess_my_city
# murder_mystery
# customer_service
# wordle
# mastermind

import sys
# Extend the import path before the project imports below.
# NOTE(review): presumably `llm_exploration` is resolved from ../paprika — confirm.
sys.path.append('../paprika') # Remove this when verl and paprika are installed in the same env
from llm_exploration.paprika_config_helper import PaprikaConfigHelper
from verl.interactions.paprika_interaction import PaprikaInteraction
import dotenv
# Load environment variables from ../.env (relative to the kernel's working directory).
dotenv.load_dotenv('../.env')
from pprint import pprint as pp
# Game names that can be passed to PaprikaConfigHelper.create_config.
paprika_games = [
    'twenty_questions',
    'guess_my_city',
    'murder_mystery',
    'customer_service',
    'wordle',
    'cellular_automata',
    'mastermind',
    'battleship',
    'minesweeper',
    'bandit_bai_fixed_budget',
]

Could not import JerichoInferenceEngine, so cannot use it!
Could not import VLLMInferenceEngine, so cannot use it!


In [2]:
# Pick the game by name rather than by list position, so reordering or
# extending `paprika_games` cannot silently switch the game under test.
GAME_NAME = 'mastermind'
assert GAME_NAME in paprika_games, f"unknown Paprika game: {GAME_NAME}"

# Build the config for the chosen game, then override the belief style.
config = PaprikaConfigHelper.create_config(GAME_NAME)
config['belief_config']['style'] = 'none'  # no belief right now

In [3]:
# Display the generated config (including the belief-style override) for inspection.
config

{'game_env_name': 'mastermind',
 'agent_config': {'model_type': 'gpt-4o-mini',
  'model_name': 'gpt-4o-mini',
  'model_max_length': 20000,
  'mode': 'agent'},
 'env_config': {'model_type': 'mastermind',
  'model_name': 'mastermind',
  'model_max_length': 20000,
  'mode': 'env'},
 'judge_config': {'model_type': 'mastermind',
  'model_name': 'mastermind',
  'model_max_length': 1000,
  'mode': 'judge'},
 'belief_config': {'style': 'none'}}

In [4]:
interaction = PaprikaInteraction(config={})

instance_id = await interaction.start_interaction(
        instance_id=None,
        scenario_id=42,
        **config,
    )

The inference engine has been reset to the start state!
The inference engine has been reset to the start state!
The inference engine has been reset to the start state!


In [5]:
# Inspect the per-instance state held by the interaction (private attribute).
interaction._instance_dict

{'c90ea6d3-29e7-45fa-b4e7-9e0db98d58b3': {'game_environment': <llm_exploration.game.game_environment.GameEnvironment at 0x7fd885beb0d0>,
  'game_simulator': <llm_exploration.game.game.GameSimulator at 0x7fd6e2ccc850>,
  'belief_config': {'style': 'none'},
  'agent_config': {'model_type': 'gpt-4o-mini',
   'model_name': 'gpt-4o-mini',
   'model_max_length': 20000,
   'mode': 'agent'},
  'env_config': {'model_type': 'mastermind',
   'model_name': 'mastermind',
   'model_max_length': 20000,
   'mode': 'env'},
  'judge_config': {'model_type': 'mastermind',
   'model_name': 'mastermind',
   'model_max_length': 1000,
   'mode': 'judge'},
  'max_turns': 12,
  'scenario': {'env': '8440', 'agent': 'secret code'}}}

In [6]:
# Pretty-print the scenario stored for this instance.
pp(interaction._instance_dict[instance_id]['scenario'])

{'agent': 'secret code', 'env': '8440'}


In [7]:
# Turn budget stored for this instance; reused below as the loop bound.
interaction._instance_dict[instance_id]['max_turns']

12

In [8]:
# Scripted actions fed to the environment, one per turn, in order.
# NOTE(review): only "8440" matches the 'env' value of the scenario printed
# above; the other entries look like prompts for other Paprika games and
# presumably exercise the invalid-input path here — confirm intent.
sample_actions = [
    "Is it a physical object?",
    "truck",
    "Is it a kite?",
    "Is it typically found outdoors?",
    "Is it Kingston in Jamaica?",
    "It's Yvette right?",
    "Maybe battries are dead, try replacing?",
    "8440",
    "Is it usually flown in open spaces like parks or beaches?",
    "Is it an object that comes in many shapes and bright colors?",
]

In [9]:
# Reset the per-run bookkeeping and read the turn budget for this instance.
attempts = 0
game_history = []
max_attempts = interaction._instance_dict[instance_id]['max_turns']
# Optional manual override of the turn budget (left disabled):
# max_attempts = 6

In [10]:
while attempts < max_attempts:
        
    print(f"\nAttempt {attempts + 1}/{max_attempts}:")
    attempts += 1
    action = sample_actions[attempts - 1]
    messages = [
        {"role": "user", "content": f"Output the next action."},
        {"role": "assistant", "content": f"<action>{action}</action>"}
    ]

    done, response, score, additional_data = await interaction.generate_response(
        instance_id=instance_id,
        messages=messages
    )
    
    print(f"Question: {action}")
    print(f"Response: {response}\n")
    
    game_history.append({
        "attempt": attempts,
        "guess": action,
        "response": response,
        "score": score,
        "done": done,
        "data": additional_data,
    })

    if "Goal reached" in response:
        break


Attempt 1/12:
Question: Is it a physical object?
Response: The code you guessed does not have 4 digits, but has 20. According to the rule of the game, you must generate a 4 digit guess.


Attempt 2/12:
Question: truck
Response: The code you guessed does not have 4 digits, but has 5. According to the rule of the game, you must generate a 4 digit guess.


Attempt 3/12:


Question: Is it a kite?
Response: The code you guessed does not have 4 digits, but has 10. According to the rule of the game, you must generate a 4 digit guess.


Attempt 4/12:
Question: Is it typically found outdoors?
Response: The code you guessed does not have 4 digits, but has 27. According to the rule of the game, you must generate a 4 digit guess.


Attempt 5/12:
Question: Is it Kingston in Jamaica?
Response: The code you guessed does not have 4 digits, but has 22. According to the rule of the game, you must generate a 4 digit guess.


Attempt 6/12:
Question: It's Yvette right?
Response: The code you guessed does not have 4 digits, but has 16. According to the rule of the game, you must generate a 4 digit guess.


Attempt 7/12:
Question: Maybe battries are dead, try replacing?
Response: The code you guessed does not have 4 digits, but has 34. According to the rule of the game, you must generate a 4 digit guess.


Attempt 8/12:
Question: 8440
Response: Goal reached



In [11]:
# Pretty-print the conversation attached to the last recorded turn, skipping
# its first two entries.
# NOTE(review): presumably those are the system/initial prompts — confirm.
pp(game_history[-1]['data']['conversation'][2:], width=120)

[{'content': 'is it a physical object?', 'role': 'assistant'},
 {'content': 'The code you guessed does not have 4 digits, but has 20. According to the rule of the game, you must '
             'generate a 4 digit guess.\n'
             '\n'
             '\n'
             'Now make your next guess about the secret code. Please format your response as: <Think> Any '
             'step-by-step, short and concise thinking to determine what the next guess should be </Think>\n'
             ' <Answer> your guess on the 4 digit code </Answer>',
  'role': 'user'},
 {'content': 'truck', 'role': 'assistant'},
 {'content': 'The code you guessed does not have 4 digits, but has 5. According to the rule of the game, you must '
             'generate a 4 digit guess.\n'
             '\n'
             '\n'
             'Now make your next guess about the secret code. Please format your response as: <Think> Any '
             'step-by-step, short and concise thinking to determine what the next guess sh