## Load libraries

In [3]:
import os
import time
import random
import pandas as pd
from openai import OpenAI
from getpass import getpass
from textworld_express import TextWorldExpressEnv

## Create an API client
The cell creates a client for OpenAI's API, and it will ask for a key to run the code.

In [4]:
# Reuse a key that is already configured so that "Restart & Run All" does not
# block on a prompt; otherwise ask for it with getpass so it is not echoed.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key:')

# No key is passed explicitly: the client is expected to read OPENAI_API_KEY
# from the environment populated above.
open_ai_client = OpenAI()

## Query an LLM

In [5]:
def ask_openai(model, messages):
  """Send a chat history to an OpenAI chat model and return the reply text.

  Args:
    model: Model name forwarded to the chat-completions call.
    messages: Chat history forwarded unchanged (the callers in this notebook
      pass a list of {"role": ..., "content": ...} dicts).

  Returns:
    The `content` of the first choice in the completion.
  """
  ### The following code is inspired by OpenAI's API docs ###
  request = {"model": model, "messages": messages}
  response = open_ai_client.chat.completions.create(**request)
  first_choice = response.choices[0]
  return first_choice.message.content

## Build the prompts

The function react_1 builds a prompt where think and act steps are combined within a single prompt. The LLM is asked to think and then select the most appropriate action from the list of available actions.

The function react_2 builds a prompt where think and act steps are divided into two different prompts. The LLM is first asked to think about the best ways to achieve the task based on what it has observed from the environment. Then, in a second prompt, the model is asked to select the action.

In both cases the LLM has access to a limited context (5 most recent interactions, including both prompts and the reasoning traces).

In [20]:
def random_agent(model, task, obs, valid_actions):
    """Baseline agent: pick one of the currently valid actions at random.

    `model`, `task` and `obs` are accepted only so the signature matches the
    other agent functions; they are not used.

    Returns:
        One element of `valid_actions`, chosen with `random.choice`.
    """
    picked_action = random.choice(valid_actions)
    return picked_action

In [None]:
def basic_prompt(model, task, obs, valid_actions):
    """Single-prompt agent: ask the model directly for the next action.

    Works on the module-level `messages` history: seeds it with a system
    prompt when it is empty, appends one user prompt and the model's reply,
    then trims the history to the system prompt plus the latest messages.

    Args:
        model: Model name passed through to `ask_openai`.
        task: Task description, used only in the system prompt.
        obs: Latest observation text, placed at the start of the user prompt.
        valid_actions: Actions the model is asked to choose from.

    Returns:
        The model's reply with every apostrophe removed.
    """
    global messages

    # An empty history marks a fresh conversation: open it with the task.
    if len(messages) == 0:
        system_prompt = (
            'You are in the middle of a game that checks common sense. '
            f'Your objective in this game is the following: {task}'
        )
        messages.append({"role": "system", "content": system_prompt})

    user_prompt = (
        f'{obs} Now, given your objective, think how you could achieve it and '
        'what steps you could take. Remember: there may be multiple locations, '
        'and you want to avoid repeating things again and again. Now, taking into account '
        f'your thoughts, choose one most reasonable action out of these: {valid_actions}. '
        'The action has to be in the list and you must respond with the action only, using the same spelling.'
    )
    messages.append({"role": "user", "content": user_prompt})

    reply = ask_openai(model, messages)
    messages.append({"role": "assistant", "content": f'{reply}'})

    # Keep the system prompt plus the 11 most recent messages.
    # NOTE(review): 11 is odd, so once trimming kicks in the oldest retained
    # message is an assistant reply without its prompt - confirm whether 10
    # (five complete exchanges) was intended.
    system_message, recent = messages[0], messages[1:]
    messages = [system_message] + recent[-11:]

    # Apostrophes are dropped from the reply before it is used as an action.
    action = messages[-1]["content"]
    return action.replace("'", "")

In [None]:
def _extract_marked_action(reply, valid_actions):
    """Pull the action out of a reply shaped like '<thoughts> #<action>#'."""
    segments = [segment.strip() for segment in reply.split('#')]
    # Prefer a delimited segment that is literally one of the offered actions;
    # this survives stray '#' characters in the thoughts (e.g. markdown headings).
    for segment in segments[1:]:
        if segment in valid_actions:
            return segment
    # Otherwise use the text after the first '#', now without padding.
    if len(segments) > 1 and segments[1]:
        return segments[1]
    # No usable marker at all: hand back the whole reply instead of raising.
    return reply.strip()


def react_1(model, task, obs, valid_actions):
    """ReAct-style agent that reasons and acts within a single prompt.

    Works on the module-level `messages` history: seeds it with a system
    prompt when it is empty, appends one user prompt asking for concise
    thoughts followed by an action wrapped in '#', stores the full reply,
    then trims the history to the system prompt plus the latest messages.

    Args:
        model: Model name passed through to `ask_openai`.
        task: Task description, used only in the system prompt.
        obs: Latest observation text, placed at the start of the user prompt.
        valid_actions: Actions the model is asked to choose from.

    Returns:
        The action extracted from the reply, with apostrophes removed and
        surrounding whitespace stripped. If the reply carries no usable
        '#' marker the whole stripped reply is returned, so a single
        malformed answer no longer raises IndexError.
    """
    global messages
    if not len(messages):
        messages.append({"role": "system", "content": (
            'You are in the middle of a game that checks common sense. '
            f'Your objective in this game is the following: {task}')})

    # The original continuation line lacked a space and produced
    # "generatingyour thoughts"; the pieces below are joined with one.
    messages.append({"role": "user", "content": (
        f'{obs} Now, given your objective, think how you could achieve it and '
        'what steps you could take. Be concise when reasoning, and after generating '
        f'your thoughts, choose one most reasonable action out of these: {valid_actions}. '
        'The action has to be in the list and you must place the action with # on each side after your thoughts.')})

    reply = f'{ask_openai(model, messages)}'
    messages.append({"role": "assistant", "content": reply})

    # Keep the system prompt plus the 11 most recent messages.
    # NOTE(review): 11 is odd, so once trimming kicks in the oldest retained
    # message is an assistant reply without its prompt - confirm whether 10
    # was intended.
    messages = [messages[0]] + messages[1:][-11:]

    return _extract_marked_action(reply.replace("'", ""), valid_actions)

In [None]:
def react_2(model, task, obs, valid_actions):
    """ReAct-style agent that reasons and acts in two separate prompts.

    Works on the module-level `messages` history: seeds it with a system
    prompt when it is empty, asks the model to think (first call to
    `ask_openai`), then asks it to choose an action (second call), storing
    both prompts and both replies before trimming the history.

    Args:
        model: Model name passed through to `ask_openai`.
        task: Task description, used only in the system prompt.
        obs: Latest observation text, placed at the start of the think prompt.
        valid_actions: Actions the model is asked to choose from.

    Returns:
        The second reply (the chosen action) with every apostrophe removed.
    """
    global messages

    # An empty history marks a fresh conversation: open it with the task.
    if len(messages) == 0:
        system_prompt = (
            'You are in the middle of a game that checks common sense. '
            f'Your objective in this game is the following: {task}'
        )
        messages.append({"role": "system", "content": system_prompt})

    # Step 1: reasoning only.
    think_prompt = (
        f'{obs} Now, given your objective, think how you could achieve it and '
        'what steps you could take. Do not take any action yet, just think. Be concise.'
    )
    messages.append({"role": "user", "content": think_prompt})
    thoughts = ask_openai(model, messages)
    messages.append({"role": "assistant", "content": f'{thoughts}'})

    # Step 2: action selection, with the thoughts already in the history.
    act_prompt = (
        'Remember: there may be multiple locations, '
        'and if you see that you are going in circles, do not do the same thing again. Now, taking into account '
        f'your previous thoughts, choose one most reasonable action out of these: {valid_actions}. '
        'The action has to be in the list and you must respond with the action only, using the same spelling.'
    )
    messages.append({"role": "user", "content": act_prompt})
    chosen = ask_openai(model, messages)
    messages.append({"role": "assistant", "content": f'{chosen}'})

    # Keep the system prompt plus the 21 most recent messages.
    # NOTE(review): each step adds four messages, so 21 leaves an unpaired
    # assistant reply as the oldest entry - confirm whether 20 was intended.
    system_message, recent = messages[0], messages[1:]
    messages = [system_message] + recent[-21:]

    action = messages[-1]["content"]
    return action.replace("'", "")

## Construct the simulation environment

The simulation environment code below uses the TextWorldExpress library. As a consequence, much of the code is adapted from the original code by the TextWorldExpress authors.

The play() function takes an OpenAI model name as its first argument, the agent function as the second, the episode range as the third, and the maximum number of steps per episode as the fourth. If the export flag is set to True, it writes the results of the simulation as a .csv file to the data folder.

In [17]:
### The code in this cell borrows from the TextWorldExpress Github repository on multiple occasions###
def play(model, react_func, n_episodes=(0, 50), n_steps=50, export=False):
  """Run an agent through a range of TextWorldExpress "twc" episodes.

  Args:
    model: Model name; forwarded unchanged to `react_func` and used in the
      export file name.
    react_func: Agent callable invoked as
      `react_func(model, task, obs, valid_actions)`; its return value is
      passed to `env.step` as the action.
    n_episodes: `(start, stop)` pair. Episode ids `range(start, stop)` are
      played, and each id is also used as the seed for `env.reset`.
    n_steps: Maximum number of agent steps per episode; also passed to the
      environment as `envStepLimit`.
    export: When True, write one row per episode (episode id, score, number
      of moves, done flag) to a CSV file in the `data` folder.

  Returns:
    None. Progress is printed; results are only persisted when `export` is
    True.
  """
  global messages
  env = TextWorldExpressEnv(envStepLimit=n_steps)
  # Set the game generator to generate a particular game (cookingworld, twc, or coin)
  env.load(gameName="twc", gameParams='')

  episode_rows = []
  for episode_id in range(n_episodes[0], n_episodes[1]):
    # Start every episode from an empty chat history, so that leftovers from
    # an interrupted or earlier run cannot leak into the first episode.
    messages = []

    # First step
    obs, infos = env.reset(seed=episode_id, gameFold="train", generateGoldPath=True)
    task = infos['taskDescription']

    for step_id in range(n_steps):
      # Ask the agent to pick one of the currently valid actions
      valid_actions = sorted(infos['validActions'])
      llm_action = react_func(model, task, obs, valid_actions)

      # Take that action
      obs, _, _, infos = env.step(llm_action)

      # Display action and the game's feedback.
      print(">", llm_action)
      print(obs)
      print(infos['score'], infos['numMoves'])

      if infos['tasksuccess']:
        print("Task Success!")
      if infos['taskfailure']:
        print("Task Failure!")

      # Record the episode once the game ends or the step budget is spent.
      if infos['done'] or step_id == n_steps - 1:
        episode_rows.append([episode_id, infos['score'], infos['numMoves'], infos['done']])
        break

    # Leave the shared history empty once the episode is over.
    messages = []

  if export:
    results = pd.DataFrame(episode_rows, columns=['episode_id', 'score', 'num_moves', 'done'])
    # Create the target folder if needed, so a long run is not lost at the
    # very end because `data/` is missing.
    os.makedirs('data', exist_ok=True)
    # The file name is kept exactly as before (including the tuple repr of
    # n_episodes), so existing files and any code reading them stay in sync.
    results.to_csv(f'data/{model}_{react_func.__name__}_{n_episodes}episodes_{n_steps}steps_{str(time.time())[:10]}.csv', index=False)

## Generate data

Finally, the cell below will simulate 50 episodes of the TextWorldExpress Commonsense game for each of the agents we have built. The maximum number of steps per episode is capped at 50. The run data is then saved to the data folder as four separate .csv files, one per agent.

In [None]:
# Shared chat history used by the prompt-based agents.
messages = []

# Seed Python's RNG so the random baseline picks the same actions on every
# run. The LLM-backed agents do not use this generator.
random.seed(0)

# One run (and one exported .csv) per agent, all with the same model name.
for agent in (random_agent, basic_prompt, react_1, react_2):
    play('gpt-4o-mini', agent, export=True)