In [8]:
import yaml
import alfworld
import alfworld.agents.environment

In [9]:
"""Base memory interface class."""
from abc import ABC, abstractmethod
from typing import Any, Dict


class BaseMemory(ABC):
    """Base memory class providing a general interface for memory operations."""

    @abstractmethod
    def clear(self, *args: Any, **kwargs: Any) -> None:
        """Clear all memories.

        Implementations should override this method to provide the functionality
        to clear memories. Specific parameters and return types depend on the implementation.
        """
        pass

    @abstractmethod
    def add_memories(self, *args: Any, **kwargs: Any) -> None:
        """Add memories.

        Implementations should override this method to provide the functionality
        to add memories. Specific parameters and return types depend on the implementation.
        """
        pass

    @abstractmethod
    def load_memories(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """Load memories and return a dictionary.

        Implementations should override this method to provide the functionality
        to load memories. Specific parameters and return types depend on the implementation.
        """
        pass

    @abstractmethod
    def show_memories(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """Show all memories.

        Implementations should override this method to provide the functionality
        to show memories. Specific parameters and return types depend on the implementation.
        """
        pass


In [10]:
"""ReAct's memory implementation.

Original Paper: https://arxiv.org/abs/2210.03629
Paper Repository: https://github.com/ysymyth/ReAct
LangChain: https://github.com/langchain-ai/langchain
LangChain ReAct: https://python.langchain.com/docs/modules/agents/agent_types/react
"""
from typing import Any, Dict, Optional



class ReActMemory(BaseMemory):
    """A memory storage class for ReAct.

    It stores, retrieves, and manages text-based memories (observations) in a scratchpad (str).

    Attributes:
        scratchpad (str): A string attribute that stores all the memories.
    """

    def __init__(self, scratchpad: Optional[str] = None) -> None:
        """Initialization."""
        super().__init__()
        self.scratchpad = scratchpad if scratchpad else ""

    def clear(
        self,
    ) -> None:
        """Clears the contents of the scratchpad.

        This method resets the scratchpad to an empty string, erasing all stored memories.
        """
        self.scratchpad = ""

    def add_memories(self, observation: str) -> None:
        """Adds a new observation to the scratchpad.

        This method appends the given observation text to the existing contents of the scratchpad.

        Args:
            observation (str): The observation text to be added to the memory.

        """
        self.scratchpad += observation

    def load_memories(self, input_key: str = "scratchpad") -> Dict[str, Any]:
        """Retrieves all stored memories.

        `show_memories` and `load_memories` are identical in ReAct's case.

        Args:
            input_key (str, optional): The key used to access memories. Defaults to "scratchpad".

        Returns:
            Dict[str, Any]: A dictionary containing the stored memories, accessible via the provided input key.
        """
        return {input_key: self.scratchpad}

    def show_memories(self, input_key: str = "scratchpad") -> Dict[str, Any]:
        """Displays all stored memories.

        Args:
            input_key (str, optional): The key used to access memories. Defaults to "scratchpad".

        Returns:
            Dict[str, Any]: A dictionary containing the stored memories, accessible via the provided input key.
        """
        return {input_key: self.scratchpad}


In [11]:
from base import BaseAgent 
import tiktoken
from langchain_core.language_models.chat_models import BaseChatModel
from typing import Any, Dict, List, Optional
from tiktoken.core import Encoding

class Alfworld(BaseAgent):
    def __init__(
        self,
        llm: BaseChatModel,
        memory: Optional[ReActMemory] = None,
        max_tokens: int = 3896,
        enc: Encoding = tiktoken.encoding_for_model("gpt-3.5-turbo"),
        env_config: str = 'base_config.yaml',
        max_step: int = 50
    ):


        super().__init__()
        self.llm = llm

        if not memory:
            self.memory = ReActMemory()
        else:
            self.memory = memory

        
        self.max_tokens = max_tokens
        self.max_step = max_step
        self.enc = enc

        self.env_config=env_config

        # Internal variables.
        self._finished = False  #: :meta private:
        self.prefixes = {
            'pick_and_place': 'put',
            'pick_clean_then_place': 'clean',
            'pick_heat_then_place': 'heat',
            'pick_cool_then_place': 'cool',
            'look_at_obj': 'examine',
            'pick_two_obj': 'puttwo'
        }



In [18]:
from langchain_community.chat_models.openai import ChatOpenAI

In [19]:
api_key = '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'

In [20]:
llm = ChatOpenAI(openai_api_key = api_key)

In [21]:
test_run = Alfworld(llm)

In [22]:
test_run.max_step

50

In [24]:
with open(test_run.env_config) as reader:
    config = yaml.safe_load(reader)
    
print('hello wolrd')
split = "eval_out_of_distribution"

env = getattr(alfworld.agents.environment, config["env"]["type"])(config, train_eval=split)
env = env.init_env(batch_size=1)



ob, info = env.reset()
ob = '\n'.join(ob[0].split('\n\n')[1:])
print(ob)


hello wolrd
Initializing AlfredTWEnv...
Checking for solvable games...


100%|██████████| 341/341 [00:00<00:00, 2650.06it/s]

Overall we have 134 games in split=eval_out_of_distribution
Evaluating with 134 games





You are in the middle of a room. Looking quickly around you, you see a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 3, a countertop 2, a countertop 1, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a shelf 3, a shelf 2, a shelf 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1.
Your task is to: put a cool tomato in microwave.


In [25]:


REACT_ALFWORLD_INSTRUCTION = """
Here are some examples:
{examples}
(END OF EXAMPLES)
Statement: {question}{scratchpad}"""




REACT_ALFWORLD_ACT_EXAMINE_1 = """
You are in the middle of a room. Looking quickly around you, you see a bed 1, a drawer 10, a drawer 9, a drawer 8, a drawer 7, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a dresser 1, a garbagecan 1, a shelf 9, a shelf 8, a shelf 7, a shelf 6, a shelf 5, a shelf 4, a shelf 3, a shelf 2, and a shelf 1.
Your task is to: examine the pen with the desklamp.
Act 1: go to drawer 1
Obs 1: The drawer 1 is closed.
Act 2: open drawer 1
Obs 2: You open the drawer 1. The drawer 1 is open. In it, you see nothing.
Act 3: go to drawer 2
Obs 3: The drawer 2 is closed.
Act 4: open drawer 2
Obs 4: You open the drawer 2. The drawer 2 is open. In it, you see nothing.
Act 5: go to drawer 3
Obs 5: The drawer 3 is closed.
Act 6: open drawer 3
Obs 6: You open the drawer 3. The drawer 3 is open. In it, you see nothing.
Act 7: go to drawer 4
Obs 7: The drawer 4 is closed.
Act 8: open drawer 4
Obs 8: You open the drawer 4. The drawer 4 is open. In it, you see a cd 3, a keychain 1, and a pen 2.
Act 9: take pen 2 from drawer 4
Obs 9: You pick up the pen 2 from the drawer 4.
Act 10: go to dresser 1
Obs 10: On the dresser 1, you see a alarmclock 2, a alarmclock 1, a book 1, a cd 1, a creditcard 1, a desklamp 1, a keychain 2, a pen 3, and a pen 1.
Act 11: use desklamp 1
Obs 11: You turn on the desklamp 1.
Act 12: put pen 2 in/on dresser 1
Obs 12: You turn on the desklamp 1.

"""

In [26]:
from langchain.prompts import PromptTemplate
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages.human import HumanMessage
from tiktoken import Encoding

In [27]:


def remove_newline(step: str) -> str:
    """Formats a step string by stripping leading/trailing newlines and spaces, and replacing internal newlines with empty space.

    Args:
        step (str): The step string to be formatted.

    Returns:
        str: The formatted step string.
    """
    return step.strip("\n").strip().replace("\n", "")

In [28]:
def _build_alfworld_agent_prompt(question: str, scratchpad: str) -> str:
    """Constructs a prompt template for the agent.

    This function formats a predefined prompt template (REACT_INSTRUCTION) with examples,
    the provided question, and a scratchpad.

    Args:
        question (str): The question to be included in the prompt.
        scratchpad (str): Additional scratchpad information to be included.

    Returns:
        str: A formatted prompt template ready for use.
    """

    prompt = PromptTemplate.from_template(REACT_ALFWORLD_INSTRUCTION).format(
        examples=REACT_ALFWORLD_ACT_EXAMINE_1,
        question=question,
        scratchpad=scratchpad,
    )
    return prompt

def _alfworld_prompt_agent(llm: BaseChatModel, question: str, scratchpad: str) -> str:
    """Generates a response from the LLM based on a given question and scratchpad.

    This function creates a prompt using `_build_agent_prompt` and then gets the LLM's
    output. The newline characters in the output are removed before returning.

    Args:
        llm (BaseChatModel): The language model to be prompted.
        question (str): The question to ask the language model.
        scratchpad (str): Additional context or information for the language model.
        example (str): the example used for specific benchmark

    Returns:
        str: The processed response from the language model.
    """
    prompt = _build_alfworld_agent_prompt(question=question, scratchpad=scratchpad)
    out = llm(
        [
            HumanMessage(
                content=prompt,
            )
        ]
    ).content
    assert isinstance(out, str)
    return remove_newline(out)

In [32]:
ob, info = env.reset()
ob = '\n'.join(ob[0].split('\n\n')[1:])
name = '/'.join(info['extra.gamefile'][0].split('/')[-3:-1])


print(ob)
print(name)


You are in the middle of a room. Looking quickly around you, you see a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 3, a countertop 2, a countertop 1, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a shelf 3, a shelf 2, a shelf 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1.
Your task is to: put a clean plate in countertop.
pick_clean_then_place_in_recep-Plate-None-CounterTop-10/trial_T20190908_213420_728917


In [37]:
import json
folder = './prompts/'
prompt_file = 'alfworld_3prompts.json'
with open(folder + prompt_file, 'r') as f:
    d = json.load(f)

FileNotFoundError: [Errno 2] No such file or directory: './prompts/alfworld_3prompts.json'

In [36]:
for i, (k, v) in enumerate(test_run.prefixes.items()):
    if name.startswith(k):
        prompt = 'Interact with a household to solve a task. Here are two examples.\n' + d[f'react_{v}_1'] + d[f'react_{v}_0'] + '\nHere is the task.\n'
        instruction = prompt + '{scratchpad}\n>'

NameError: name 'd' is not defined