In [1]:
import openai
from config import LLMDPConfig

# Hand the OpenAI client its API key from the project config object, so the
# key itself is never written into this notebook.
openai.api_key = LLMDPConfig.openai_api_key

In [3]:
import yaml
import alfworld
import alfworld.agents.environment

# Load the base ALFWorld configuration from the configured directory.
with open(f"{LLMDPConfig.alfworld_config_path}/base_config.yaml") as reader:
    config = yaml.safe_load(reader)

# Split name passed to the environment constructor below.
split = "eval_out_of_distribution"
# split = "train"

# UPDATE PATH TO ALFWORLD DATA
# Every string setting that starts with "$" has its "$ALFWORLD_DATA"
# placeholder replaced by the configured data path.
for section in config.values():
    # Top-level entries that are not mappings hold nothing to substitute.
    if not isinstance(section, dict):
        continue
    for key, value in section.items():
        if isinstance(value, str) and value.startswith("$"):
            # Re-assigning an existing key is safe while iterating .items().
            section[key] = value.replace(
                "$ALFWORLD_DATA", LLMDPConfig.alfworld_data_path
            )

# Look up the environment class named in the config, construct it for the
# chosen split, then initialise it with a batch of one game.
env = getattr(alfworld.agents.environment, config["env"]["type"])(
    config, train_eval=split
)
env = env.init_env(batch_size=1)


def process_ob(ob):
    """Strip the leading "You arrive at loc N. " sentence from an observation.

    Args:
        ob: Observation text.

    Returns:
        The text following the first ". " when ``ob`` starts with
        "You arrive at loc "; otherwise ``ob`` unchanged. An arrival message
        that contains no ". " separator is also returned unchanged instead of
        losing its first character.
    """
    if ob.startswith("You arrive at loc "):
        # partition() reports whether the separator was found, so a missing
        # ". " cannot turn into a bogus slice offset.
        _, separator, remainder = ob.partition(". ")
        if separator:
            return remainder
    return ob

# Number of game files held by the initialised environment.
NUM_GAMEFILES = len(env.gamefiles)

Initializing AlfredTWEnv...
Checking for solvable games...
Overall we have 134 games
Evaluating with 134 games


# ReAct Baseline


In [4]:
from openai.error import RateLimitError
import time

def llm(prompt, stop=["\n"], temperature=0):
    """Return the completion text for ``prompt`` from ``text-davinci-003``.

    Args:
        prompt: Prompt text sent to the completion endpoint.
        stop: Stop sequences forwarded to the API. The default list is only
            read, never mutated.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The ``text`` field of the first choice in the response.

    On ``RateLimitError`` the request is retried after a 5 second pause, for
    as long as the error keeps occurring. Retrying in a loop rather than by
    recursion keeps the call stack flat however long the rate limiting lasts.
    Any other exception propagates to the caller.
    """
    while True:
        try:
            response = openai.Completion.create(
                model="text-davinci-003",
                prompt=prompt,
                temperature=temperature,
                max_tokens=100,
                top_p=1,
                frequency_penalty=0.0,
                presence_penalty=0.0,
                stop=stop,
            )
        except RateLimitError:
            print("Rate limit error.. sleeping for 5 seconds")
            time.sleep(5)
            continue
        return response["choices"][0]["text"]

In [5]:
import sys
import json

# Read the few-shot ReAct prompt templates (a JSON document) into memory.
folder = "./prompts/"
prompt_file = "alfworld_3prompts_react.json"
with open(f"{folder}{prompt_file}", "r") as f:
    alfworld_3prompts_react = json.loads(f.read())


def alfworld_react_run(prompt, to_print=True, ob=""):
    """Play one episode in the global ``env``, asking ``llm`` for each action.

    Args:
        prompt: Few-shot prefix placed in front of the task description.
        to_print: When true, echo the task and every action/observation pair.
        ob: Initial task observation, appended to ``prompt``.

    Returns:
        A ``(reward, steps)`` tuple. When the environment reports ``done``,
        ``reward`` is that step's ``info["won"]`` flag and ``steps`` is the
        1-based index of the step. If the step budget
        (``range(1, LLMDPConfig.max_steps)``) runs out first, the result is
        ``(0, last_step)``, where ``last_step`` is 0 when no step was taken.
    """
    init_prompt = prompt + ob + "\n>"
    history = ""  # action/observation transcript appended after init_prompt
    if to_print:
        print(ob)
        sys.stdout.flush()
    # Pre-set so the fall-through return is defined even when the step range
    # is empty (max_steps <= 1).
    step = 0
    for step in range(1, LLMDPConfig.max_steps):
        action = llm(init_prompt + history, stop=["\n"]).strip()
        # The reward returned by step() is ignored; success is read from
        # info["won"] instead.
        observation, _, done, info = env.step([action])
        observation = process_ob(observation[0])
        won, done = info["won"][0], done[0]
        if action.startswith("think:"):
            # Thoughts are still sent to the environment, but the model is
            # always shown a fixed acknowledgement for them.
            observation = "OK."
        if to_print:
            print(f"Act {step}: {action}\nObs {step}: {observation}")
            sys.stdout.flush()
        history += f" {action}\n{observation}\n>"
        if done:
            return won, step
    return 0, step


In [None]:
# Maps the task-type prefix of a game name to the short key used to look up
# that task type's few-shot prompts.
prefixes = {
    "pick_and_place": "put",
    "pick_clean_then_place": "clean",
    "pick_heat_then_place": "heat",
    "pick_cool_then_place": "cool",
    "look_at_obj": "examine",
    "pick_two_obj": "puttwo",
}
# Per-task-type tallies, indexed in the iteration order of `prefixes`.
cnts = [0] * len(prefixes)  # games attempted
rs = [0] * len(prefixes)  # summed success flags
results = []

LLMDPConfig.name = "react"

results_path = (
    f"{LLMDPConfig.output_dir}/{LLMDPConfig.name}_{LLMDPConfig.llm_model}_results.json"
)

for game_idx in range(NUM_GAMEFILES):
    ob, info = env.reset()
    # Drop the first blank-line-separated paragraph of the reset observation.
    ob = "\n".join(ob[0].split("\n\n")[1:])
    # Keep the two path components that precede the game file's own name.
    name = "/".join(info["extra.gamefile"][0].split("/")[-3:-1])
    print(name)
    for i, (k, v) in enumerate(prefixes.items()):
        if name.startswith(k):
            prompt = (
                "Interact with a household to solve a task. Here are two examples.\n"
                + alfworld_3prompts_react[f"react_{v}_1"]
                + alfworld_3prompts_react[f"react_{v}_0"]
                + "\nHere is the task.\n"
            )
            print(k, v)
            r, length = alfworld_react_run(prompt, ob=ob)
            rs[i] += r
            cnts[i] += 1
            break
    else:
        # No known task type matched, so no episode was run. Skip the game
        # rather than record stale `v`/`r`/`length` values left over from an
        # earlier iteration (or fail on names that were never assigned).
        print(f"Skipping {name}: no matching task prefix")
        print("------------\n")
        continue
    results.append(
        {
            "task": v,
            "success": r,
            "length": length,
        }
    )
    # sum(cnts) is at least 1 here because an episode was just counted.
    print(
        game_idx + 1,
        "r",
        r,
        "rs",
        rs,
        "cnts",
        cnts,
        "sum(rs)/sum(cnts)",
        sum(rs) / sum(cnts),
    )
    print("------------\n")
    # Rewrite the results file after every game so partial progress survives
    # an interrupted run.
    with open(results_path, "w") as f:
        json.dump(results, f)