In [6]:
import os
import sys

import requests
from alfworld.agents.environment import get_environment
import alfworld.agents.modules.generic as generic

# --- JUPYTER WORKAROUND ---
# Swap the kernel's own argv for a script-style one that names the config file.
# NOTE(review): presumably generic.load_config() takes the config path from
# sys.argv -- confirm against the ALFWorld source.
sys.argv = ['jupyter_notebook.py', 'configs/base_config.yaml']

# Read the config, look up the environment class it names, then build the
# training-split environment with batch_size=1.
config = generic.load_config()
env_type = config['env']['type']
env_cls = get_environment(env_type)
env = env_cls(config, train_eval='train').init_env(batch_size=1)

Initializing AlfredTWEnv...


100%|██████████| 8810/8810 [00:05<00:00, 1579.38it/s]

Overall we have 3553 games in split=train
Training with 3553 games





# Baseline LLM 

In [None]:
# --- API SETUP ---
# The credential is read from the TRITONAI_API_KEY environment variable so that
# it never lives in the notebook. The key that used to be hard-coded on this
# line has been exposed in the file and should be revoked and re-issued.
API_KEY = os.environ.get("TRITONAI_API_KEY", "")
if not API_KEY:
    # Warn instead of raising so the remaining cells can still be defined;
    # requests sent without a key are expected to be rejected by the server.
    print("WARNING: TRITONAI_API_KEY is not set; API requests will be rejected.")

URL = "https://tritonai-api.ucsd.edu/v1/chat/completions"

def get_llm_action(observation, valid_commands, timeout=30.0):
    """Send the game state to the LLM and return its chosen action.

    Builds a single-turn prompt from the current observation and the valid
    commands, POSTs it to the chat-completions endpoint at ``URL`` and returns
    the reply with surrounding whitespace stripped.

    Args:
        observation: Text of the current game observation.
        valid_commands: Commands presented to the model as the allowed choices.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The model's reply text, or ``"look"`` when the request fails, the
        server answers with a non-200 status, or the reply is malformed or empty.
    """
    # Safe fallback action so the game doesn't crash when the API misbehaves.
    fallback_action = "look"

    # We enforce a strict prompt to prevent conversational filler
    prompt = f"""You are an intelligent agent playing a text-based household game.
Current Observation: {observation}
Valid Commands: {valid_commands}

Choose the most logical next action from the Valid Commands to explore the environment and complete your underlying objective.
CRITICAL: Output ONLY the exact text of the command you choose, and absolutely nothing else."""

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}"
    }

    payload = {
        "model": "api-llama-4-scout",
        "messages": [{"role": "user", "content": prompt}],
        # Temperature 0 keeps the choice as repeatable as possible; it does not
        # by itself guarantee the reply is one of the valid commands.
        "temperature": 0.0
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    try:
        response = requests.post(URL, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        print(f"API request failed: {exc}")
        return fallback_action

    if response.status_code != 200:
        print(f"API Error {response.status_code}: {response.text}")
        return fallback_action

    # A 200 response can still carry a body that is not JSON or lacks the
    # expected choices/message/content structure.
    try:
        action = response.json()["choices"][0]["message"]["content"].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
        print(f"Unexpected API response ({exc!r}): {response.text}")
        return fallback_action

    # An empty reply is not a usable command.
    return action or fallback_action

In [8]:
# --- GAME SETUP ---
# Build the environment again from the config so this cell starts from a
# freshly initialised env with batch_size=1.
config = generic.load_config()
env_type = config['env']['type']
env = get_environment(env_type)(config, train_eval='train')
env = env.init_env(batch_size=1)

obs, info = env.reset()
print("=== ALFWorld + LLM Agent Started ===\n")
print(f"Observation: {obs[0]}\n" + "-"*40)

# --- LLM PLAY LOOP ---
MAX_STEPS = 15 # Safety limit for API calls

for step in range(MAX_STEPS):
    # Get the list of allowed commands for this specific step
    # (index 0 selects the only game in the batch).
    valid_commands = list(info['admissible_commands'][0])

    # Ask the LLM what to do; it sees only the current observation.
    print("Thinking...")
    action = get_llm_action(obs[0], valid_commands)
    print(f"LLM Action: {action}")

    # Take the step in the environment
    obs, scores, dones, infos = env.step([action])
    print(f"Observation: {obs[0]}\n" + "-"*40)

    # Carry this step's info forward so the next iteration reads the updated
    # admissible commands.
    info = infos

    # Check if the LLM won
    if dones[0]:
        print(f"\nGame Over! The LLM finished with a score of: {scores[0]}")
        break
else:
    # Runs only when the loop ends without `break`, i.e. the step budget was
    # exhausted. Unlike a post-loop `dones[0]` check, this is also safe when
    # MAX_STEPS is 0 and `dones` was never assigned.
    print("\nHit max steps. The LLM didn't finish the task in time.")

Initializing AlfredTWEnv...


100%|██████████| 8810/8810 [00:05<00:00, 1592.07it/s]


Overall we have 3553 games in split=train
Training with 3553 games
=== ALFWorld + LLM Agent Started ===

Observation: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bed 1, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a desk 1, a drawer 2, a drawer 1, a garbagecan 1, a shelf 1, and a sidetable 1.

Your task is to: put some pencil on shelf.
----------------------------------------
Thinking...
LLM Action: go to desk 1
Observation: You arrive at desk 1. On the desk 1, you see a alarmclock 2, a alarmclock 1, a book 3, a box 1, a creditcard 3, a creditcard 2, a laptop 2, and a pencil 3.
----------------------------------------
Thinking...
LLM Action: examine desk 1
Observation: On the desk 1, you see a alarmclock 2, a alarmclock 1, a book 3, a box 1, a creditcard 3, a creditcard 2, a laptop 2, and a pencil 3.
----------------------------------------
Thinking...
LLM Action: examine desk 1
Observation: On the desk 1, you see a

As we can see, because the model only receives the current observation at each step, it repeats actions such as "examine desk 1": it has no record that it has already taken them.

# Baseline LLM with Memory

In [13]:
import sys
import requests
from alfworld.agents.environment import get_environment
import alfworld.agents.modules.generic as generic

# `os` is imported here because this cell is written to run on its own.
import os

# --- THE JUPYTER BYPASS ---
# Swap the kernel's own argv for a script-style one that names the config file.
sys.argv = ['jupyter_notebook.py', 'configs/base_config.yaml']

# --- API SETUP ---
# The credential is read from the TRITONAI_API_KEY environment variable so that
# it never lives in the notebook. The key that used to be hard-coded on this
# line has been exposed in the file and should be revoked and re-issued.
API_KEY = os.environ.get("TRITONAI_API_KEY", "")
if not API_KEY:
    # Warn instead of raising so the remaining code can still be defined;
    # requests sent without a key are expected to be rejected by the server.
    print("WARNING: TRITONAI_API_KEY is not set; API requests will be rejected.")

URL = "https://tritonai-api.ucsd.edu/v1/chat/completions"

def get_llm_action(observation, valid_commands, history, timeout=30.0):
    """Send the game state and the running transcript to the LLM.

    Builds a single-turn prompt containing the transcript so far, the current
    observation and the valid commands, POSTs it to the chat-completions
    endpoint at ``URL`` and returns the reply with surrounding whitespace
    stripped.

    Args:
        observation: Text of the current game observation.
        valid_commands: Commands presented to the model as the allowed choices.
        history: Transcript text of the earlier observations and actions.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The model's reply text, or ``"look"`` when the request fails, the
        server answers with a non-200 status, or the reply is malformed or empty.
    """
    # Safe fallback action so the game doesn't crash when the API misbehaves.
    fallback_action = "look"

    prompt = f"""You are an intelligent agent playing a text-based household game.
Your task is given in the initial observation.

Here is the transcript of what you have done so far:
{history}

Current Observation: {observation}
Valid Commands: {valid_commands}

Choose the most logical next action from the Valid Commands to progress towards your task. 
Use your history to avoid repeating actions that do not change the environment.
CRITICAL: Output ONLY the exact text of the command you choose, and absolutely nothing else."""

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}"
    }

    payload = {
        "model": "api-llama-4-scout",
        "messages": [{"role": "user", "content": prompt}],
        # Temperature 0 keeps the choice as repeatable as possible.
        "temperature": 0.0
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    try:
        response = requests.post(URL, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        print(f"API request failed: {exc}")
        return fallback_action

    if response.status_code != 200:
        print(f"API Error {response.status_code}: {response.text}")
        return fallback_action

    # A 200 response can still carry a body that is not JSON or lacks the
    # expected choices/message/content structure.
    try:
        action = response.json()["choices"][0]["message"]["content"].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
        print(f"Unexpected API response ({exc!r}): {response.text}")
        return fallback_action

    # An empty reply is not a usable command.
    return action or fallback_action

# --- GAME SETUP ---
# Build the environment from the config with batch_size=1.
config = generic.load_config()
env_type = config['env']['type']
env = get_environment(env_type)(config, train_eval='train')
env = env.init_env(batch_size=1)

obs, info = env.reset()
print("=== ALFWorld + LLM Agent Started ===\n")

# Initialize the memory string; the first observation is what states the task.
chat_history = f"Initial Observation: {obs[0]}\n"
print(chat_history + "-"*40)

# --- LLM PLAY LOOP ---
MAX_STEPS = 15  # Upper bound on the number of API calls per game

for step in range(MAX_STEPS):
    # Index 0 selects the only game in the batch.
    valid_commands = list(info['admissible_commands'][0])

    print("Thinking...")
    # Pass the running history to the LLM
    action = get_llm_action(obs[0], valid_commands, chat_history)
    print(f"LLM Action: {action}")

    # Take the step
    obs, scores, dones, infos = env.step([action])
    print(f"Observation: {obs[0]}\n" + "-"*40)

    # Update the memory for the next loop: record the action and what it produced.
    chat_history += f"> Action: {action}\nResulting Observation: {obs[0]}\n"

    # Carry this step's info forward so the next iteration reads the updated
    # admissible commands.
    info = infos

    if dones[0]:
        print(f"\nGame Over! The LLM finished with a score of: {scores[0]}")
        break
else:
    # Runs only when the loop ends without `break`, i.e. the step budget was
    # exhausted. Unlike a post-loop `dones[0]` check, this is also safe when
    # MAX_STEPS is 0 and `dones` was never assigned.
    print("\nHit max steps. The LLM didn't finish the task in time.")

Initializing AlfredTWEnv...


100%|██████████| 8810/8810 [00:05<00:00, 1571.26it/s]


Overall we have 3553 games in split=train
Training with 3553 games
=== ALFWorld + LLM Agent Started ===

Initial Observation: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bed 1, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a desk 1, a drawer 2, a drawer 1, a garbagecan 1, a shelf 1, and a sidetable 1.

Your task is to: put some pencil on shelf.
----------------------------------------
Thinking...
LLM Action: go to desk 1
Observation: You arrive at desk 1. On the desk 1, you see a alarmclock 2, a alarmclock 1, a book 3, a box 1, a creditcard 3, a creditcard 2, a laptop 2, and a pencil 3.
----------------------------------------
Thinking...
LLM Action: take pencil 3 from desk 1
Observation: You pick up the pencil 3 from the desk 1.
----------------------------------------
Thinking...
LLM Action: go to shelf 1
Observation: You arrive at shelf 1. On the shelf 1, you see a keychain 2, and a pen 1.
------------------------