In [None]:
!pip install -U --pre dspy 

In [None]:
!pip install -U alfworld==0.3.5 multiprocess
alfworld-download


In [1]:
from dspy.datasets.alfworld import AlfWorld

alfworld = AlfWorld()
trainset, devset = alfworld.trainset[:200], alfworld.devset[-200:]
len(trainset), len(devset)

  from .autonotebook import tqdm as notebook_tqdm


(200, 200)

In [2]:

import dspy
import os
from dotenv import load_dotenv
load_dotenv()
base_url = 'https://integrate.api.nvidia.com/v1'
model = "meta/llama-3.3-70b-instruct"

llama33_70b = dspy.LM(f'openai/{model}', api_key=os.environ["NVIDIA_API_KEY"], api_base=base_url)
dev_model="nvdev/mistralai/mixtral-8x22b-instruct-v0.1" # nvdev/meta/llama-3.1-405b-instruct'
dev_dspy_model = dspy.LM(f'openai/{dev_model}', api_key=os.environ["NV_DEV_API_KEY"], api_base=base_url, temperature=0.7)

#dspy.configure(lm=dev_dspy_model)
dspy.configure(experimental=True)


In [3]:
example = trainset[0]

In [4]:
example

Example({'idx': 1650}) (input_keys={'idx'})

In [5]:
#example = trainset[0]

with alfworld.POOL.session() as env:
    task, info = env.init(**example.inputs())

print(task)

-= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 2, a countertop 1, a diningtable 2, a diningtable 1, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1.

Your task is to: find two saltshaker and put them in cabinet.


In [6]:
info["admissible_commands"]

[['go to cabinet 1',
  'go to cabinet 10',
  'go to cabinet 2',
  'go to cabinet 3',
  'go to cabinet 4',
  'go to cabinet 5',
  'go to cabinet 6',
  'go to cabinet 7',
  'go to cabinet 8',
  'go to cabinet 9',
  'go to coffeemachine 1',
  'go to countertop 1',
  'go to countertop 2',
  'go to diningtable 1',
  'go to diningtable 2',
  'go to drawer 1',
  'go to drawer 2',
  'go to drawer 3',
  'go to fridge 1',
  'go to garbagecan 1',
  'go to microwave 1',
  'go to sinkbasin 1',
  'go to stoveburner 1',
  'go to stoveburner 2',
  'go to stoveburner 3',
  'go to stoveburner 4',
  'go to toaster 1',
  'inventory',
  'look']]

### construct Dspy module

In [11]:
from colorama import Fore
import dspy
"""
class GenerateAction(dspy.Signature):
    instructions=
    task = dspy.InputField()
    trajectory = dspy.InputField()
    possible_actions = dspy.InputField()
    action = dspy.OutputField(desc="action to take in the environment")
"""

class PlanGraphAgent(dspy.Module):
    def __init__(self, max_iters=200, verbose=False, with_instruction=True):
        self.max_iters = max_iters
        self.verbose = verbose
        
        if with_instruction: 
            self.INSTRUCTIONS = """
            Interact with a simulated household to achieve a high-level goal. Make sure to plan, track subgoals,
            determine likely locations for common household items (e.g. desklamps will likely be on desks, shelfs, or dressers),
            and explore systematically (e.g. check all desks one by one for desklamp).
            """.strip()
            self.react = dspy.Predict(dspy.Signature("task, trajectory, possible_actions: list[str] -> action", self.INSTRUCTIONS))
        else:
            self.react = dspy.Predict("task, trajectory, possible_actions: list[str] -> action")

        print(Fore.GREEN , self.react)

    def forward(self, idx):
        with alfworld.POOL.session() as env:
            trajectory = []
            task, info = env.init(idx)
            if self.verbose:
                print(Fore.BLACK + "---"*15)
                print(Fore.RED + f"################# Task: {task}")

            for i in range(self.max_iters):
                print(Fore.BLACK + f" ------------------------------------------------------------ iteration {str(i)} -------------------------------------------------- ")
                trajectory_ = "\n".join(trajectory)
                print(Fore.MAGENTA + "****** trajectory *******:\n", trajectory  )
                possible_actions = info["admissible_commands"][0] + ["think: ${...thoughts...}"]
                print(Fore.CYAN + "******current possible actions*********  :\n" , possible_actions )
                prediction = self.react(task=task, trajectory=trajectory_, possible_actions=possible_actions)
                print(Fore.CYAN +">>>>>>>>>>>>>>>> agent prediction is _\n", prediction)
                trajectory.append(f"> {prediction.action}")

                if prediction.action.startswith("think:"):
                    trajectory.append("OK.")
                    continue
                
                obs, reward, done, info = env.step(prediction.action)
                obs, reward, done = obs[0], reward[0], done[0]
                print(Fore.BLUE + "### env feedback per step obs, reward, done = ", obs,'|', reward, '|', done)
                print(Fore.CYAN + "Current Observation :\n", obs)
                trajectory.append(obs)

                if self.verbose:
                    print("\n".join(trajectory[-2:]))

                if done:
                    print( "************ "*5 )
                    print(Fore.GREEN + "CONGRATULATIONS you completed the task :  ", task,  "\n Great Job d^__^b ")
                    break

        assert reward == int(info["won"][0]), (reward, info["won"][0])
        return dspy.Prediction(trajecotry=trajectory, success=reward)


In [9]:
myagent = PlanGraphAgent()
myagent.set_lm(dev_dspy_model)
myagent.verbose = True

myagent(**example.inputs())

[32m Predict(StringSignature(task, trajectory, possible_actions -> action
    instructions='Interact with a simulated household to achieve a high-level goal. Make sure to plan, track subgoals,\ndetermine likely locations for common household items (e.g. desklamps will likely be on desks, shelfs, or dressers),\nand explore systematically (e.g. check all desks one by one for desklamp).'
    task = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Task:', 'desc': '${task}'})
    trajectory = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Trajectory:', 'desc': '${trajectory}'})
    possible_actions = Field(annotation=list[str] required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Possible Actions:', 'desc': '${possible_actions}'})
    action = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Action:', 'desc': '${action}'})
))
[31m

Prediction(
    trajecotry=['> go to cabinet 1', 'You arrive at loc 27. On the cabinet 1, you see nothing.', '> go to cabinet 2', 'You arrive at loc 2. The cabinet 2 is closed.', '> open cabinet 2', 'You open the cabinet 2. The cabinet 2 is open. In it, you see a bowl 2.', '> go to cabinet 3', 'You arrive at loc 8. On the cabinet 3, you see nothing.', '> go to cabinet 4', 'Nothing happens.', '> go to cabinet 5', 'You arrive at loc 7. The cabinet 5 is closed.', '> open cabinet 5', 'You open the cabinet 5. The cabinet 5 is open. In it, you see nothing.', '> go to cabinet 6', 'You arrive at loc 11. The cabinet 6 is closed.', '> open cabinet 6', 'You open the cabinet 6. The cabinet 6 is open. In it, you see nothing.', '> go to cabinet 7', 'You arrive at loc 25. On the cabinet 7, you see nothing.', '> go to cabinet 8', 'Nothing happens.', '> go to cabinet 9', 'You arrive at loc 24. The cabinet 9 is closed.', '> open cabinet 9', 'You open the cabinet 9. The cabinet 9 is open. In it, you see 

In [None]:
metric = lambda x, y, trace=None: y.success
### skipping naive evaluation to save API calls 
#evaluate = dspy.Evaluate(devset=devset, metric=metric, display_progress=True, num_threads=16)
#agent_4o.verbose = False
#evaluate(agent_4o)

optimizer = dspy.MIPROv2(metric=metric, auto="light", num_threads=16, prompt_model=dev_dspy_model)

config = dict(max_bootstrapped_demos=1, max_labeled_demos=0, minibatch_size=40)
optimized_agent = optimizer.compile(myagent, trainset=trainset, **config, requires_permission_to_run=False)


2025/01/09 16:43:42 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: True
num_candidates: 5
valset size: 100

2025/01/09 16:43:42 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/01/09 16:43:42 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/01/09 16:43:42 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=5 sets of demonstrations...


Bootstrapping set 1/5
Bootstrapping set 2/5


  0%|                                                                                                                                                                    | 0/40 [00:00<?, ?it/s]

[31m################# Task: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a cabinet 16, a cabinet 15, a cabinet 14, a cabinet 13, a cabinet 12, a cabinet 11, a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 2, a countertop 1, a diningtable 1, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a safe 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1.

Your task is to: heat some apple and put it in garbagecan.
[30m ------------------------------------------------------------ iteration 0 -------------------------------------------------- 
[35m****** trajectory *******:
 []
[36m******current possible actions*********  :
 ['go to cabinet 1', 'go to cabinet 10', 'go to cabinet 11', 'go to cabinet 12', 'go to cabine

  2%|███▉                                                                                                                                                        | 1/40 [00:09<06:18,  9.69s/it]

[34m### env feedback per step obs, reward, done =  Nothing happens. | 0 | True
[36mCurrent Observation :
 Nothing happens.
> put apple 1 in/on cup 1
Nothing happens.
[32mCONGRATULATIONS you completed the task :   -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a cabinet 16, a cabinet 15, a cabinet 14, a cabinet 13, a cabinet 12, a cabinet 11, a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 2, a countertop 1, a diningtable 1, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a safe 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1.

Your task is to: heat some apple and put it in garbagecan. 
 Great Job d^__^b 
[31m################# Task: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Loo

  5%|███████▊                                                                                                                                                    | 2/40 [00:14<04:20,  6.86s/it]

[34m### env feedback per step obs, reward, done =  Nothing happens. | 0 | False
[36mCurrent Observation :
 Nothing happens.
> examine bowl 1
Nothing happens.
[30m ------------------------------------------------------------ iteration 48 -------------------------------------------------- 
[35m****** trajectory *******:
 ['> go to desk 1', 'You arrive at loc 1. On the desk 1, you see a bowl 2, a bowl 1, a cellphone 2, a creditcard 1, a pen 3, a pencil 3, and a remotecontrol 2.', '> examine bowl 2 on desk 1', 'Nothing happens.', '> take bowl 2 from desk 1', 'You pick up the bowl 2 from the desk 1.', '> examine bowl 2', 'This is a normal bowl 2. In it, you see nothing.', '> go to shelf 1', 'You arrive at loc 12. On the shelf 1, you see a alarmclock 2, a alarmclock 1, a cd 1, and a keychain 1.', '> go to desk 1', 'You arrive at loc 1. On the desk 1, you see a bowl 1, a cellphone 2, a creditcard 1, a pen 3, a pencil 3, and a remotecontrol 2.', '> go to shelf 2', 'You arrive at loc 5. On 

  8%|███████████▋                                                                                                                                                | 3/40 [00:19<04:02,  6.57s/it]


[34m### env feedback per step obs, reward, done =  You arrive at loc 10. On the garbagecan 1, you see a winebottle 2. | 0 | False
[36mCurrent Observation :
 You arrive at loc 10. On the garbagecan 1, you see a winebottle 2.
> go to garbagecan 1
You arrive at loc 10. On the garbagecan 1, you see a winebottle 2.
[30m ------------------------------------------------------------ iteration 15 -------------------------------------------------- 
[35m****** trajectory *******:
 ['> go to cabinet 1', 'You arrive at loc 31. The cabinet 1 is closed.', '> open cabinet 1', 'You open the cabinet 1. The cabinet 1 is open. In it, you see a vase 1.', '> go to cabinet 2', 'You arrive at loc 35. The cabinet 2 is closed.', '> open cabinet 2', 'You open the cabinet 2. The cabinet 2 is open. In it, you see a winebottle 2.', '> take winebottle 2 from cabinet 2', 'You pick up the winebottle 2 from the cabinet 2.', '> go to garbagecan 1', 'You arrive at loc 10. On the garbagecan 1, you see nothing.', '> pu

  0%|                                                                                                                                                                    | 0/40 [00:00<?, ?it/s]

[31m################# Task: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 2, a countertop 1, a diningtable 2, a diningtable 1, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1.

Your task is to: find two saltshaker and put them in cabinet.
[30m ------------------------------------------------------------ iteration 0 -------------------------------------------------- 
[35m****** trajectory *******:
 []
[36m******current possible actions*********  :
 ['go to cabinet 1', 'go to cabinet 10', 'go to cabinet 2', 'go to cabinet 3', 'go to cabinet 4', 'go to cabinet 5', 'go to cabinet 6', 'go to cabinet 7', 'go to cabinet 8', 'go to cabinet 9',

  2%|███▉                                                                                                                                                        | 1/40 [00:08<05:27,  8.40s/it]

[34m### env feedback per step obs, reward, done =  Nothing happens. | 0 | False
[36mCurrent Observation :
 Nothing happens.
> put peppershaker 1 in cabinet 10
Nothing happens.
[30m ------------------------------------------------------------ iteration 49 -------------------------------------------------- 
[35m****** trajectory *******:
 ['> go to cabinet 1', 'You arrive at loc 27. On the cabinet 1, you see nothing.', '> go to cabinet 2', 'You arrive at loc 2. The cabinet 2 is closed.', '> open cabinet 2', 'You open the cabinet 2. The cabinet 2 is open. In it, you see a bowl 2.', '> go to cabinet 3', 'You arrive at loc 8. On the cabinet 3, you see nothing.', '> go to cabinet 4', 'Nothing happens.', '> go to cabinet 5', 'You arrive at loc 7. The cabinet 5 is closed.', '> open cabinet 5', 'You open the cabinet 5. The cabinet 5 is open. In it, you see nothing.', '> go to cabinet 6', 'You arrive at loc 11. The cabinet 6 is closed.', '> open cabinet 6', 'You open the cabinet 6. The cabin

  5%|███████▊                                                                                                                                                    | 2/40 [00:17<05:26,  8.60s/it]

[34m### env feedback per step obs, reward, done =  You arrive at loc 26. The cabinet 11 is closed. | 0 | True
[36mCurrent Observation :
 You arrive at loc 26. The cabinet 11 is closed.
> go to cabinet 11
You arrive at loc 26. The cabinet 11 is closed.
[32mCONGRATULATIONS you completed the task :   -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a cabinet 11, a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 2, a countertop 1, a diningtable 2, a diningtable 1, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a sidetable 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1.

Your task is to: put some butterknife on sidetable. 
 Great Job d^__^b 
[31m################# Task: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a ro

  8%|███████████▋                                                                                                                                                | 3/40 [00:22<04:20,  7.05s/it]

[34m### env feedback per step obs, reward, done =  Nothing happens. | 0 | False
[36mCurrent Observation :
 Nothing happens.
> examine bowl 1
Nothing happens.
[30m ------------------------------------------------------------ iteration 48 -------------------------------------------------- 
[35m****** trajectory *******:
 ['> go to desk 1', 'You arrive at loc 1. On the desk 1, you see a bowl 2, a bowl 1, a cellphone 2, a creditcard 1, a pen 3, a pencil 3, and a remotecontrol 2.', '> examine bowl 2 on desk 1', 'Nothing happens.', '> take bowl 2 from desk 1', 'You pick up the bowl 2 from the desk 1.', '> examine bowl 2', 'This is a normal bowl 2. In it, you see nothing.', '> go to shelf 1', 'You arrive at loc 12. On the shelf 1, you see a alarmclock 2, a alarmclock 1, a cd 1, and a keychain 1.', '> go to desk 1', 'You arrive at loc 1. On the desk 1, you see a bowl 1, a cellphone 2, a creditcard 1, a pen 3, a pencil 3, and a remotecontrol 2.', '> go to shelf 2', 'You arrive at loc 5. On 

 10%|███████████████▌                                                                                                                                            | 4/40 [00:31<04:41,  7.82s/it]

[34m### env feedback per step obs, reward, done =  Nothing happens. | 0 | True
[36mCurrent Observation :
 Nothing happens.
> take remotecontrol from armchair 1
Nothing happens.
[32mCONGRATULATIONS you completed the task :   -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a armchair 2, a armchair 1, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeetable 2, a coffeetable 1, a diningtable 1, a drawer 7, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a garbagecan 1, a shelf 13, a shelf 12, a shelf 11, a shelf 10, a shelf 9, a shelf 8, a shelf 7, a shelf 6, a shelf 5, a shelf 4, a shelf 3, a shelf 2, a shelf 1, and a sofa 1.

Your task is to: put some remotecontrol on armchair. 
 Great Job d^__^b 
[31m################# Task: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bed 1, a dra

 12%|███████████████████▌                                                                                                                                        | 5/40 [00:36<04:02,  6.94s/it]

[34m### env feedback per step obs, reward, done =  You arrive at loc 10. The drawer 9 is open. In it, you see a pencil 3. | 0 | False
[36mCurrent Observation :
 You arrive at loc 10. The drawer 9 is open. In it, you see a pencil 3.
> go to drawer 9
You arrive at loc 10. The drawer 9 is open. In it, you see a pencil 3.
[30m ------------------------------------------------------------ iteration 49 -------------------------------------------------- 
[35m****** trajectory *******:
 ['> go to drawer 1', 'You arrive at loc 21. The drawer 1 is closed.', '> open drawer 1', 'You open the drawer 1. The drawer 1 is open. In it, you see nothing.', '> go to drawer 2', 'You arrive at loc 25. The drawer 2 is closed.', '> open drawer 2', 'You open the drawer 2. The drawer 2 is open. In it, you see nothing.', '> go to drawer 3', 'You arrive at loc 24. The drawer 3 is closed.', '> open drawer 3', 'You open the drawer 3. The drawer 3 is open. In it, you see nothing.', '> go to drawer 4', 'Nothing hap

 15%|███████████████████████▍                                                                                                                                    | 6/40 [00:46<04:32,  8.03s/it]

[34m### env feedback per step obs, reward, done =  Nothing happens. | 0 | False
[36mCurrent Observation :
 Nothing happens.
> put egg 4 in/on microwave 1
Nothing happens.
[30m ------------------------------------------------------------ iteration 49 -------------------------------------------------- 
[35m****** trajectory *******:
 ['> go to fridge 1', 'You arrive at loc 40. The fridge 1 is closed.', '> open fridge 1', 'You open the fridge 1. The fridge 1 is open. In it, you see a cup 3, a egg 4, a egg 3, a lettuce 1, and a tomato 2.', '> take egg 4 from fridge 1', 'You pick up the egg 4 from the fridge 1.', '> put egg 4 in microwave 1', 'Nothing happens.', '> put egg 4 in/on microwave 1', 'Nothing happens.', '> put egg 4 in/on microwave 1', 'Nothing happens.', '> put egg 4 in/on microwave 1', 'Nothing happens.', '> put egg 4 in/on microwave 1', 'Nothing happens.', '> put egg 4 in/on microwave 1', 'Nothing happens.', '> put egg 4 in/on microwave 1', 'Nothing happens.', '> put egg 4

 18%|███████████████████████████▎                                                                                                                                | 7/40 [01:33<11:21, 20.64s/it]

[36m>>>>>>>>>>>>>>>> agent prediction is _
 Prediction(
    action='take desklamp 1 from sidetable 2'
)
[34m### env feedback per step obs, reward, done =  Nothing happens. | 0 | True
[36mCurrent Observation :
 Nothing happens.
> take desklamp 1 from sidetable 2
Nothing happens.
[32mCONGRATULATIONS you completed the task :   -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bed 1, a desk 1, a drawer 3, a drawer 2, a drawer 1, a garbagecan 1, a safe 1, a shelf 5, a shelf 4, a shelf 3, a shelf 2, a shelf 1, a sidetable 2, and a sidetable 1.

Your task is to: examine the pillow with the desklamp. 
 Great Job d^__^b 
[31m################# Task: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a cabinet 13, a cabinet 12, a cabinet 11, a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeema

 20%|███████████████████████████████▏                                                                                                                            | 8/40 [02:46<19:52, 37.28s/it]

[36m>>>>>>>>>>>>>>>> agent prediction is _
 Prediction(
    action='put bowl 1 in/on cabinet 1'
)
[34m### env feedback per step obs, reward, done =  Nothing happens. | 0 | True
[36mCurrent Observation :
 Nothing happens.
> put bowl 1 in/on cabinet 1
Nothing happens.
[32mCONGRATULATIONS you completed the task :   -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a cabinet 13, a cabinet 12, a cabinet 11, a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 1, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1.

Your task is to: put a cool bowl in cabinet. 
 Great Job d^__^b 
[31m################# Task: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. 

 22%|███████████████████████████████████                                                                                                                         | 9/40 [03:52<13:19, 25.79s/it]


[36m>>>>>>>>>>>>>>>> agent prediction is _
 Prediction(
    action='go to desk 1'
)
[34m### env feedback per step obs, reward, done =  You arrive at loc 17. On the desk 1, you see a cd 1, a cellphone 1, a creditcard 2, a desklamp 1, a laptop 1, a mug 2, a mug 1, and a pencil 1. | 1 | True
[36mCurrent Observation :
 You arrive at loc 17. On the desk 1, you see a cd 1, a cellphone 1, a creditcard 2, a desklamp 1, a laptop 1, a mug 2, a mug 1, and a pencil 1.
> go to desk 1
You arrive at loc 17. On the desk 1, you see a cd 1, a cellphone 1, a creditcard 2, a desklamp 1, a laptop 1, a mug 2, a mug 1, and a pencil 1.
[32mCONGRATULATIONS you completed the task :   -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bed 1, a desk 1, a drawer 2, a drawer 1, a garbagecan 1, a shelf 1, and a sidetable 1.

Your task is to: examine the pillow with the desklamp. 
 Great Job d^__^b 
Bootstrapped 1 full traces after 9 examples for up to 1 ro

  0%|                                                                                                                                                                    | 0/40 [00:00<?, ?it/s]

[31m################# Task: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bathtubbasin 1, a countertop 1, a drawer 8, a drawer 7, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a garbagecan 1, a handtowelholder 1, a sinkbasin 1, a toilet 1, a toiletpaperhanger 1, a towelholder 2, and a towelholder 1.

Your task is to: put a toiletpaper in toiletpaperhanger.
[30m ------------------------------------------------------------ iteration 0 -------------------------------------------------- 
[35m****** trajectory *******:
 []
[36m******current possible actions*********  :
 ['go to bathtubbasin 1', 'go to countertop 1', 'go to drawer 1', 'go to drawer 2', 'go to drawer 3', 'go to drawer 4', 'go to drawer 5', 'go to drawer 6', 'go to drawer 7', 'go to drawer 8', 'go to garbagecan 1', 'go to handtowelholder 1', 'go to sinkbasin 1', 'go to toilet 1', 'go to toiletpaperhanger 1', 'go to towelholder 1', 'go t