# Environment setup
## Reload and import agent.py

In [1]:
import importlib
import agent
# Re-execute the already-imported module so that edits made to agent.py since
# the first import are picked up without restarting the kernel.
importlib.reload(agent)
# Bind the name only after the reload so it refers to the reloaded class.
from agent import Agent

GEMINI_API_KEY: <redacted - never print or commit credentials; rotate the exposed key and load it from an environment variable>


## Reload and import env_setup.py

In [2]:
import importlib
import env_setup
# Re-execute the already-imported module so that edits made to env_setup.py
# since the first import are picked up without restarting the kernel.
importlib.reload(env_setup)
# Bind the name only after the reload so it refers to the reloaded class.
from env_setup import GridEnvironment

# Individual Instruction

In [None]:
# --- Scenario under test: edit these values to try other cases ---
name = "3.txt"  # Grid file to load, e.g. "1.txt", "2.txt", etc.
exp = True  # True for CP, False for Fs-CoT
llm = "gemma3"
human_move = "The human moves upward from their current position at (9,6) to (4,6), which is adjacent to the red door at (4,7). Upon arriving at (4,6), they provide the instruction."
instruction = "Give me a red key for this door?"

# --- Build the environment from the selected grid file ---
env = GridEnvironment()
grid_file_path = env.get_grid_file_path(name)
if grid_file_path:
    env.load_grid_from_file(grid_file_path)
    env.display_grid()

# NOTE(review): the steps below run even when no grid path was found above,
# i.e. on an environment that never loaded a grid - confirm this is intended.
agent_pos = env.get_agent_position()
object_pos = env.get_all_object_positions()

# --- Create the agent at its grid position and query the LLM ---
my_agent = Agent(agent_pos)
response = my_agent.chain_of_thought_prompt(
    instruction, env.grid, human_move, object_pos, exp, llm
)
print(response)


. . . y . . . b W W W W
r W W r W W . r W W W g
W W W W W W m W W W W R
W W W W W W . W W W W .
g . . . B . . R . . . .
W W W W W W . W W W W W
W W W W W W . W W W W W
. . . Y . . . W W W W W
B W W W W W . W W W W W
g W W W W W h . . . . g
Generating chain-of-thought prompt...
```
Human Action: The human moves upward from their current position at (9,6) to (4,6), which is adjacent to the red door at (4,7). Upon arriving at (4,6), they provide the instruction.
Instruction: Give me a red key for this door?
Type: Clear. The human is adjacent to the red door at (4,7) and requests a red key. This clearly indicates the intention to unlock this specific door and potentially access something beyond it.
Response: The human is requesting a red key to unlock the red door at (4,7). There are three red keys available on the grid at (1,0), (1,3), and (1,7). To minimize movement, I will collect the closest red key at (1,0) and pass it to the human at (4,6).
Actions:
1) Collect: red_key at (1,0).
2) P

# Experiments

## Experiment 1: Tomcat with CP (GPT-4o)

In [None]:
import pandas as pd

# Experiment 1 settings: CP prompting, responses generated with GPT-4o.
csv_file_path = 'ToM_CP-dataset.csv'
response_col = 'Generated_response_gpt4o'
exp = True  # True for CP, False for Fs-CoT
llm = "gpt-4o"

# Load the dataset from the local directory.
df = pd.read_csv(csv_file_path)

# Make sure the response column exists and holds strings.
if response_col not in df.columns:
    df[response_col] = ""
else:
    # Blank out missing cells before casting: a bare astype(str) turns NaN
    # into the literal text "nan", which would then be written to the CSV for
    # every row that is skipped below.
    df[response_col] = df[response_col].fillna("").astype(str)

# Each row is read by position: column 0 is passed to the agent as the human
# move, column 1 as the instruction, and column 2 names the grid file.
for index, row in df.iterrows():
    human = row.iloc[0]
    instruction = row.iloc[1]
    file_name = row.iloc[2]

    # Build a fresh environment for this row's grid file.
    env = GridEnvironment()
    grid_file_path = env.get_grid_file_path(file_name)
    if not grid_file_path:
        # No grid file resolved for this row: leave its response cell untouched.
        continue

    env.load_grid_from_file(grid_file_path)

    # Place the agent at its position on the loaded grid.
    agent_pos = env.get_agent_position()
    object_pos = env.get_all_object_positions()
    my_agent = Agent(agent_pos)

    # Generate the response and store it in this row's response cell.
    response = my_agent.chain_of_thought_prompt(instruction, env.grid, human, object_pos, exp, llm)
    df.at[index, response_col] = str(response)

# Write the updated DataFrame back, overwriting the input CSV.
df.to_csv(csv_file_path, index=False)

## Experiment 2: Tomcat with Fs-CoT (GPT-4o)

In [None]:
import pandas as pd

# Experiment 2 settings: Fs-CoT prompting, responses generated with GPT-4o.
csv_file_path = 'ToM_FsCoT-dataset.csv'
response_col = 'Generated_response_gpt4o'
exp = False  # True for CP, False for Fs-CoT
llm = "gpt-4o"

# Load the dataset from the local directory.
df = pd.read_csv(csv_file_path)

# Make sure the response column exists and holds strings.
if response_col not in df.columns:
    df[response_col] = ""
else:
    # Blank out missing cells before casting: a bare astype(str) turns NaN
    # into the literal text "nan", which would then be written to the CSV for
    # every row that is skipped below.
    df[response_col] = df[response_col].fillna("").astype(str)

# Each row is read by position: column 0 is passed to the agent as the human
# move, column 1 as the instruction, and column 2 names the grid file.
for index, row in df.iterrows():
    human = row.iloc[0]
    instruction = row.iloc[1]
    file_name = row.iloc[2]

    # Build a fresh environment for this row's grid file.
    env = GridEnvironment()
    grid_file_path = env.get_grid_file_path(file_name)
    if not grid_file_path:
        # No grid file resolved for this row: leave its response cell untouched.
        continue

    env.load_grid_from_file(grid_file_path)

    # Place the agent at its position on the loaded grid.
    agent_pos = env.get_agent_position()
    object_pos = env.get_all_object_positions()
    my_agent = Agent(agent_pos)

    # Generate the response and store it in this row's response cell.
    response = my_agent.chain_of_thought_prompt(instruction, env.grid, human, object_pos, exp, llm)
    df.at[index, response_col] = str(response)

# Write the updated DataFrame back, overwriting the input CSV.
df.to_csv(csv_file_path, index=False)

## Experiment 3: Tomcat with CP (Gemma3)

In [None]:
import pandas as pd
import time

# Experiment 3 settings: CP prompting, responses generated with Gemma3.
csv_file_path = 'ToM_CP-dataset.csv'
response_col = 'Generated_response_gemma3'
exp = True  # True for CP, False for Fs-CoT
llm = "gemma3"

# Pause after every `batch_size` processed rows
# (presumably to stay under an API rate limit - TODO confirm).
batch_size = 2  # Number of rows to process in each batch
sleep_time = 60  # Sleep time in seconds between batches
counter = 0  # Rows for which a response has been generated so far

# Load the dataset from the local directory.
df = pd.read_csv(csv_file_path)

# Make sure the response column exists and holds strings.
if response_col not in df.columns:
    df[response_col] = ""
else:
    # Blank out missing cells before casting: a bare astype(str) turns NaN
    # into the literal text "nan", which would then be written to the CSV for
    # every row that is skipped below.
    df[response_col] = df[response_col].fillna("").astype(str)

# Each row is read by position: column 0 is passed to the agent as the human
# move, column 1 as the instruction, and column 2 names the grid file.
for index, row in df.iterrows():
    human = row.iloc[0]
    instruction = row.iloc[1]
    file_name = row.iloc[2]

    # Build a fresh environment for this row's grid file.
    env = GridEnvironment()
    grid_file_path = env.get_grid_file_path(file_name)
    if not grid_file_path:
        # No grid file resolved for this row: it is skipped and does not
        # count towards the batch.
        continue

    env.load_grid_from_file(grid_file_path)

    # Place the agent at its position on the loaded grid.
    agent_pos = env.get_agent_position()
    object_pos = env.get_all_object_positions()
    my_agent = Agent(agent_pos)

    # Generate the response and store it in this row's response cell.
    response = my_agent.chain_of_thought_prompt(instruction, env.grid, human, object_pos, exp, llm)
    df.at[index, response_col] = response

    counter += 1
    if counter % batch_size == 0:
        print(f"Sleeping for {sleep_time} seconds after {counter} rows...")
        time.sleep(sleep_time)

# Write the updated DataFrame back, overwriting the input CSV.
df.to_csv(csv_file_path, index=False)

Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 2 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 4 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 6 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 8 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 10 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 12 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 14 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 16 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought pr

## Experiment 4: Tomcat with Fs-CoT (Gemma3)

In [62]:
import pandas as pd
import time

# Experiment 4 settings: Fs-CoT prompting, responses generated with Gemma3.
csv_file_path = 'ToM_FsCoT-dataset.csv'
response_col = 'Generated_response_gemma3'
exp = False  # True for CP, False for Fs-CoT
llm = "gemma3"

# Pause after every `batch_size` processed rows
# (presumably to stay under an API rate limit - TODO confirm).
batch_size = 2  # Number of rows to process in each batch
sleep_time = 60  # Sleep time in seconds between batches
counter = 0  # Rows for which a response has been generated so far

# Load the dataset from the local directory.
df = pd.read_csv(csv_file_path)

# Make sure the response column exists and holds strings.
if response_col not in df.columns:
    df[response_col] = ""
else:
    # Blank out missing cells before casting: a bare astype(str) turns NaN
    # into the literal text "nan", which would then be written to the CSV for
    # every row that is skipped below.
    df[response_col] = df[response_col].fillna("").astype(str)

# Each row is read by position: column 0 is passed to the agent as the human
# move, column 1 as the instruction, and column 2 names the grid file.
for index, row in df.iterrows():
    human = row.iloc[0]
    instruction = row.iloc[1]
    file_name = row.iloc[2]

    # Build a fresh environment for this row's grid file.
    env = GridEnvironment()
    grid_file_path = env.get_grid_file_path(file_name)
    if not grid_file_path:
        # No grid file resolved for this row: it is skipped and does not
        # count towards the batch.
        continue

    env.load_grid_from_file(grid_file_path)

    # Place the agent at its position on the loaded grid.
    agent_pos = env.get_agent_position()
    object_pos = env.get_all_object_positions()
    my_agent = Agent(agent_pos)

    # Generate the response and store it in this row's response cell.
    response = my_agent.chain_of_thought_prompt(instruction, env.grid, human, object_pos, exp, llm)
    df.at[index, response_col] = response

    counter += 1
    if counter % batch_size == 0:
        print(f"Sleeping for {sleep_time} seconds after {counter} rows...")
        time.sleep(sleep_time)

# Write the updated DataFrame back, overwriting the input CSV.
df.to_csv(csv_file_path, index=False)

Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 2 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 4 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 6 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 8 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 10 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 12 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 14 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Sleeping for 60 seconds after 16 rows...
Generating chain-of-thought prompt...
Generating chain-of-thought pr

## Experiment 5: Tomcat with CP (Deepseek-R1)

In [None]:
import pandas as pd

# Experiment 5 settings: CP prompting, responses generated with the "R1" model.
csv_file_path = 'ToM_CP-dataset.csv'
response_col = 'Generated_response_R1'
exp = True  # True for CP, False for Fs-CoT
llm = "R1"

# Load the dataset from the local directory.
df = pd.read_csv(csv_file_path)

# Make sure the 'Generated_response_R1' column exists and holds strings.
if response_col not in df.columns:
    df[response_col] = ""
else:
    # Blank out missing cells before casting: a bare astype(str) turns NaN
    # into the literal text "nan", which would then be written to the CSV for
    # every row that is skipped below.
    df[response_col] = df[response_col].fillna("").astype(str)

# Each row is read by position: column 0 is passed to the agent as the human
# move, column 1 as the instruction, and column 2 names the grid file.
# No batching or sleeping is done in this experiment.
for index, row in df.iterrows():
    human = row.iloc[0]
    instruction = row.iloc[1]
    file_name = row.iloc[2]

    # Build a fresh environment for this row's grid file.
    env = GridEnvironment()
    grid_file_path = env.get_grid_file_path(file_name)
    if not grid_file_path:
        # No grid file resolved for this row: leave its response cell untouched.
        continue

    env.load_grid_from_file(grid_file_path)

    # Place the agent at its position on the loaded grid.
    agent_pos = env.get_agent_position()
    object_pos = env.get_all_object_positions()
    my_agent = Agent(agent_pos)

    # Generate the response and store it in this row's response cell.
    response = my_agent.chain_of_thought_prompt(instruction, env.grid, human, object_pos, exp, llm)
    df.at[index, response_col] = response

# Write the updated DataFrame back, overwriting the input CSV.
df.to_csv(csv_file_path, index=False)

Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...
Generating chain-of-thought prompt...


## Experiment 6: Tomcat with Fs-CoT (Deepseek-R1)


In [None]:
import pandas as pd

# Experiment 6 settings: Fs-CoT prompting, responses generated with the "R1" model.
csv_file_path = 'ToM_FsCoT-dataset.csv'
response_col = 'Generated_response_R1'
exp = False  # True for CP, False for Fs-CoT
llm = "R1"

counter = 0  # Rows for which a response has been generated so far

# Load the dataset from the local directory.
df = pd.read_csv(csv_file_path)

# Make sure the 'Generated_response_R1' column exists and holds strings.
if response_col not in df.columns:
    df[response_col] = ""
else:
    # Blank out missing cells before casting: a bare astype(str) turns NaN
    # into the literal text "nan", which would then be written to the CSV for
    # every row that is skipped below.
    df[response_col] = df[response_col].fillna("").astype(str)

# Each row is read by position: column 0 is passed to the agent as the human
# move, column 1 as the instruction, and column 2 names the grid file.
# No batching or sleeping is done in this experiment; only progress is printed.
for index, row in df.iterrows():
    human = row.iloc[0]
    instruction = row.iloc[1]
    file_name = row.iloc[2]

    # Build a fresh environment for this row's grid file.
    env = GridEnvironment()
    grid_file_path = env.get_grid_file_path(file_name)
    if not grid_file_path:
        # No grid file resolved for this row: it is skipped and not counted.
        continue

    env.load_grid_from_file(grid_file_path)

    # Place the agent at its position on the loaded grid.
    agent_pos = env.get_agent_position()
    object_pos = env.get_all_object_positions()
    my_agent = Agent(agent_pos)

    # Generate the response and store it in this row's response cell.
    response = my_agent.chain_of_thought_prompt(instruction, env.grid, human, object_pos, exp, llm)
    df.at[index, response_col] = response

    counter += 1
    print(f"Counter: {counter}")

# Write the updated DataFrame back, overwriting the input CSV.
df.to_csv(csv_file_path, index=False)