## Run trained models for qualitative analysis


In [1]:
import os
from collections import Counter
from copy import deepcopy
from glob import glob
import matplotlib.pyplot as plt

import torch
from tqdm.auto import tqdm
from humemai.utils import read_yaml, read_json

from agent import DQNAgent


# Number of evaluation rollouts; the loop below uses seeds 0 .. num_tests - 1.
num_tests = 20

# Lookup table used by the inspection cells to turn identifiers stored in the
# agent's memory / questions / answers into display names.
# NOTE(review): this mapping is read from a "dqn" run directory, whereas the
# run evaluated below lives under "CB" (see `train_dir`) — confirm that both
# runs share the same generic-to-realistic mapping.
string_map = read_json(
    "./trained-results/non-equal-object-probs/dqn/"
    "room_size=xl-different-prob/capacity=96/"
    "2024-08-12 23:58:06.290168/generic-to-realistic-mapping.json"
)

# Directory of the training run under inspection; train.yaml and results.yaml
# are read from here.
train_dir = (
    "./trained-results/non-equal-object-probs/CB/"
    "room_size=xl-different-prob/capacity=96/"
    "2024-09-28 16:10:51.589604/"
)

# dqn_model_path = glob(os.path.join(train_dir, "*.pt"))[0]
# Start from the keyword arguments recorded at training time; they are passed
# to DQNAgent(**params) in the evaluation loop.
params = read_yaml(os.path.join(train_dir, "train.yaml"))
# NOTE(review): this root is spelled "training-results" while the inputs live
# under "trained-results" — confirm the different directory is intentional.
params["default_root_dir"] = "./training-results/TRASH"
# Presumably tells the agent to load the saved weights from the run directory
# — TODO confirm against DQNAgent.
params["pretrained_path"] = train_dir
# Metrics stored alongside the run; "test_score" is displayed in a later cell.
results = read_yaml(os.path.join(train_dir, "results.yaml"))

# One independent record per evaluation run (indexed by seed). Each record
# starts empty and is filled step by step by the rollout loop; a fresh dict
# with fresh lists is created for every run so no list is shared between runs.
stats_interest = [
    dict(
        hidden_state=None,
        state=[],
        a_qa=[],
        q_qa=[],
        score=0,
        rewards=[],
        questions=[],
        answers=[],
    )
    for _ in range(num_tests)
]


# Run one greedy episode per seed and record everything needed for the
# qualitative inspection cells further down.
for seed in tqdm(range(num_tests)):
    # The same value is used for both seeds, so run `seed` is tied to a single
    # number.
    params["test_seed"] = seed
    params["train_seed"] = seed
    agent = DQNAgent(**params)

    # Evaluation only: make sure no parameter of the network tracks gradients.
    for param in agent.dqn.parameters():
        param.requires_grad = False

    record = stats_interest[seed]

    # Reset once, then step until the agent reports the episode is over.
    agent.reset()
    done = False
    while not done:
        # Snapshot the working memory *before* the step, so the stored state is
        # the one the recorded answers were produced from.
        state = deepcopy(agent.memory_systems.get_working_memory().to_list())
        answers, a_qa, q_qa, reward, done, que = agent.step(greedy=True)

        record["state"].append(state)
        record["a_qa"].append(a_qa)
        record["q_qa"].append(q_qa)
        # `reward` is summed per step, i.e. it is an iterable of per-question
        # rewards rather than a scalar.
        record["score"] += sum(reward)
        record["rewards"].append(reward)
        record["questions"].append(que)
        record["answers"].append(answers)

    # Episode finished: keep a copy of the environment's hidden global states.
    record["hidden_state"] = deepcopy(agent.env.unwrapped.hidden_global_states_all)

# Show the total score of every run so the most interesting seed can be picked.
for idx, run_stats in enumerate(stats_interest):
    print(idx, ": ", run_stats["score"])

  from .autonotebook import tqdm as notebook_tqdm
  logger.deprecation(
  logger.deprecation(
  logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


Running on cpu
Assertion passed: dict A is part of dict B.


  5%|▌         | 1/20 [00:01<00:19,  1.02s/it]

Running on cpu
Assertion passed: dict A is part of dict B.


 10%|█         | 2/20 [00:01<00:17,  1.03it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 15%|█▌        | 3/20 [00:02<00:15,  1.08it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 20%|██        | 4/20 [00:03<00:15,  1.02it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 25%|██▌       | 5/20 [00:04<00:14,  1.03it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 30%|███       | 6/20 [00:05<00:13,  1.01it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 35%|███▌      | 7/20 [00:06<00:12,  1.04it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 40%|████      | 8/20 [00:07<00:11,  1.03it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 45%|████▌     | 9/20 [00:08<00:10,  1.06it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 50%|█████     | 10/20 [00:09<00:09,  1.02it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 55%|█████▌    | 11/20 [00:10<00:08,  1.03it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 60%|██████    | 12/20 [00:11<00:07,  1.06it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


 65%|██████▌   | 13/20 [00:12<00:07,  1.01s/it]

Running on cpu
Assertion passed: dict A is part of dict B.


 70%|███████   | 14/20 [00:13<00:06,  1.01s/it]

Running on cpu
Assertion passed: dict A is part of dict B.


 75%|███████▌  | 15/20 [00:14<00:05,  1.02s/it]

Running on cpu
Assertion passed: dict A is part of dict B.


 80%|████████  | 16/20 [00:15<00:04,  1.08s/it]

Running on cpu
Assertion passed: dict A is part of dict B.


 85%|████████▌ | 17/20 [00:16<00:03,  1.04s/it]

Running on cpu
Assertion passed: dict A is part of dict B.


 90%|█████████ | 18/20 [00:17<00:02,  1.01s/it]

Running on cpu
Assertion passed: dict A is part of dict B.


 95%|█████████▌| 19/20 [00:18<00:00,  1.02it/s]

Running on cpu
Assertion passed: dict A is part of dict B.


100%|██████████| 20/20 [00:19<00:00,  1.00it/s]

0 :  545
1 :  342
2 :  382
3 :  566
4 :  469
5 :  496
6 :  386
7 :  437
8 :  436
9 :  478
10 :  506
11 :  430
12 :  543
13 :  389
14 :  428
15 :  391
16 :  462
17 :  348
18 :  324
19 :  445





### Choose the best index

In [18]:
# Index (= seed) of the run to inspect; index 3 has the highest score in the
# list printed above.
train_of_interest = 3
# Index of the step within that run whose memory, questions, answers and
# rewards are displayed by the following cells.
step_of_interest = 20

In [19]:
# Print every entry of the working memory recorded at the selected step, one
# element per line, with a blank line between entries. Elements that have a
# display name in `string_map` are translated; everything else is printed raw.
for memory in stats_interest[train_of_interest]["state"][step_of_interest]:
    for element in memory:
        try:
            print(string_map[element])
        except (KeyError, TypeError):
            # KeyError: the element has no entry in the mapping.
            # TypeError: the element is unhashable (e.g. the qualifier dict
            # such as {'current_time': 20}) and cannot be used as a key.
            # Anything else (including KeyboardInterrupt) is allowed to
            # propagate instead of being silently swallowed.
            print(element)
    print()

Wine
west
Vault
{'current_time': 20}

Wine
north
Attic
{'current_time': 20}

Wine
east
Storage
{'current_time': 20}

Agent
atlocation
Wine
{'current_time': 20}

Wine
south
Wall
{'current_time': 20}

Bag
atlocation
Foyer
{'strength': 1}

Agent
atlocation
Foyer
{'strength': 1}

Foyer
north
Wall
{'strength': 1}

Coat
atlocation
Foyer
{'strength': 1}

Living
north
Wall
{'timestamp': [1]}

Agent
atlocation
Living
{'timestamp': [1]}

Living
south
Dining
{'strength': 1}

Agent
atlocation
Dining
{'strength': 1}

Dining
east
Study
{'strength': 1}

Dining
south
Wall
{'strength': 1}

Dining
north
Living
{'strength': 1}

Desk
atlocation
Study
{'strength': 1}

Study
north
Wall
{'strength': 1}

Study
west
Dining
{'strength': 1}

Study
south
Bath
{'strength': 1}

Agent
atlocation
Study
{'strength': 1}

Agent
atlocation
Library
{'strength': 1}

Library
south
Wall
{'strength': 1}

Library
west
Study
{'strength': 1}

Shelf
atlocation
Library
{'strength': 1}

Library
east
Sunroom
{'strength': 1}

Library

In [20]:
# Display names of the answers the agent gave at the selected step.
[
    string_map[answer]
    for answer in stats_interest[train_of_interest]["answers"][step_of_interest]
]

['Guest',
 'Craft',
 'Library',
 'Storage',
 'Game',
 'Sunroom',
 'Guest',
 'Guest',
 'Foyer',
 'Lounge']

In [21]:
# Print every question asked at the selected step, one element per line, with
# a blank line between questions. Elements that have a display name in
# `string_map` are translated; everything else is printed raw.
for question in stats_interest[train_of_interest]["questions"][step_of_interest]:
    for element in question:
        try:
            print(string_map[element])
        except (KeyError, TypeError):
            # KeyError: the element has no entry in the mapping (e.g. the "?"
            # placeholder or the numeric field).
            # TypeError: the element is unhashable and cannot be used as a key.
            # Anything else (including KeyboardInterrupt) is allowed to
            # propagate instead of being silently swallowed.
            print(element)
    print()

Tool
atlocation
?
20

Sam
atlocation
?
20

Shelf
atlocation
?
20

Jamie
atlocation
?
20

Table
atlocation
?
20

Couch
atlocation
?
20

Tool
atlocation
?
20

Bed
atlocation
?
20

Bag
atlocation
?
20

Book
atlocation
?
20



In [22]:
# Rewards recorded at the selected step of the selected run (appears to hold
# one entry per question asked at that step).
stats_interest[train_of_interest]["rewards"][step_of_interest]

[0, 0, 1, 1, 1, 1, 0, 1, 1, 0]

In [34]:
# Test score stored in the run's results.yaml, shown for comparison with the
# per-seed scores computed in this notebook.
results["test_score"]

{'mean': 569.1, 'std': 34.98}