In [1]:
import environment.job_search_environment as job_search_env
import argparse
import os
import random
from supersuit import pad_observations_v0, pad_action_space_v0

import ray
from ray import tune
from ray.tune.registry import register_env




In [2]:
from gym.spaces import Discrete, Dict, Tuple, Box

from gym.spaces.utils import flatten, flatdim, flatten_space

In [3]:
# NOTE: This is working on the Zoo, but not on my local machine (M1 compatibility issues)
from ray.rllib.algorithms.ppo import PPOConfig

In [4]:
import numpy as np

In [5]:
import gym

In [6]:
from ray import tune
from ray.rllib.models.modelv2 import restore_original_dimensions
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.tune.registry import register_env
import torch
from torch import nn
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.torch_utils import FLOAT_MIN
from ray.rllib.models.preprocessors import Preprocessor, DictFlatteningPreprocessor

In [7]:
# Resolve the deep-learning frameworks through RLlib's import helpers.
# NOTE: the second line rebinds the module-level name `torch` (already bound by
# the plain `import torch` in the import cell) to whatever
# try_import_torch() returns, and also binds `_`.
tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()

In [8]:
ray.shutdown()

In [9]:
ray.init()

E1214 01:26:54.308235518  736151 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies
E1214 01:26:54.340299070  736151 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies
E1214 01:26:56.482461072  736151 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies
E1214 01:26:56.511033004  736151 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies
2022-12-14 01:26:56,538	INFO worker.py:1528 -- Started a local Ray instance.


0,1
Python version:,3.10.8
Ray version:,2.1.0


In [10]:
"""
In order to deal with the Dictionary space, need to pass a custom model to RLlib.
See: https://medium.com/@nima.siboni/rllib-with-dictionary-state-baa06b64470f
"""
class CandidateModelV0(TorchModelV2, nn.Module):
    """Action-masking Torch model for a ``Dict`` observation space.

    The (original) observation space must be a ``Dict`` containing:

    * ``"action_mask"`` -- per-logit mask, 1 for allowed and 0 for forbidden.
    * ``"observation"`` -- the actual (possibly nested) observation.

    The observation part is fed to an internal fully connected network; the
    mask is then applied to the resulting logits so that forbidden actions
    get a very large negative logit.

    Supported ``custom_model_config`` keys:

    * ``"no_masking"`` (bool, default ``False``) -- return the raw logits
      without applying the mask. This will likely lead to invalid actions.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
        """Build the internal fully connected network.

        Args:
            obs_space: Observation space handed over by RLlib. Its
                ``original_space`` attribute (or the space itself when that
                attribute is missing) must be the ``Dict`` described in the
                class docstring.
            action_space: Action space of the policy.
            num_outputs: Number of logits the model has to produce.
            model_config: RLlib model config dict.
            name: Model name; the internal network is called
                ``name + "_internal"``.
            **kwargs: Forwarded to ``TorchModelV2.__init__``.

        Raises:
            AssertionError: If the original observation space is not a
                ``Dict`` with ``"action_mask"`` and ``"observation"`` keys.
        """
        orig_space = getattr(obs_space, "original_space", obs_space)
        assert (
            isinstance(orig_space, Dict)
            and "action_mask" in orig_space.spaces
            and "observation" in orig_space.spaces
        ), "observation space must be a Dict with 'action_mask' and 'observation' keys"

        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs)
        nn.Module.__init__(self)

        # Number of leading columns of the flattened observation taken up by
        # the action mask. Derived from the mask space itself rather than
        # from `num_outputs`, so the slice in `forward` does not silently
        # depend on the two widths happening to be equal.
        self._mask_width = flatdim(orig_space["action_mask"])

        # The internal network only ever sees the "observation" part.
        self.internal_model = TorchFC(
            flatten_space(orig_space["observation"]),
            action_space,
            num_outputs,
            model_config,
            name + "_internal",
        )

        # Disabling action masking will likely lead to invalid actions.
        custom_config = model_config.get("custom_model_config") or {}
        self.no_masking = custom_config.get("no_masking", False)

    def forward(self, input_dict, state, seq_lens):
        """Compute (optionally masked) action logits.

        Args:
            input_dict: Must provide ``input_dict["obs"]["action_mask"]`` and
                the flattened observation ``input_dict["obs_flat"]``.
            state: Passed through unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(logits, state)``. The logits are masked unless
            ``self.no_masking`` is set.
        """
        # Extract the available-actions tensor from the observation.
        action_mask = input_dict["obs"]["action_mask"]

        # Drop the mask columns from the flattened observation.
        # NOTE(review): this assumes the mask occupies the *leading* columns
        # of `obs_flat`, the same layout the previous `num_outputs`-based
        # slice relied on -- confirm against the preprocessor's key order.
        flat_observation = input_dict["obs_flat"][:, self._mask_width:]

        # Compute the unmasked logits.
        logits, _ = self.internal_model({"obs": flat_observation})

        # If action masking is disabled, directly return unmasked logits.
        if self.no_masking:
            return logits, state

        # log(1) = 0 leaves allowed logits untouched; log(0) = -inf is
        # clamped to FLOAT_MIN so forbidden logits become hugely negative
        # but stay finite.
        inf_mask = torch.clamp(torch.log(action_mask), min=FLOAT_MIN)
        return logits + inf_mask, state

    def value_function(self):
        """Return the value estimate of the internal network's last forward pass."""
        return self.internal_model.value_function()

In [11]:
def env_creator(args):
    """Create and return a new job-search environment.

    Args:
        args: Accepted so the function can be called with an env config,
            but not used when building the environment.

    Returns:
        The object returned by ``job_search_env.env()``.
    """
    return job_search_env.env()

In [12]:
# Name under which the environment is registered; the PPO config below refers
# to the environment by this name.
env_name = "job_search_env"
# The factory builds a fresh environment via env_creator and wraps it in
# RLlib's ParallelPettingZooEnv adapter. The env config RLlib passes in is
# forwarded to env_creator, which ignores it.
register_env(env_name, lambda config: ParallelPettingZooEnv(env_creator(config)))

In [13]:
ModelCatalog.register_custom_model("CandidateModelV0", CandidateModelV0)

In [14]:
# TODO: use policy_map to map different policies to candidate and employer agents

In [15]:
# TODO: need a custom preprocessor for the custom model with action masking
# Build the PPO configuration and convert it to a plain dict, so that the
# observation/action spaces can be added by key in a later cell and the result
# passed to tune.run.
config = (
    PPOConfig()
    # Refer to the environment by the name it was registered under.
    .environment(env=env_name, clip_actions=False)
    .debugging(log_level="ERROR")
    .framework(framework="torch")
    # GPU count is read from the RLLIB_NUM_GPUS environment variable (default 0).
    .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))

    # The model class is passed directly; with an empty custom_model_config the
    # model's `no_masking` option keeps its default (False), i.e. masking is on.
    .training(model={
                        "custom_model": CandidateModelV0,
                        "custom_model_config": {},
    })
).to_dict()

In [16]:
job_search_env.env().agents

['candidate_0',
 'candidate_1',
 'candidate_2',
 'candidate_3',
 'candidate_4',
 'employer_0',
 'employer_1',
 'employer_2',
 'employer_3',
 'employer_4']

In [17]:
# Set the spaces explicitly: without them RLlib failed with
# "`observation_space` not provided in PolicySpec for default_policy ..."
# (see the Run 4 notes further down). All agents share the same observation
# and action spaces, so the spaces of "candidate_0" are used.
# NOTE: each line builds its own throwaway environment instance.
config["observation_space"] = job_search_env.env().observation_space("candidate_0")
config["action_space"] = job_search_env.env().action_space("candidate_0")

In [18]:
config["observation_space"]

Dict(action_mask:Box(0.0, 1.0, (212,), float32), observation:Dict(candidate_obs:Dict(accepted_offer:Dict(employer_0:Discrete(101), employer_1:Discrete(101), employer_2:Discrete(101), employer_3:Discrete(101), employer_4:Discrete(101)), counter_offers:Dict(employer_0:Tuple(Discrete(101), Discrete(101)), employer_1:Tuple(Discrete(101), Discrete(101)), employer_2:Tuple(Discrete(101), Discrete(101)), employer_3:Tuple(Discrete(101), Discrete(101)), employer_4:Tuple(Discrete(101), Discrete(101))), current_offers:Dict(employer_0:Tuple(Discrete(101), Discrete(101)), employer_1:Tuple(Discrete(101), Discrete(101)), employer_2:Tuple(Discrete(101), Discrete(101)), employer_3:Tuple(Discrete(101), Discrete(101)), employer_4:Tuple(Discrete(101), Discrete(101))), job_openings:Dict(employer_0:Discrete(2), employer_1:Discrete(2), employer_2:Discrete(2), employer_3:Discrete(2), employer_4:Discrete(2)), rejected_offers:Dict(employer_0:Tuple(Discrete(2), Discrete(101)), employer_1:Tuple(Discrete(2), Discre

In [19]:
config["action_space"]

Tuple(Discrete(5), Discrete(5), Discrete(101), Discrete(101))

In [20]:
config

{'extra_python_environs_for_driver': {},
 'extra_python_environs_for_worker': {},
 'num_gpus': 0,
 'num_cpus_per_worker': 1,
 'num_gpus_per_worker': 0,
 '_fake_gpus': False,
 'custom_resources_per_worker': {},
 'placement_strategy': 'PACK',
 'eager_tracing': False,
 'eager_max_retraces': 20,
 'tf_session_args': {'intra_op_parallelism_threads': 2,
  'inter_op_parallelism_threads': 2,
  'gpu_options': {'allow_growth': True},
  'log_device_placement': False,
  'device_count': {'CPU': 1},
  'allow_soft_placement': True},
 'local_tf_session_args': {'intra_op_parallelism_threads': 8,
  'inter_op_parallelism_threads': 8},
 'env': 'job_search_env',
 'env_config': {},
 'observation_space': Dict(action_mask:Box(0.0, 1.0, (212,), float32), observation:Dict(candidate_obs:Dict(accepted_offer:Dict(employer_0:Discrete(101), employer_1:Discrete(101), employer_2:Discrete(101), employer_3:Discrete(101), employer_4:Discrete(101)), counter_offers:Dict(employer_0:Tuple(Discrete(101), Discrete(101)), employ

In [None]:
# Launch PPO training through Ray Tune with the config dict built above.
tune.run(
    "PPO",
    name="PPO",
    # Stop criterion: `timesteps_total` reaches 1,000,000.
    stop={"timesteps_total": 1000000},
    # Checkpoint frequency, in training iterations (per Tune's
    # `checkpoint_freq` semantics).
    checkpoint_freq=10,
    # Results are written to a per-environment directory under ~/ray_results.
    local_dir="~/ray_results/" + env_name,
    config=config,
)

[2m[36m(pid=736833)[0m 
[2m[36m(PPO pid=736833)[0m 2022-12-14 01:27:12,638	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPO pid=736833)[0m 2022-12-14 01:27:12,639	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=736865)[0m 
[2m[36m(pid=736864)[0m 


### Use `gym.spaces` instead of `gymnasium.spaces` to fix this (RLlib expects `gym.spaces`)
```
from gym.spaces import Discrete, Dict, Tuple

from gym.spaces.utils import flatten, flatdim
```

In [234]:
from gym.spaces import Discrete, Dict, Tuple, Box

from gym.spaces.utils import flatten, flatdim

In [243]:
space = Dict({"test": Discrete(10)})
space["test"]

Discrete(10)

In [235]:
np.zeros(flatdim(Tuple((Discrete(4), Discrete(2)))))

array([0., 0., 0., 0., 0., 0.])

In [236]:
Box(0.0, 1.0, shape=(10,)).sample()

array([0.66815686, 0.8099814 , 0.6728993 , 0.3833307 , 0.83226   ,
       0.01675155, 0.36507058, 0.9905923 , 0.5650419 , 0.37177613],
      dtype=float32)

In [237]:
Box(0.0, 1, shape=(10,)).sample()

array([0.8192585 , 0.21756119, 0.7595661 , 0.06519676, 0.5593857 ,
       0.6580883 , 0.89733326, 0.01662113, 0.39561164, 0.0882608 ],
      dtype=float32)

In [238]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [239]:
flatten(Tuple((Discrete(10), Discrete(5))), (0,0))

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])

In [240]:
action_mask = torch.from_numpy(np.array([1, 0]))
action_mask 

tensor([1, 0])

In [241]:
inf_mask = torch.clamp(torch.log(action_mask), min=FLOAT_MIN)
inf_mask

tensor([ 0.0000e+00, -3.4000e+38])

In [17]:
logits = torch.tensor([1,1])
logits + inf_mask

tensor([ 1.0000e+00, -3.4000e+38])

In [16]:
torch.tensor([0, 1]) + torch.tensor([1, 0])

tensor([1, 1])

# Run 1
Got the error: 
```
AssertionError: Observation spaces for all agents must be identical. Perhaps SuperSuit's pad_observations wrapper can help (useage: `supersuit.aec_wrappers.pad_observations(env)`
```

# Run 2

```
AssertionError: homogenization only supports Discrete and Box spaces
```

# Run 3

Same error
```
Traceback (most recent call last):
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 1050, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/_private/worker.py", line 2291, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::PPO.__init__() (pid=1879526, ip=128.36.108.57, repr=PPO)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 139, in __init__
    self.add_workers(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 490, in add_workers
    self.foreach_worker(lambda w: w.assert_healthy())
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 620, in foreach_worker
    remote_results = ray.get([w.apply.remote(func) for w in self.remote_workers()])
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=1879573, ip=128.36.108.57, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7fa145f35d50>)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 492, in __init__
    self.env = env_creator(copy.deepcopy(self.env_context))
  File "/tmp/ipykernel_1861406/2067092441.py", line 2, in <lambda>
  File "/tmp/ipykernel_1861406/841127696.py", line 3, in env_creator
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/supersuit/multiagent_wrappers/padding_wrappers.py", line 33, in pad_observations_v0
    homogenize_ops.check_homogenize_spaces(spaces)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/supersuit/utils/action_transforms/homogenize_ops.py", line 30, in check_homogenize_spaces
    assert False, "homogenization only supports Discrete and Box spaces"
AssertionError: homogenization only supports Discrete and Box spaces

During handling of the above exception, another exception occurred:

ray::PPO.__init__() (pid=1879526, ip=128.36.108.57, repr=PPO)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 414, in __init__
    super().__init__(config=config, logger_creator=logger_creator, **kwargs)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 161, in __init__
    self.setup(copy.deepcopy(self.config))
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 549, in setup
    raise e.args[0].args[2]
AssertionError: homogenization only supports Discrete and Box spaces
```

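In other words, SuperSuit's `pad_observations_v0` wrapper cannot be used to make the agents' observation spaces identical: it only supports `Discrete` and `Box` spaces, and this environment's observation space is a `Dict`.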

# Run 4

Fixed the issue by making all observation spaces and action spaces the same for all agents.

New issue:

```
Failure # 1 (occurred at 2022-12-12_17-37-17)
Traceback (most recent call last):
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 1050, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/_private/worker.py", line 2291, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::PPO.__init__() (pid=499392, ip=128.36.232.21, repr=PPO)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 139, in __init__
    self.add_workers(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 490, in add_workers
    self.foreach_worker(lambda w: w.assert_healthy())
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 620, in foreach_worker
    remote_results = ray.get([w.apply.remote(func) for w in self.remote_workers()])
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=499452, ip=128.36.232.21, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f8059b28df0>)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 567, in __init__
    self.policy_dict = _determine_spaces_for_multi_agent_dict(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 2121, in _determine_spaces_for_multi_agent_dict
    raise ValueError(
ValueError: `observation_space` not provided in PolicySpec for default_policy and env does not have an observation space OR no spaces received from other workers' env(s) OR no `observation_space` specified in config!

During handling of the above exception, another exception occurred:

ray::PPO.__init__() (pid=499392, ip=128.36.232.21, repr=PPO)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 414, in __init__
    super().__init__(config=config, logger_creator=logger_creator, **kwargs)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 161, in __init__
    self.setup(copy.deepcopy(self.config))
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 549, in setup
    raise e.args[0].args[2]
ValueError: `observation_space` not provided in PolicySpec for default_policy and env does not have an observation space OR no spaces received from other workers' env(s) OR no `observation_space` specified in config!
```

# Run 5

```
Failure # 1 (occurred at 2022-12-12_20-29-44)
Traceback (most recent call last):
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 1050, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/_private/worker.py", line 2291, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::PPO.__init__() (pid=2054378, ip=128.36.108.57, repr=PPO)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 139, in __init__
    self.add_workers(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 490, in add_workers
    self.foreach_worker(lambda w: w.assert_healthy())
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/worker_set.py", line 620, in foreach_worker
    remote_results = ray.get([w.apply.remote(func) for w in self.remote_workers()])
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=2054502, ip=128.36.108.57, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f0abd339d50>)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 625, in __init__
    self._build_policy_map(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1882, in _build_policy_map
    preprocessor = ModelCatalog.get_preprocessor_for_space(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/models/catalog.py", line 815, in get_preprocessor_for_space
    prep = cls(observation_space, options)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/models/preprocessors.py", line 42, in __init__
    self._size = int(np.product(self.shape))
TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'

During handling of the above exception, another exception occurred:

ray::PPO.__init__() (pid=2054378, ip=128.36.108.57, repr=PPO)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 414, in __init__
    super().__init__(config=config, logger_creator=logger_creator, **kwargs)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 161, in __init__
    self.setup(copy.deepcopy(self.config))
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 549, in setup
    raise e.args[0].args[2]
TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'
```

Note: the `Dict` space does not have a shape <https://github.com/openai/gym/blob/master/gym/spaces/dict.py#L118>

Correction: the `Dict` shape was not the real cause. The actual issue was that the spaces were taken from `gymnasium.spaces`, whereas RLlib expects spaces from `gym.spaces`.

# Run 6

```
2022-12-12 21:59:59,682	ERROR trial_runner.py:993 -- Trial PPO_job_search_30f4a_00000: Error processing event.
ray.exceptions.RayTaskError(ValueError): ray::PPO.train() (pid=1671497, ip=128.36.232.24, repr=PPO)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 355, in train
    raise skipped from exception_cause(skipped)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 352, in train
    result = self.step()
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 772, in step
    results, train_iter_ctx = self._run_one_training_iteration()
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 2953, in _run_one_training_iteration
    num_recreated += self.try_recover_from_step_attempt(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 2617, in try_recover_from_step_attempt
    raise error
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 2948, in _run_one_training_iteration
    results = self.training_step()
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo.py", line 408, in training_step
    train_batch = synchronous_parallel_sample(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/execution/rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
ray.exceptions.RayTaskError(ValueError): ray::RolloutWorker.sample() (pid=1671528, ip=128.36.232.24, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f25d5759db0>)
ValueError: The two structures don't have the same nested structure.

First structure: type=dict str={'observation': {'candidate_obs': {'job_openings': {'employer_0': 1}, 'accepted_offer': {'employer_0': 0}, 'current_offers': {'employer_0': (0, 0)}, 'rejected_offers': {'employer_0': (0, 0)}, 'counter_offers': {'employer_0': (0, 0)}}, 'employer_obs': {'candidate_strengths': {'candidate_0': 0}, 'job_applicants': {'candidate_0': 0}, 'outstanding_offers': {'candidate_0': (0, 0)}, 'accepted_offers': {'candidate_0': 0}, 'declined_offers': {'candidate_0': (0, 0)}, 'counter_offers': {'candidate_0': (0, 0)}, 'rejected_offers': {'candidate_0': (0, 0)}, 'remaining_budget': 100}}, 'action_mask': array([0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0])}

Second structure: type=OrderedDict str=OrderedDict([('candidate_obs', OrderedDict([('accepted_offer', OrderedDict([('employer_0', 56)])), ('counter_offers', OrderedDict([('employer_0', (99, 8))])), ('current_offers', OrderedDict([('employer_0', (28, 10))])), ('job_openings', OrderedDict([('employer_0', 1)])), ('rejected_offers', OrderedDict([('employer_0', (1, 49))]))])), ('employer_obs', OrderedDict([('accepted_offers', OrderedDict([('candidate_0', 1)])), ('candidate_strengths', OrderedDict([('candidate_0', 79)])), ('counter_offers', OrderedDict([('candidate_0', (66, 5))])), ('declined_offers', OrderedDict([('candidate_0', (1, 7))])), ('job_applicants', OrderedDict([('candidate_0', 1)])), ('outstanding_offers', OrderedDict([('candidate_0', (28, 0))])), ('rejected_offers', OrderedDict([('candidate_0', (0, 4))])), ('remaining_budget', 96)]))])

More specifically: Substructure "type=OrderedDict str=OrderedDict([('accepted_offer', OrderedDict([('employer_0', 56)])), ('counter_offers', OrderedDict([('employer_0', (99, 8))])), ('current_offers', OrderedDict([('employer_0', (28, 10))])), ('job_openings', OrderedDict([('employer_0', 1)])), ('rejected_offers', OrderedDict([('employer_0', (1, 49))]))])" is a sequence, while substructure "type=ndarray str=[0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0]" is not

During handling of the above exception, another exception occurred:

ray::RolloutWorker.sample() (pid=1671528, ip=128.36.232.24, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f25d5759db0>)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 828, in sample
    batches = [self.input_reader.next()]
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py", line 92, in next
    batches = [self.get_data()]
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py", line 285, in get_data
    item = next(self._env_runner)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py", line 671, in _env_runner
    active_envs, to_eval, outputs = _process_observations(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py", line 922, in _process_observations
    prep_obs = preprocessor.transform(raw_obs)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/models/preprocessors.py", line 283, in transform
    self.check_shape(observation)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/models/preprocessors.py", line 69, in check_shape
    observation = convert_element_to_space_type(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/utils/spaces/space_utils.py", line 359, in convert_element_to_space_type
    return tree.map_structure(map_, element, sampled_element, check_types=False)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/tree/__init__.py", line 428, in map_structure
    assert_same_structure(structures[0], other, check_types=check_types)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/tree/__init__.py", line 284, in assert_same_structure
    raise type(e)("%s\n"
ValueError: The two structures don't have the same nested structure.

First structure: type=dict str={'observation': {'candidate_obs': {'job_openings': {'employer_0': 1}, 'accepted_offer': {'employer_0': 0}, 'current_offers': {'employer_0': (0, 0)}, 'rejected_offers': {'employer_0': (0, 0)}, 'counter_offers': {'employer_0': (0, 0)}}, 'employer_obs': {'candidate_strengths': {'candidate_0': 0}, 'job_applicants': {'candidate_0': 0}, 'outstanding_offers': {'candidate_0': (0, 0)}, 'accepted_offers': {'candidate_0': 0}, 'declined_offers': {'candidate_0': (0, 0)}, 'counter_offers': {'candidate_0': (0, 0)}, 'rejected_offers': {'candidate_0': (0, 0)}, 'remaining_budget': 100}}, 'action_mask': array([0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0])}

Second structure: type=OrderedDict str=OrderedDict([('candidate_obs', OrderedDict([('accepted_offer', OrderedDict([('employer_0', 56)])), ('counter_offers', OrderedDict([('employer_0', (99, 8))])), ('current_offers', OrderedDict([('employer_0', (28, 10))])), ('job_openings', OrderedDict([('employer_0', 1)])), ('rejected_offers', OrderedDict([('employer_0', (1, 49))]))])), ('employer_obs', OrderedDict([('accepted_offers', OrderedDict([('candidate_0', 1)])), ('candidate_strengths', OrderedDict([('candidate_0', 79)])), ('counter_offers', OrderedDict([('candidate_0', (66, 5))])), ('declined_offers', OrderedDict([('candidate_0', (1, 7))])), ('job_applicants', OrderedDict([('candidate_0', 1)])), ('outstanding_offers', OrderedDict([('candidate_0', (28, 0))])), ('rejected_offers', OrderedDict([('candidate_0', (0, 4))])), ('remaining_budget', 96)]))])

More specifically: Substructure "type=OrderedDict str=OrderedDict([('accepted_offer', OrderedDict([('employer_0', 56)])), ('counter_offers', OrderedDict([('employer_0', (99, 8))])), ('current_offers', OrderedDict([('employer_0', (28, 10))])), ('job_openings', OrderedDict([('employer_0', 1)])), ('rejected_offers', OrderedDict([('employer_0', (1, 49))]))])" is a sequence, while substructure "type=ndarray str=[0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0]" is not
Entire first structure:
{'observation': {'candidate_obs': {'job_openings': {'employer_0': .}, 'accepted_offer': {'employer_0': .}, 'current_offers': {'employer_0': (., .)}, 'rejected_offers': {'employer_0': (., .)}, 'counter_offers': {'employer_0': (., .)}}, 'employer_obs': {'candidate_strengths': {'candidate_0': .}, 'job_applicants': {'candidate_0': .}, 'outstanding_offers': {'candidate_0': (., .)}, 'accepted_offers': {'candidate_0': .}, 'declined_offers': {'candidate_0': (., .)}, 'counter_offers': {'candidate_0': (., .)}, 'rejected_offers': {'candidate_0': (., .)}, 'remaining_budget': .}}, 'action_mask': .}
Entire second structure:
OrderedDict([('candidate_obs', OrderedDict([('accepted_offer', OrderedDict([('employer_0', .)])), ('counter_offers', OrderedDict([('employer_0', (., .))])), ('current_offers', OrderedDict([('employer_0', (., .))])), ('job_openings', OrderedDict([('employer_0', .)])), ('rejected_offers', OrderedDict([('employer_0', (., .))]))])), ('employer_obs', OrderedDict([('accepted_offers', OrderedDict([('candidate_0', .)])), ('candidate_strengths', OrderedDict([('candidate_0', .)])), ('counter_offers', OrderedDict([('candidate_0', (., .))])), ('declined_offers', OrderedDict([('candidate_0', (., .))])), ('job_applicants', OrderedDict([('candidate_0', .)])), ('outstanding_offers', OrderedDict([('candidate_0', (., .))])), ('rejected_offers', OrderedDict([('candidate_0', (., .))])), ('remaining_budget', .)]))])
```

# Run 7

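The Run 6 error was a structure mismatch: the environment returned a dict with `observation` and `action_mask` keys, while the declared observation space described only the inner observation (`candidate_obs` / `employer_obs`). Fixed by updating the observation space definition to also include the action mask.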

# Run 8

```
Failure # 1 (occurred at 2022-12-13_13-53-45)
ray::PPO.train() (pid=2035053, ip=128.36.232.24, repr=PPO)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 355, in train
    raise skipped from exception_cause(skipped)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 352, in train
    result = self.step()
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 772, in step
    results, train_iter_ctx = self._run_one_training_iteration()
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 2953, in _run_one_training_iteration
    num_recreated += self.try_recover_from_step_attempt(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 2617, in try_recover_from_step_attempt
    raise error
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 2948, in _run_one_training_iteration
    results = self.training_step()
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo.py", line 408, in training_step
    train_batch = synchronous_parallel_sample(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/execution/rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
ray.exceptions.RayTaskError(ValueError): ray::RolloutWorker.sample() (pid=2035175, ip=128.36.232.24, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7fa1c5255d80>)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 828, in sample
    batches = [self.input_reader.next()]
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py", line 92, in next
    batches = [self.get_data()]
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py", line 285, in get_data
    item = next(self._env_runner)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py", line 721, in _env_runner
    base_env.send_actions(actions_to_send)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env.py", line 615, in send_actions
    raise e
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env.py", line 608, in send_actions
    obs, rewards, dones, infos = env.step(agent_dict)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/env/wrappers/pettingzoo_env.py", line 184, in step
    obss, rews, dones, infos = self.par_env.step(action_dict)
ValueError: too many values to unpack (expected 4)
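```

The traceback shows that `self.par_env.step(action_dict)` returned more than the four values (`obss, rews, dones, infos`) that RLlib's PettingZoo wrapper (`pettingzoo_env.py`) tries to unpack.
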
# Run 9

```
Failure # 1 (occurred at 2022-12-13_14-18-31)
ray::PPO.train() (pid=2045783, ip=128.36.232.24, repr=PPO)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 355, in train
    raise skipped from exception_cause(skipped)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 352, in train
    result = self.step()
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 772, in step
    results, train_iter_ctx = self._run_one_training_iteration()
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 2953, in _run_one_training_iteration
    num_recreated += self.try_recover_from_step_attempt(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 2617, in try_recover_from_step_attempt
    raise error
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 2948, in _run_one_training_iteration
    results = self.training_step()
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo.py", line 408, in training_step
    train_batch = synchronous_parallel_sample(
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/execution/rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
ray.exceptions.RayTaskError(KeyError): ray::RolloutWorker.sample() (pid=2045906, ip=128.36.232.24, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f68b792dde0>)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 828, in sample
    batches = [self.input_reader.next()]
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py", line 92, in next
    batches = [self.get_data()]
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py", line 285, in get_data
    item = next(self._env_runner)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py", line 721, in _env_runner
    base_env.send_actions(actions_to_send)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env.py", line 615, in send_actions
    raise e
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env.py", line 608, in send_actions
    obs, rewards, dones, infos = env.step(agent_dict)
  File "/home/accts/ahc49/.local/lib/python3.10/site-packages/ray/rllib/env/wrappers/pettingzoo_env.py", line 184, in step
    obss, rews, dones, infos = self.par_env.step(action_dict)
  File "/home/accts/ahc49/csec491/salary-negotation/environment/job_search_environment.py", line 394, in step
    action, target_index, new_offer_value, new_deadline = actions[agent]
KeyError: 'employer_0'
```

I hadn't actually specified in the config that the custom model should be used. Sigh :(
