In [1]:
# One-time setup: uncomment the next line to install the stock-market fork of ai-economist.
#!pip install -e git+https://github.com/casperbroch/ai-economist@stockmarket#egg=ai-economist

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import time

In [3]:
# Define the configuration of the environment that will be built.
# All scenario/component names are plain strings that are looked up in the
# foundation registries, so they must match the names registered by the
# installed ai-economist fork.

env_config_dict = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of the Scenario class.
    'scenario_name': 'stock_market_simulation',

    # ===== COMPONENTS =====
    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
    #   "component_name" refers to the Component class's name in the Component Registry (foundation.components)
    #   {component_kwargs} is a dictionary of kwargs passed to the Component class
    # The order in which components reset, step, and generate obs follows their listed order below.
    'components': [
        # (1) Buying or selling stocks
        #     (presumably the mobile agents' trading actions -- confirm in the component code)
        ('BuyOrSellStocks', {}),
        # (2) Executing the circuit breaker
        #     (presumably the planner's action -- confirm in the component code)
        ('ExecCircuitBreaker', {}),
    ],

    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
    'n_agents': 4,           # Number of non-planner agents (must be > 1)
    'world_size': [1, 1],    # [Height, Width] of the env world
    'episode_length': 100,   # Number of timesteps per episode

    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # When flattening observations, concatenate scalar & vector observations before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When flattening masks, concatenate each action subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits in the code below.
    'flatten_masks': True,
    # NOTE(review): presumably how often (in episodes) a dense log is recorded -- confirm against BaseEnvironment.
    'dense_log_frequency': 1,
}

In [4]:
from rllib.env_wrapper import RLlibEnvWrapper

# Build one wrapped environment in the notebook process. Later cells read its
# observation/action spaces to declare the policies. With verbose=True the
# wrapper prints those spaces (see the "[EnvWrapper]" lines in the cell output).
env_obj = RLlibEnvWrapper(dict(env_config_dict=env_config_dict), verbose=True)

Inside covid19_components.py: 0 GPUs are available.
No GPUs found! Running the simulation on a CPU.
Inside covid19_env.py: 0 GPUs are available.
No GPUs found! Running the simulation on a CPU.


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


agent reward:  0.595165045951845
agent reward:  1.0
agent reward:  0.553073619819603
agent reward:  0.9699254984868743
planner reward:  1.0
[EnvWrapper] Spaces
[EnvWrapper] Obs (a)   
BuyOrSellStocks-stock_price: (1,)
action_mask    : (21,)
time           : (1,)
world-Endogenous-AbleToBuy: (1,)
world-Endogenous-AbleToSell: (1,)
world-Endogenous-AvailableFunds: (1,)
world-Endogenous-Demand: (1,)
world-Endogenous-Labor: (1,)
world-Endogenous-NumberOfStocks: (1,)
world-Endogenous-StockPrice: (1,)
world-Endogenous-StockPriceHistory: (100,)
world-Endogenous-StocksLeft: (1,)
world-Endogenous-Supply: (1,)
world-Endogenous-TotalBalance: (1,)
world-Endogenous-Volumes: (100,)


[EnvWrapper] Obs (p)   
action_mask    : (2,)
time           : (1,)
world-Prices_History: (100,)
world-Total_Demand: (1,)
world-Total_Supply: (1,)
world-Volumes  : (100,)


[EnvWrapper] Action (a) Discrete(21)
[EnvWrapper] Action (p) Discrete(2)


In [5]:
import ray
from ray.rllib.agents.ppo import PPOTrainer

In [6]:
# Policy specs keyed by policy id. Each spec is a 4-tuple:
#   (policy class or None for the default policy, observation space, action space, per-policy config overrides)
policies = dict(
    # "a": policy for the agents, built from the agent spaces of the wrapper.
    a=(
        None,  # uses default policy
        env_obj.observation_space,
        env_obj.action_space,
        {},  # define a custom agent policy configuration.
    ),
    # "p": policy for the planner, built from the planner ("_pl") spaces of the wrapper.
    p=(
        None,  # uses default policy
        env_obj.observation_space_pl,
        env_obj.action_space_pl,
        {},  # define a custom planner policy configuration.
    ),
)

def policy_mapping_fun(i):
    """Return the id of the policy that controls the agent with id ``i``.

    In foundation, all the agents have integer ids and the social planner has
    an id of "p".

    Args:
        i: Agent id as handed over by the trainer (an int or a string).

    Returns:
        "a" if ``str(i)`` consists only of digits, otherwise "p".
    """
    return "a" if str(i).isdigit() else "p"


# Ids of the policies whose weights are updated during training.
policies_to_train = ["a", "p"]

In [7]:
# Trainer configuration; starts with only the multi-agent section and is
# extended by the following cells.
trainer_config = dict(
    multiagent=dict(
        policies=policies,
        policies_to_train=policies_to_train,
        policy_mapping_fn=policy_mapping_fun,
    ),
)

In [8]:
# Add the sampling and optimisation settings to the trainer configuration.
trainer_config.update(
    num_workers=2,
    num_envs_per_worker=2,
    # Other training parameters
    train_batch_size=4000,
    sgd_minibatch_size=4000,
    num_sgd_iter=1,
)

In [9]:
# The env. wrapper receives the environment definition plus the
# "num_envs_per_worker" value, which it uses to index the environments.
env_config = dict(
    env_config_dict=env_config_dict,
    num_envs_per_worker=trainer_config.get('num_envs_per_worker'),
)

# Register the wrapper's configuration with the trainer configuration.
trainer_config["env_config"] = env_config

In [10]:
# Initialize Ray, with the dashboard bound to localhost.
# ignore_reinit_error=True keeps this cell re-runnable: calling ray.init() a
# second time in the same kernel would otherwise raise because the runtime is
# already up.
ray.init(webui_host="127.0.0.1", ignore_reinit_error=True)

2024-05-03 10:20:16,953	INFO resource_spec.py:212 -- Starting Ray with 4.59 GiB memory available for workers and up to 2.31 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2024-05-03 10:20:18,456	INFO services.py:1165 -- View the Ray dashboard at [1m[32m127.0.0.1:8265[39m[22m


{'node_ip_address': '192.168.1.41',
 'raylet_ip_address': '192.168.1.41',
 'redis_address': '192.168.1.41:6379',
 'object_store_address': 'tcp://127.0.0.1:63514',
 'raylet_socket_name': 'tcp://127.0.0.1:64322',
 'webui_url': '127.0.0.1:8265',
 'session_dir': 'C:\\Users\\caspe\\AppData\\Local\\Temp\\ray\\session_2024-05-03_10-20-16_942609_16728'}

In [11]:
# Create the PPO trainer: RLlib receives the wrapper class itself and builds
# the environments from trainer_config["env_config"].
trainer = PPOTrainer(config=trainer_config, env=RLlibEnvWrapper)

2024-05-03 10:20:23,857	ERROR syncer.py:46 -- Log sync requires rsync to be installed.
2024-05-03 10:20:23,861	INFO trainer.py:585 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2024-05-03 10:20:23,862	INFO trainer.py:612 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


agent reward:  0.9753437118245103
agent reward:  0.8737585221382482
agent reward:  1.0
agent reward:  0.8638233651045629
planner reward:  1.0
agent reward:  0.75034604328897
agent reward:  0.8602335303174682
agent reward:  0.7431828666409536
agent reward:  1.0
planner reward:  1.0
[2m[36m(pid=9944)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=9944)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=9944)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=9944)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=9944)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=9944)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=6908)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=6908)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=6908)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=6908)[0m   _np_quint16 = np.

[2m[36m(pid=6908)[0m Inside covid19_components.py: 0 GPUs are available.
[2m[36m(pid=6908)[0m No GPUs found! Running the simulation on a CPU.
[2m[36m(pid=6908)[0m Inside covid19_env.py: 0 GPUs are available.
[2m[36m(pid=6908)[0m No GPUs found! Running the simulation on a CPU.
[2m[36m(pid=9944)[0m Inside covid19_components.py: 0 GPUs are available.
[2m[36m(pid=9944)[0m No GPUs found! Running the simulation on a CPU.
[2m[36m(pid=9944)[0m Inside covid19_env.py: 0 GPUs are available.
[2m[36m(pid=9944)[0m No GPUs found! Running the simulation on a CPU.
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.6443111885204135
[2m[36m(pid=6908)[0m agent reward:  0.925444354714466
[2m[36m(pid=6908)[0m agent reward:  0.6788522463599084
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.9304921207245731
[2m[36m(pid=9944)[0m agent reward:  0.8175078933588881
[2m[36m(pid=9944)[0m agent reward:  0.7237

2024-05-03 10:20:36,195	INFO trainable.py:181 -- _setup took 12.336 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [12]:
# Number of calls to trainer.train() performed by this cell.
NUM_ITERS = 10

# Durations are measured with time.perf_counter(): it is monotonic and
# high-resolution, so system clock adjustments cannot distort the timings.
# Note that the start values are therefore not epoch timestamps.
start_train = time.perf_counter()

for iteration in range(NUM_ITERS):
    print(f'********** Iter : {iteration} **********')
    start = time.perf_counter()
    result = trainer.train()
    length = time.perf_counter() - start
    # .get() prints None instead of raising if the metric is missing from the result.
    print(f"episode_reward_mean: {result.get('episode_reward_mean')}")
    print(f"it_time_taken: {length}")

length_train = time.perf_counter() - start_train
print(f"Training took {length_train} seconds.")

********** Iter : 0 **********
[2m[36m(pid=6908)[0m agent reward:  0.7424093639853993
[2m[36m(pid=6908)[0m agent reward:  0.9127210393312177
[2m[36m(pid=6908)[0m agent reward:  0.8642813947045876
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.9439251941555353
[2m[36m(pid=9944)[0m agent reward:  0.7524753150956048
[2m[36m(pid=9944)[0m agent reward:  0.850130056046837
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6971938401995664
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6851729574236387
[2m[36m(pid=9944)[0m agent reward:  0.8418676770701939
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.8109941040846143
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.5417580087768544
[2m[36m(pid=6908)[0m a

[2m[36m(pid=9944)[0m planner reward:  -46.11360514450612
[2m[36m(pid=9944)[0m agent reward:  0.6443177098434547
[2m[36m(pid=9944)[0m agent reward:  0.5653133875202139
[2m[36m(pid=9944)[0m agent reward:  0.6502692179204267
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -52.76741605187174
[2m[36m(pid=9944)[0m agent reward:  0.5990225968494741
[2m[36m(pid=9944)[0m agent reward:  0.5697711470575809
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6298541455693956
[2m[36m(pid=9944)[0m planner reward:  -41.82627541837545
[2m[36m(pid=9944)[0m agent reward:  0.5465551577748097
[2m[36m(pid=9944)[0m agent reward:  0.5765335042721121
[2m[36m(pid=9944)[0m agent reward:  0.6631755044268148
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -52.82420512832539
[2m[36m(pid=9944)[0m agent reward:  0.5990225968494741
[2m[36m(pid=9944)[0m agent rewa

[2m[36m(pid=9944)[0m agent reward:  0.778561769492701
[2m[36m(pid=9944)[0m agent reward:  0.535428275831019
[2m[36m(pid=9944)[0m agent reward:  0.6228002275639726
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -59.27819679069657
[2m[36m(pid=9944)[0m agent reward:  0.3552143395605204
[2m[36m(pid=9944)[0m agent reward:  0.4403862228470863
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5096342392879261
[2m[36m(pid=9944)[0m planner reward:  -37.039208678592885
[2m[36m(pid=9944)[0m agent reward:  0.778561769492701
[2m[36m(pid=9944)[0m agent reward:  0.535428275831019
[2m[36m(pid=9944)[0m agent reward:  0.6228002275639726
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -58.447747425514194
[2m[36m(pid=9944)[0m agent reward:  0.3552143395605204
[2m[36m(pid=9944)[0m agent reward:  0.4403862228470863
[2m[36m(pid=9944)[0m agent reward: 

[2m[36m(pid=9944)[0m agent reward:  0.7594682238077036
[2m[36m(pid=9944)[0m agent reward:  0.8675517685281589
[2m[36m(pid=9944)[0m agent reward:  0.7604002109569802
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -65.93211695064007
[2m[36m(pid=9944)[0m agent reward:  0.47079554925575146
[2m[36m(pid=9944)[0m agent reward:  0.3193512998323921
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5839576707932019
[2m[36m(pid=9944)[0m planner reward:  -35.273459338433206
[2m[36m(pid=9944)[0m agent reward:  0.7594682238077036
[2m[36m(pid=9944)[0m agent reward:  0.8675517685281589
[2m[36m(pid=9944)[0m agent reward:  0.7604002109569802
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -65.73178424894807
[2m[36m(pid=9944)[0m agent reward:  0.48390173912564216
[2m[36m(pid=9944)[0m agent reward:  0.32044858932597337
[2m[36m(pid=9944)[0m agent re

[2m[36m(pid=9944)[0m agent reward:  0.8765308254018958
[2m[36m(pid=9944)[0m agent reward:  0.9610330425790434
[2m[36m(pid=9944)[0m agent reward:  0.7482891545522095
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -63.120114861228956
[2m[36m(pid=9944)[0m agent reward:  0.48063672758235354
[2m[36m(pid=9944)[0m agent reward:  0.32370861961070446
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5904772442514148
[2m[36m(pid=9944)[0m planner reward:  -35.65533847482286
[2m[36m(pid=9944)[0m agent reward:  0.8765308254018958
[2m[36m(pid=9944)[0m agent reward:  0.9610330425790434
[2m[36m(pid=9944)[0m agent reward:  0.7482891545522095
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -61.50921948364678
[2m[36m(pid=9944)[0m agent reward:  0.48083510847712324
[2m[36m(pid=9944)[0m agent reward:  0.3203921551093576
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[

[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.562276386528291
[2m[36m(pid=6908)[0m agent reward:  0.29029507314076836
[2m[36m(pid=6908)[0m agent reward:  0.252052323803999
[2m[36m(pid=6908)[0m planner reward:  -31.62520551659312
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.22354589367207556
[2m[36m(pid=9944)[0m agent reward:  0.21216724982121046
[2m[36m(pid=9944)[0m agent reward:  0.28258936336815477
[2m[36m(pid=9944)[0m planner reward:  -66.63086591983544
[2m[36m(pid=6908)[0m agent reward:  0.7512325067359723
[2m[36m(pid=6908)[0m agent reward:  0.08430261463022683
[2m[36m(pid=6908)[0m agent reward:  0.5540339530730416
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.9397858783466344
[2m[36m(pid=9944)[0m agent reward:  0.9036614467969558
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.583709281

[2m[36m(pid=6908)[0m agent reward:  0.6625922558612407
[2m[36m(pid=6908)[0m agent reward:  0.34382256666559136
[2m[36m(pid=6908)[0m agent reward:  0.24803418004886366
[2m[36m(pid=6908)[0m planner reward:  -31.801961591875454
[2m[36m(pid=6908)[0m agent reward:  0.36571733311939225
[2m[36m(pid=6908)[0m agent reward:  0.061327793067480436
[2m[36m(pid=6908)[0m agent reward:  0.2508534365416619
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m planner reward:  -47.35748930061978
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.6625922558612407
[2m[36m(pid=6908)[0m agent reward:  0.34382256666559136
[2m[36m(pid=6908)[0m agent reward:  0.24803418004886366
[2m[36m(pid=6908)[0m planner reward:  -31.295133848212288
[2m[36m(pid=6908)[0m agent reward:  0.3657947687255558
[2m[36m(pid=6908)[0m agent reward:  0.061327097531371125
[2m[36m(pid=6908)[0m agent reward:  0.25080881861099374
[2m[36m

[2m[36m(pid=9944)[0m agent reward:  0.20535132989984883
[2m[36m(pid=9944)[0m agent reward:  0.17496774860800995
[2m[36m(pid=9944)[0m agent reward:  0.11654579319846955
[2m[36m(pid=9944)[0m planner reward:  -143.59007506370054
[2m[36m(pid=9944)[0m agent reward:  0.24598123579703593
[2m[36m(pid=9944)[0m agent reward:  0.42240276045942937
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.7588490118141955
[2m[36m(pid=9944)[0m planner reward:  -23.159346581573416
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.210012669843464
[2m[36m(pid=9944)[0m agent reward:  0.1762662456892097
[2m[36m(pid=9944)[0m agent reward:  0.11691542312122272
[2m[36m(pid=9944)[0m planner reward:  -134.00238938139498
[2m[36m(pid=9944)[0m agent reward:  0.24598123579703593
[2m[36m(pid=9944)[0m agent reward:  0.42240276045942937
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m ag

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.21011616410351866
[2m[36m(pid=9944)[0m agent reward:  0.17710860678577
[2m[36m(pid=9944)[0m agent reward:  0.10661487349480779
[2m[36m(pid=9944)[0m planner reward:  -100.87677409262254
[2m[36m(pid=9944)[0m agent reward:  0.20484074889551224
[2m[36m(pid=9944)[0m agent reward:  0.42723104358827196
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.767219163219908
[2m[36m(pid=9944)[0m planner reward:  -22.26199562081493
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.20051833551687484
[2m[36m(pid=9944)[0m agent reward:  0.16925583794215235
[2m[36m(pid=9944)[0m agent reward:  0.10623819599876881
[2m[36m(pid=9944)[0m planner reward:  -98.88685991547655
[2m[36m(pid=9944)[0m agent reward:  0.21546686945476046
[2m[36m(pid=9944)[0m agent reward:  0.4516493845328762
[2m[36m(pid=9944)[0m agent 

[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.7246934382134613
[2m[36m(pid=6908)[0m agent reward:  0.8500812419169061
[2m[36m(pid=6908)[0m agent reward:  0.7639669667986054
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5950001199661408
[2m[36m(pid=9944)[0m agent reward:  0.8311798708511733
[2m[36m(pid=9944)[0m agent reward:  0.4717710073128189
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.9174878965652123
[2m[36m(pid=9944)[0m agent reward:  0.6450794937635638
[2m[36m(pid=9944)[0m agent reward:  0.9656527694085913
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.5527890187702582
[2m[36m(pid=6908)[0m agent reward:  0.5221342394475572
[2m[36m(pid=6908)[0m agent reward:  0.543659506067717
[2m[36m(pid=6908)[0m agent reward:  1.

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5211118787257916
[2m[36m(pid=9944)[0m agent reward:  0.5639516571139697
[2m[36m(pid=9944)[0m agent reward:  0.3565300648829169
[2m[36m(pid=9944)[0m planner reward:  -16.653768817829395
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.4617900608992068
[2m[36m(pid=9944)[0m agent reward:  0.6931370635961835
[2m[36m(pid=9944)[0m agent reward:  0.6627846906348628
[2m[36m(pid=9944)[0m planner reward:  -33.836314791962515
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5211118787257916
[2m[36m(pid=9944)[0m agent reward:  0.5639516571139697
[2m[36m(pid=9944)[0m agent reward:  0.3565300648829169
[2m[36m(pid=9944)[0m planner reward:  -21.215161798688563
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.4617900608992068
[2m[36m(pid=9944)[0m agent reward:  0.6931370

[2m[36m(pid=9944)[0m agent reward:  0.4906138178968526
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.4378245826488983
[2m[36m(pid=9944)[0m agent reward:  0.21922415078002955
[2m[36m(pid=9944)[0m planner reward:  -45.15167476332944
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.4680104820646493
[2m[36m(pid=9944)[0m agent reward:  0.6619553639718753
[2m[36m(pid=9944)[0m agent reward:  0.28971044757983655
[2m[36m(pid=9944)[0m planner reward:  -21.830912838548606
[2m[36m(pid=9944)[0m agent reward:  0.4907251899801115
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.43855136967670766
[2m[36m(pid=9944)[0m agent reward:  0.21921996586581258
[2m[36m(pid=9944)[0m planner reward:  -44.39876667146194
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.4680104820646493
[2m[36m(pid=9944)[0m agent reward:  0.66195

[2m[36m(pid=9944)[0m agent reward:  0.7666556367853173
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.43787278287871834
[2m[36m(pid=9944)[0m agent reward:  0.19769545852096715
[2m[36m(pid=9944)[0m planner reward:  -38.541930159783895
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.4616333470994577
[2m[36m(pid=9944)[0m agent reward:  0.6264500135465203
[2m[36m(pid=9944)[0m agent reward:  0.27243913840326145
[2m[36m(pid=9944)[0m planner reward:  -21.680438473362532
[2m[36m(pid=9944)[0m agent reward:  0.7666556367853173
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.43787278287871834
[2m[36m(pid=9944)[0m agent reward:  0.19769545852096715
[2m[36m(pid=9944)[0m planner reward:  -38.520262373724584
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.45938041689038733
[2m[36m(pid=9944)[0m agent reward:  0.6

[2m[36m(pid=9944)[0m agent reward:  0.8080698893936481
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.45080076158268056
[2m[36m(pid=9944)[0m agent reward:  0.18463713023993875
[2m[36m(pid=9944)[0m planner reward:  -32.667968828533425
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.45225200457225617
[2m[36m(pid=9944)[0m agent reward:  0.618332225619171
[2m[36m(pid=9944)[0m agent reward:  0.26432336086891906
[2m[36m(pid=9944)[0m planner reward:  -22.21890559555875
[2m[36m(pid=9944)[0m agent reward:  0.8080698893936481
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.45080076158268056
[2m[36m(pid=9944)[0m agent reward:  0.18463713023993875
[2m[36m(pid=9944)[0m planner reward:  -32.66912926330022
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.45225200457225617
[2m[36m(pid=9944)[0m agent reward:  0.618

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.682518373156084
[2m[36m(pid=9944)[0m agent reward:  0.36860235562009863
[2m[36m(pid=9944)[0m agent reward:  0.3010003165507978
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.9285865027428094
[2m[36m(pid=6908)[0m agent reward:  0.6168652400517265
[2m[36m(pid=6908)[0m agent reward:  0.9035356149567588
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.7235740309740344
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.5645817668849528
[2m[36m(pid=6908)[0m agent reward:  0.8461704365882154
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.9285865027428094
[2m[36m(pid=6908)[0m agent reward:  0.6142331154819768
[2m[36m(pid=6908)[0m agent reward:  0.8995874281021344
[2m[36m(pid=6908)[0m agent reward:  1

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.8763707705434597
[2m[36m(pid=9944)[0m agent reward:  0.4630165918717419
[2m[36m(pid=9944)[0m agent reward:  0.2948950197293179
[2m[36m(pid=9944)[0m planner reward:  -37.19462236786785
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6848150887512363
[2m[36m(pid=9944)[0m agent reward:  0.41793176205513033
[2m[36m(pid=9944)[0m agent reward:  0.7446382213364533
[2m[36m(pid=9944)[0m planner reward:  -39.27284006446056
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.8711292148119628
[2m[36m(pid=9944)[0m agent reward:  0.47337910041710196
[2m[36m(pid=9944)[0m agent reward:  0.26995528661691354
[2m[36m(pid=9944)[0m planner reward:  -36.242788509115144
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5860719101049822
[2m[36m(pid=9944)[0m agent reward:  0.343488

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.7571371302862342
[2m[36m(pid=9944)[0m agent reward:  0.3499347298614421
[2m[36m(pid=9944)[0m agent reward:  0.2350005999881598
[2m[36m(pid=9944)[0m planner reward:  -34.50384688464488
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.44531909891843735
[2m[36m(pid=9944)[0m agent reward:  0.4546902912845462
[2m[36m(pid=9944)[0m agent reward:  0.7229568121204478
[2m[36m(pid=9944)[0m planner reward:  -23.78675241804168
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.7571371302862342
[2m[36m(pid=9944)[0m agent reward:  0.3499347298614421
[2m[36m(pid=9944)[0m agent reward:  0.2350005999881598
[2m[36m(pid=9944)[0m planner reward:  -38.87172903724686
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.44531909891843735
[2m[36m(pid=9944)[0m agent reward:  0.45469029

[2m[36m(pid=9944)[0m agent reward:  0.8204853637087982
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5744940867167037
[2m[36m(pid=9944)[0m agent reward:  0.12979867622071856
[2m[36m(pid=9944)[0m planner reward:  -82.48700092351112
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.31008732986458937
[2m[36m(pid=9944)[0m agent reward:  0.3932355016730929
[2m[36m(pid=9944)[0m agent reward:  0.6098569191480796
[2m[36m(pid=9944)[0m planner reward:  -21.241308109041146
[2m[36m(pid=9944)[0m agent reward:  0.8235195911280859
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5719149261836077
[2m[36m(pid=9944)[0m agent reward:  0.13016161813442562
[2m[36m(pid=9944)[0m planner reward:  -81.26227598278133
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.3099095446303589
[2m[36m(pid=9944)[0m agent reward:  0.395401

[2m[36m(pid=9944)[0m agent reward:  0.8608206441183678
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6834775664242257
[2m[36m(pid=9944)[0m agent reward:  0.1309231200724349
[2m[36m(pid=9944)[0m planner reward:  -73.99226526620814
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.30258800570166233
[2m[36m(pid=9944)[0m agent reward:  0.3826672109789832
[2m[36m(pid=9944)[0m agent reward:  0.597815876498806
[2m[36m(pid=9944)[0m planner reward:  -20.730564675399947
[2m[36m(pid=9944)[0m agent reward:  0.8608206441183678
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6834775664242257
[2m[36m(pid=9944)[0m agent reward:  0.1309231200724349
[2m[36m(pid=9944)[0m planner reward:  -83.84798694709846
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.30258800570166233
[2m[36m(pid=9944)[0m agent reward:  0.38266721

[2m[36m(pid=6908)[0m agent reward:  0.8399309495324276
[2m[36m(pid=6908)[0m agent reward:  0.7241495648081641
[2m[36m(pid=6908)[0m agent reward:  0.5802117856360176
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5771882484124985
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.9313303294218366
[2m[36m(pid=9944)[0m agent reward:  0.7494823821399765
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.7792587126966362
[2m[36m(pid=6908)[0m agent reward:  0.769801139457125
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.8733051541416704
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6405098299056524
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.40453702595643626
[2m[36m(pid=9944)[0m agent reward:  0.62136816885720

[2m[36m(pid=9944)[0m agent reward:  0.6456304995011497
[2m[36m(pid=9944)[0m agent reward:  0.9044329072777396
[2m[36m(pid=9944)[0m agent reward:  0.7093970182810104
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -22.46726464584367
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.2661950367894282
[2m[36m(pid=9944)[0m agent reward:  0.1433275910206369
[2m[36m(pid=9944)[0m agent reward:  0.6230776511911881
[2m[36m(pid=9944)[0m planner reward:  -48.916326829536445
[2m[36m(pid=9944)[0m agent reward:  0.5930571831886594
[2m[36m(pid=9944)[0m agent reward:  0.8813278591798045
[2m[36m(pid=9944)[0m agent reward:  0.6504608033892324
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -20.65347464123051
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.2661950367894282
[2m[36m(pid=9944)[0m agent reward:  0.143327591

[2m[36m(pid=9944)[0m agent reward:  0.43359646150590075
[2m[36m(pid=9944)[0m agent reward:  0.678108902158077
[2m[36m(pid=9944)[0m agent reward:  0.48356702281291475
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -21.12167725537081
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.2602313124121499
[2m[36m(pid=9944)[0m agent reward:  0.14984766543127834
[2m[36m(pid=9944)[0m agent reward:  0.6915955446884491
[2m[36m(pid=9944)[0m planner reward:  -44.97252930183086
[2m[36m(pid=9944)[0m agent reward:  0.43359646150590075
[2m[36m(pid=9944)[0m agent reward:  0.678108902158077
[2m[36m(pid=9944)[0m agent reward:  0.48356702281291475
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -21.56899491540821
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.24907067164685484
[2m[36m(pid=9944)[0m agent reward:  0.146087

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.19951781596139034
[2m[36m(pid=9944)[0m agent reward:  0.1267770316839857
[2m[36m(pid=9944)[0m agent reward:  0.8772597106909108
[2m[36m(pid=9944)[0m planner reward:  -49.762998257968654
[2m[36m(pid=9944)[0m agent reward:  0.553588084686822
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5830326553034293
[2m[36m(pid=9944)[0m agent reward:  0.9924930908713798
[2m[36m(pid=9944)[0m planner reward:  -28.427491271103854
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.19951781596139034
[2m[36m(pid=9944)[0m agent reward:  0.1267770316839857
[2m[36m(pid=9944)[0m agent reward:  0.8772597106909108
[2m[36m(pid=9944)[0m planner reward:  -49.78522235515537
[2m[36m(pid=9944)[0m agent reward:  0.553588084686822
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.58303265

[2m[36m(pid=9944)[0m agent reward:  0.7759664206389727
[2m[36m(pid=9944)[0m agent reward:  0.9899717019625692
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.8226253757407995
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.7592642660763507
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.8628010606068962
[2m[36m(pid=6908)[0m agent reward:  0.8825287219754051
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.8578484917505352
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6430090163121058
[2m[36m(pid=9944)[0m agent reward:  0.9589977373483152
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.7689246769779486
[2m[36m(pid=9944)[0m agent reward:  0.9956669108803893
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.82130217288857

[2m[36m(pid=9944)[0m agent reward:  0.6607841852548861
[2m[36m(pid=9944)[0m agent reward:  0.5120842470019862
[2m[36m(pid=9944)[0m agent reward:  0.7994019464396245
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -22.155154161744477
[2m[36m(pid=9944)[0m agent reward:  0.5549964237506395
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5149496771838249
[2m[36m(pid=9944)[0m agent reward:  0.804771843326627
[2m[36m(pid=9944)[0m planner reward:  -48.7210531639195
[2m[36m(pid=9944)[0m agent reward:  0.6607841852548861
[2m[36m(pid=9944)[0m agent reward:  0.5120842470019862
[2m[36m(pid=9944)[0m agent reward:  0.7994019464396245
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -21.330492127931326
[2m[36m(pid=9944)[0m agent reward:  0.5549964237506395
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5149496771

[2m[36m(pid=9944)[0m agent reward:  0.4527810805042649
[2m[36m(pid=9944)[0m agent reward:  0.4068635799371815
[2m[36m(pid=9944)[0m agent reward:  0.7596999889862103
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -20.879785028962527
[2m[36m(pid=9944)[0m agent reward:  0.4985734086143145
[2m[36m(pid=9944)[0m agent reward:  0.8833529058656566
[2m[36m(pid=9944)[0m agent reward:  0.5445722460856617
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -84.18613765654273
[2m[36m(pid=9944)[0m agent reward:  0.4527810805042649
[2m[36m(pid=9944)[0m agent reward:  0.4068635799371815
[2m[36m(pid=9944)[0m agent reward:  0.7596999889862103
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -22.280890158001224
[2m[36m(pid=9944)[0m agent reward:  0.4985734086143145
[2m[36m(pid=9944)[0m agent reward:  0.8833529058656566
[2m[36m(pid=9944)[0m agent rewa

[2m[36m(pid=9944)[0m agent reward:  0.5419655046961394
[2m[36m(pid=9944)[0m agent reward:  0.5680867876977264
[2m[36m(pid=9944)[0m agent reward:  0.7032613234452331
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -24.353082011642062
[2m[36m(pid=9944)[0m agent reward:  0.5461689415609502
[2m[36m(pid=9944)[0m agent reward:  0.811428598329082
[2m[36m(pid=9944)[0m agent reward:  0.6379901008356897
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -93.21727125179264
[2m[36m(pid=9944)[0m agent reward:  0.5419655046961394
[2m[36m(pid=9944)[0m agent reward:  0.5680867876977264
[2m[36m(pid=9944)[0m agent reward:  0.7032613234452331
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -24.83096146400925
[2m[36m(pid=9944)[0m agent reward:  0.5461689415609502
[2m[36m(pid=9944)[0m agent reward:  0.811428598329082
[2m[36m(pid=9944)[0m agent reward:

[2m[36m(pid=9944)[0m agent reward:  0.41488607738141414
[2m[36m(pid=9944)[0m agent reward:  0.41258668929465936
[2m[36m(pid=9944)[0m agent reward:  0.6288381314828598
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -27.96576994286239
[2m[36m(pid=9944)[0m agent reward:  0.3932288635447595
[2m[36m(pid=9944)[0m agent reward:  0.8222129031853708
[2m[36m(pid=9944)[0m agent reward:  0.6392729553933235
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -81.12858944529523
[2m[36m(pid=9944)[0m agent reward:  0.41488607738141414
[2m[36m(pid=9944)[0m agent reward:  0.41258668929465936
[2m[36m(pid=9944)[0m agent reward:  0.6288381314828598
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -29.017335825074117
[2m[36m(pid=9944)[0m agent reward:  0.18195705626194084
[2m[36m(pid=9944)[0m agent reward:  0.6291217151076403
[2m[36m(pid=9944)[0m agent 

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.7046248329808129
[2m[36m(pid=9944)[0m agent reward:  0.6622502378209656
[2m[36m(pid=9944)[0m agent reward:  0.603209958342647
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.7453762245495322
[2m[36m(pid=6908)[0m agent reward:  0.8764728206104639
[2m[36m(pid=6908)[0m agent reward:  0.7072104174735688
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.5347489513308409
[2m[36m(pid=9944)[0m agent reward:  0.7723418670771254
[2m[36m(pid=9944)[0m agent reward:  0.821245105218829
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.7004838565648894
[2m[36m(pid=9944)[0m agent reward:  0.6632633446567556
[2m[36m(pid=9944)[0m agent reward:  0.6031751707177639

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.23726151684644095
[2m[36m(pid=9944)[0m agent reward:  0.1637655475378212
[2m[36m(pid=9944)[0m agent reward:  0.27422417225366513
[2m[36m(pid=9944)[0m planner reward:  -43.30684128214377
[2m[36m(pid=9944)[0m agent reward:  0.5197781347853253
[2m[36m(pid=9944)[0m agent reward:  0.35848704626594863
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.30755340298077166
[2m[36m(pid=9944)[0m planner reward:  -80.14545265558124
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.20851660830239405
[2m[36m(pid=9944)[0m agent reward:  0.1675329279360903
[2m[36m(pid=9944)[0m agent reward:  0.2780891054411984
[2m[36m(pid=9944)[0m planner reward:  -41.048036485964055
[2m[36m(pid=9944)[0m agent reward:  0.5139607710137052
[2m[36m(pid=9944)[0m agent reward:  0.35819493012968157
[2m[36m(pid=9944)[0m agent

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.21015695792512018
[2m[36m(pid=9944)[0m planner reward:  -89.53359611556066
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.1104154156128341
[2m[36m(pid=9944)[0m agent reward:  0.129743451220377
[2m[36m(pid=9944)[0m agent reward:  0.16583368418124048
[2m[36m(pid=9944)[0m planner reward:  -45.64857333815349
[2m[36m(pid=9944)[0m agent reward:  0.16349709976786592
[2m[36m(pid=9944)[0m agent reward:  0.3506910674743677
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.21307019663985807
[2m[36m(pid=9944)[0m planner reward:  -89.08327280880204
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.10855398569367922
[2m[36m(pid=9944)[0m agent reward:  0.11850634528477642
[2m[36m(pid=9944)[0m agent reward:  0.1637322124640522
[2m[36m(pid=9944)[0m planner reward:  -48.0

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.10677138324490874
[2m[36m(pid=9944)[0m agent reward:  0.13975427621100123
[2m[36m(pid=9944)[0m agent reward:  0.1826108975621563
[2m[36m(pid=9944)[0m planner reward:  -45.13557353878183
[2m[36m(pid=9944)[0m agent reward:  0.07020292767203627
[2m[36m(pid=9944)[0m agent reward:  0.4249237415018388
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.3663423807933692
[2m[36m(pid=9944)[0m planner reward:  -69.71953466068234
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.10711325523984308
[2m[36m(pid=9944)[0m agent reward:  0.11780295175608055
[2m[36m(pid=9944)[0m agent reward:  0.18198485440310624
[2m[36m(pid=9944)[0m planner reward:  -47.344941643012596
[2m[36m(pid=9944)[0m agent reward:  0.057870735009797045
[2m[36m(pid=9944)[0m agent reward:  0.39623976055537613
[2m[36m(pid=9944)[0m ag

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.08939569957568913
[2m[36m(pid=9944)[0m agent reward:  0.23900897584644656
[2m[36m(pid=9944)[0m agent reward:  0.1725815097708263
[2m[36m(pid=9944)[0m planner reward:  -46.76493070641283
[2m[36m(pid=9944)[0m agent reward:  0.05082493636395289
[2m[36m(pid=9944)[0m agent reward:  0.420944004588038
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.3229935476195713
[2m[36m(pid=9944)[0m planner reward:  -73.98845135966499
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.08939569957568913
[2m[36m(pid=9944)[0m agent reward:  0.23900897584644656
[2m[36m(pid=9944)[0m agent reward:  0.1725815097708263
[2m[36m(pid=9944)[0m planner reward:  -48.38660458157221
[2m[36m(pid=9944)[0m agent reward:  0.05082493636395289
[2m[36m(pid=9944)[0m agent reward:  0.420944004588038
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[

[2m[36m(pid=9944)[0m agent reward:  0.4809958548691187
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.3983030168070126
[2m[36m(pid=9944)[0m agent reward:  0.7086319655733587
[2m[36m(pid=9944)[0m planner reward:  -47.42582988796729
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.5901414236997015
[2m[36m(pid=6908)[0m agent reward:  0.6003092319226158
[2m[36m(pid=6908)[0m agent reward:  0.6500679467148612
[2m[36m(pid=6908)[0m planner reward:  -21.349687184891255
[2m[36m(pid=9944)[0m agent reward:  0.6575871914011802
[2m[36m(pid=9944)[0m agent reward:  0.8426402961745821
[2m[36m(pid=9944)[0m agent reward:  0.4081298843005634
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -23.788809843194002
[2m[36m(pid=9944)[0m agent reward:  0.4809958548691187
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.39830301

[2m[36m(pid=9944)[0m agent reward:  0.523219185776983
[2m[36m(pid=9944)[0m agent reward:  0.8669510084800945
[2m[36m(pid=9944)[0m agent reward:  0.29462569448254206
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -21.947740507548335
[2m[36m(pid=9944)[0m agent reward:  0.3275286210682163
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.22943096826013604
[2m[36m(pid=9944)[0m agent reward:  0.5868900759341202
[2m[36m(pid=9944)[0m planner reward:  -38.447941380779774
[2m[36m(pid=9944)[0m agent reward:  0.523219185776983
[2m[36m(pid=9944)[0m agent reward:  0.8669510084800945
[2m[36m(pid=9944)[0m agent reward:  0.29462569448254206
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -21.96208075601684
[2m[36m(pid=9944)[0m agent reward:  0.3337421476717032
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.2337529

[2m[36m(pid=9944)[0m agent reward:  0.2833932146370119
[2m[36m(pid=9944)[0m agent reward:  0.7616111123067603
[2m[36m(pid=9944)[0m agent reward:  0.16332720369644688
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -48.7094235112825
[2m[36m(pid=9944)[0m agent reward:  0.3851981011209314
[2m[36m(pid=9944)[0m agent reward:  0.8894277833362554
[2m[36m(pid=9944)[0m agent reward:  0.23895416086495438
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -36.50820431476659
[2m[36m(pid=9944)[0m agent reward:  0.2833932146370119
[2m[36m(pid=9944)[0m agent reward:  0.7616111123067603
[2m[36m(pid=9944)[0m agent reward:  0.16332720369644688
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -48.068473407073135
[2m[36m(pid=9944)[0m agent reward:  0.3851981011209314
[2m[36m(pid=9944)[0m agent reward:  0.8894277833362554
[2m[36m(pid=9944)[0m agent rew

[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.2728607481655199
[2m[36m(pid=6908)[0m agent reward:  0.4026703923600597
[2m[36m(pid=6908)[0m agent reward:  0.6606988209995853
[2m[36m(pid=6908)[0m planner reward:  -14.740459991593097
[2m[36m(pid=9944)[0m agent reward:  0.25927525813371094
[2m[36m(pid=9944)[0m agent reward:  0.5664487881833637
[2m[36m(pid=9944)[0m agent reward:  0.1544740898010382
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -33.9637406942957
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.30811967227193887
[2m[36m(pid=6908)[0m agent reward:  0.46859609871042823
[2m[36m(pid=6908)[0m agent reward:  0.2710566749338531
[2m[36m(pid=6908)[0m planner reward:  -44.17090699620156
[2m[36m(pid=9944)[0m agent reward:  0.2508582110699115
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.1274004

[2m[36m(pid=9944)[0m agent reward:  0.25831005490602593
[2m[36m(pid=9944)[0m agent reward:  0.5618145363845416
[2m[36m(pid=9944)[0m agent reward:  0.09131613427950042
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -32.564427467321316
[2m[36m(pid=9944)[0m agent reward:  0.2976149498232207
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.12885242634371122
[2m[36m(pid=9944)[0m agent reward:  0.5839975976978216
[2m[36m(pid=9944)[0m planner reward:  -39.26384973981233
[2m[36m(pid=9944)[0m agent reward:  0.2621855815870734
[2m[36m(pid=9944)[0m agent reward:  0.5631087174574696
[2m[36m(pid=9944)[0m agent reward:  0.09152648770573607
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -33.23621461641875
[2m[36m(pid=9944)[0m agent reward:  0.2886215112305277
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.12718

[2m[36m(pid=9944)[0m agent reward:  0.8429572870110804
[2m[36m(pid=9944)[0m agent reward:  0.9285032370380395
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.8961268494640424
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.7458370673928899
[2m[36m(pid=6908)[0m agent reward:  0.8516074270421822
[2m[36m(pid=6908)[0m agent reward:  0.9135776732136784
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6647021150988743
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6243787849037761
[2m[36m(pid=9944)[0m agent reward:  0.981700859492719
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.647119579622168
[2m[36m(pid=6908)[0m agent reward:  0.9832784857273625
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.6516509094502829

[2m[36m(pid=9944)[0m agent reward:  0.05831353356814818
[2m[36m(pid=9944)[0m agent reward:  0.06382147394050361
[2m[36m(pid=9944)[0m agent reward:  0.06971851438883307
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -304.14940534415433
[2m[36m(pid=9944)[0m agent reward:  0.6585340739244946
[2m[36m(pid=9944)[0m agent reward:  0.6576323861055321
[2m[36m(pid=9944)[0m agent reward:  0.5191321234798854
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -27.691081173966193
[2m[36m(pid=9944)[0m agent reward:  0.05774785609291871
[2m[36m(pid=9944)[0m agent reward:  0.062186677899895
[2m[36m(pid=9944)[0m agent reward:  0.07038320088818528
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -289.65898759927256
[2m[36m(pid=9944)[0m agent reward:  0.6994960116183114
[2m[36m(pid=9944)[0m agent reward:  0.5396174551565108
[2m[36m(pid=9944)[0m agent

[2m[36m(pid=9944)[0m agent reward:  0.042397436542897925
[2m[36m(pid=9944)[0m agent reward:  0.04969440442415588
[2m[36m(pid=9944)[0m agent reward:  0.04456286469482993
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -281.1111249646355
[2m[36m(pid=9944)[0m agent reward:  0.48243519378064453
[2m[36m(pid=9944)[0m agent reward:  0.5161117890387311
[2m[36m(pid=9944)[0m agent reward:  0.342978911705015
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -20.541734378279013
[2m[36m(pid=9944)[0m agent reward:  0.0380221996179424
[2m[36m(pid=9944)[0m agent reward:  0.053377374415542515
[2m[36m(pid=9944)[0m agent reward:  0.05247633673682703
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -275.1144258158059
[2m[36m(pid=9944)[0m agent reward:  0.5194565112388343
[2m[36m(pid=9944)[0m agent reward:  0.554604481773687
[2m[36m(pid=9944)[0m agent

[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m planner reward:  -38.48890832187842
[2m[36m(pid=9944)[0m agent reward:  0.027410550935691957
[2m[36m(pid=9944)[0m agent reward:  0.053999649044956625
[2m[36m(pid=9944)[0m agent reward:  0.02454913456530083
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -259.25901151202333
[2m[36m(pid=9944)[0m agent reward:  0.5163723192121817
[2m[36m(pid=9944)[0m agent reward:  0.5510948272182603
[2m[36m(pid=9944)[0m agent reward:  0.4021165448552606
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -20.046831134815882
[2m[36m(pid=9944)[0m agent reward:  0.02710758198983539
[2m[36m(pid=9944)[0m agent reward:  0.05671225576048132
[2m[36m(pid=9944)[0m agent reward:  0.025066273194526822
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -260.0836707048426
[2m[36m(pid=9944)[0m agent reward: 

[2m[36m(pid=9944)[0m agent reward:  0.01964062030253152
[2m[36m(pid=9944)[0m agent reward:  0.046062753548090925
[2m[36m(pid=9944)[0m agent reward:  0.020035197701693076
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -250.81669497691468
[2m[36m(pid=9944)[0m agent reward:  0.5154467631348943
[2m[36m(pid=9944)[0m agent reward:  0.5077557785740887
[2m[36m(pid=9944)[0m agent reward:  0.38767444174638144
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -20.33988046995973
[2m[36m(pid=9944)[0m agent reward:  0.01964062030253152
[2m[36m(pid=9944)[0m agent reward:  0.046062753548090925
[2m[36m(pid=9944)[0m agent reward:  0.020035197701693076
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  -250.74209499955813
[2m[36m(pid=9944)[0m agent reward:  0.5154467631348943
[2m[36m(pid=9944)[0m agent reward:  0.5077557785740887
[2m[36m(pid=9944)[0m

[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6042293550537774
[2m[36m(pid=9944)[0m agent reward:  0.6346431036996772
[2m[36m(pid=9944)[0m agent reward:  0.7852591386929517
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.9041362500290954
[2m[36m(pid=6908)[0m agent reward:  0.6195509519344581
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.8100271066982425
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.7439741455256923
[2m[36m(pid=9944)[0m agent reward:  0.7715281235216249
[2m[36m(pid=9944)[0m agent reward:  0.8619064665855227
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.6042293550537774
[2m[36m(pid=9944)[0m agent reward:  0.63279338492455
[2m[36m(pid=9944)[0m agent reward:  0.7829983713011295

[2m[36m(pid=6908)[0m agent reward:  0.6531409075931895
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.5161674920100332
[2m[36m(pid=6908)[0m agent reward:  0.7528742869285086
[2m[36m(pid=6908)[0m planner reward:  -60.36059377868961
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.2815877839837491
[2m[36m(pid=9944)[0m agent reward:  0.9075569087286154
[2m[36m(pid=9944)[0m agent reward:  0.572369672918653
[2m[36m(pid=9944)[0m planner reward:  -30.559075093450865
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.34474758116196325
[2m[36m(pid=9944)[0m agent reward:  0.5481084298020203
[2m[36m(pid=9944)[0m agent reward:  0.6875901427351546
[2m[36m(pid=9944)[0m planner reward:  -55.27036331441085
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.36166229721198895
[2m[36m(pid=6908)[0m agent reward:  0.51897195

[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.34183194304526704
[2m[36m(pid=6908)[0m agent reward:  0.5639009716039595
[2m[36m(pid=6908)[0m agent reward:  0.13346597627107326
[2m[36m(pid=6908)[0m planner reward:  -24.15177632764696
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.634214382478205
[2m[36m(pid=9944)[0m agent reward:  0.793255217941325
[2m[36m(pid=9944)[0m agent reward:  0.23474301714854523
[2m[36m(pid=9944)[0m planner reward:  -59.86288754112926
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.7504700339982174
[2m[36m(pid=6908)[0m agent reward:  0.41527464760671345
[2m[36m(pid=6908)[0m agent reward:  0.6812004152839396
[2m[36m(pid=6908)[0m planner reward:  -35.00689568039804
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.3994345329495098
[2m[36m(pid=9944)[0m agent reward:  0.47920574

[2m[36m(pid=9944)[0m planner reward:  -58.53817525151816
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.31266857030404677
[2m[36m(pid=9944)[0m agent reward:  0.42216848819107905
[2m[36m(pid=9944)[0m agent reward:  0.6820258709706518
[2m[36m(pid=9944)[0m planner reward:  -78.92046702602048
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.3458296740928983
[2m[36m(pid=6908)[0m agent reward:  0.5400658089103726
[2m[36m(pid=6908)[0m agent reward:  0.15820780555729128
[2m[36m(pid=6908)[0m planner reward:  -23.923285445341136
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.5942475231365708
[2m[36m(pid=6908)[0m agent reward:  0.548641947329191
[2m[36m(pid=6908)[0m agent reward:  0.597798009037573
[2m[36m(pid=6908)[0m planner reward:  -32.37195779830398
[2m[36m(pid=9944)[0m agent reward:  0.9730721119707294
[2m[36m(pid=9944)[0m agent re

[2m[36m(pid=9944)[0m agent reward:  0.9305669737091657
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.7912257476633615
[2m[36m(pid=9944)[0m agent reward:  0.2958134302534713
[2m[36m(pid=9944)[0m planner reward:  -63.22359855199126
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.2183354466950689
[2m[36m(pid=9944)[0m agent reward:  0.31760571222956213
[2m[36m(pid=9944)[0m agent reward:  0.5562301386499042
[2m[36m(pid=9944)[0m planner reward:  -214.23696473773518
[2m[36m(pid=9944)[0m agent reward:  0.9123627214573377
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.7577505909494501
[2m[36m(pid=9944)[0m agent reward:  0.290633423618759
[2m[36m(pid=9944)[0m planner reward:  -62.604936960975856
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.2183354466950689
[2m[36m(pid=9944)[0m agent reward:  0.31760571

[2m[36m(pid=9944)[0m agent reward:  0.528013772363991
[2m[36m(pid=9944)[0m agent reward:  0.5355540699768541
[2m[36m(pid=9944)[0m agent reward:  0.7667172056867086
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.957153807803021
[2m[36m(pid=6908)[0m agent reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.7423637098696891
[2m[36m(pid=6908)[0m agent reward:  0.575249421543763
[2m[36m(pid=6908)[0m planner reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.7982523744734477
[2m[36m(pid=9944)[0m agent reward:  0.7967762010722239
[2m[36m(pid=9944)[0m agent reward:  1.0
[2m[36m(pid=9944)[0m agent reward:  0.8906723016277691
[2m[36m(pid=9944)[0m planner reward:  1.0
[2m[36m(pid=6908)[0m agent reward:  0.6839963005356181
[2m[36m(pid=6908)[0m agent reward:  0.9811171154982933
[2m[36m(pid=6908)[0m agent reward:  0.8064286988530073
[2m[36m(pid=6908)[0m agent reward:  1.0


KeyboardInterrupt: 

In [None]:
#env_obj.env.get_agent(3).state

In [None]:
def generate_rollout_from_current_trainer_policy(
    trainer, 
    env_obj,
    num_dense_logs=5
):
    """Roll out complete episodes with the trainer's current policies and collect dense logs.

    For each of ``num_dense_logs`` episodes the environment is reset with
    ``force_dense_logging=True``, actions for every agent (policy ``"a"``) and
    for the planner (policy ``"p"``) are obtained from
    ``trainer.compute_action``, and the environment is stepped until either
    ``env_obj.env.episode_length`` steps have run or ``done['__all__']`` is set.

    Args:
        trainer: Object exposing ``get_policy(policy_id)`` and
            ``compute_action(obs, state, policy_id=..., full_fetch=...)``.
        env_obj: Environment wrapper exposing ``reset``, ``step`` and an inner
            ``env`` with ``n_agents``, ``episode_length`` and ``dense_log``.
        num_dense_logs: Number of episodes to play (one dense log per episode).

    Returns:
        dict mapping the episode index (``0 .. num_dense_logs - 1``) to the
        value of ``env_obj.env.dense_log`` read after that episode finished.
    """
    logs_by_episode = {}

    for episode in range(num_dense_logs):
        # Fresh initial policy states for this episode. They are handed to
        # compute_action unchanged on every step; nothing here updates them.
        agent_states = {
            str(agent_idx): trainer.get_policy("a").get_initial_state()
            for agent_idx in range(env_obj.env.n_agents)
        }
        planner_states = trainer.get_policy("p").get_initial_state()

        # Start a new episode with dense logging switched on.
        obs = env_obj.reset(force_dense_logging=True)

        for _step in range(env_obj.env.episode_length):
            # One action per agent, sampled through the shared agent policy "a".
            actions = {
                str(agent_idx): trainer.compute_action(
                    obs[str(agent_idx)],
                    agent_states[str(agent_idx)],
                    policy_id="a",
                    full_fetch=False,
                )
                for agent_idx in range(env_obj.env.n_agents)
            }

            # The planner acts last, through its own policy "p".
            actions["p"] = trainer.compute_action(
                obs['p'],
                planner_states,
                policy_id='p',
                full_fetch=False,
            )

            obs, _rew, done, _info = env_obj.step(actions)
            if done['__all__']:
                break

        logs_by_episode[episode] = env_obj.env.dense_log

    return logs_by_episode

In [None]:
# Play a single episode with the current policies and keep its dense log
# (stored under key 0 of the returned dict).
dense_logs = generate_rollout_from_current_trainer_policy(
    trainer=trainer,
    env_obj=env_obj,
    num_dense_logs=1,
)

In [None]:
#dense_logs[0]['states'][0]

In [None]:
#planner_gr_score_importances = [log["p"]["GreenScoreImportance"] for log in dense_logs[0]['states']]

In [None]:
# Per-timestep state snapshots recorded in the first dense log.
episode_states = dense_logs[0]['states']

# One series per agent. Despite the "*_green_scores" names, each list holds
# that agent's "TotalBalance" value at every logged timestep.
(
    agent_0_green_scores,
    agent_1_green_scores,
    agent_2_green_scores,
    agent_3_green_scores,
) = (
    [state[agent_id]["endogenous"]["TotalBalance"] for state in episode_states]
    for agent_id in ("0", "1", "2", "3")
)

# The stock price series is read from agent "0"'s endogenous state.
stock_prices = [state["0"]["endogenous"]["StockPrice"] for state in episode_states]

In [None]:
# Plot every agent's total balance over the logged episode.
# (The "*_green_scores" variables hold "TotalBalance" values.)
balance_series_by_label = {
    "Agent 0": agent_0_green_scores,
    "Agent 1": agent_1_green_scores,
    "Agent 2": agent_2_green_scores,
    "Agent 3": agent_3_green_scores,
}
for label, series in balance_series_by_label.items():
    # Size the x-axis from the data instead of a hard-coded 101 so the plot
    # still works when the episode length changes or the episode ends early.
    plt.plot(np.arange(len(series)), series, label=label)
plt.title('Stock Broker Total Balance')
plt.legend()
plt.xlabel('Days')
plt.ylabel('Total Balance')
#plt.savefig("agent_total_balances.png")
plt.show()

In [None]:
# Plot the stock price over the logged episode. `stock_prices` is built in an
# earlier cell from agent "0"'s endogenous "StockPrice" entry, so the legend
# names the quantity rather than an agent.
# The x-axis is sized from the data instead of a hard-coded 101 so the plot
# still works when the episode length changes or the episode ends early.
plt.plot(np.arange(len(stock_prices)), stock_prices, label="Stock Price")
plt.title('Stock Price Over Time')
plt.legend()
plt.xlabel('Days')
plt.ylabel('Stock Price')
#plt.savefig("stock_price.png")
plt.show()

In [None]:
# Number of stocks left at every logged timestep, read from agent "0"'s
# endogenous state in the first dense log.
stocks_left = [log["0"]["endogenous"]["StocksLeft"] for log in dense_logs[0]['states']]

# The x-axis is sized from the data instead of a hard-coded 101 so the plot
# still works when the episode length changes or the episode ends early.
plt.plot(np.arange(len(stocks_left)), stocks_left, label="Stocks Quantity Left")
# Title and y-label describe the plotted quantity (stocks left, not price).
plt.title('Stocks Left Over Time')
plt.legend()
plt.xlabel('Days')
plt.ylabel('Stocks Left')
#plt.savefig("stocks_left.png")
plt.show()

In [None]:
# Shut down Ray once training, rollouts and plotting are finished.
# NOTE(review): presumably `trainer` is no longer usable after this call --
# re-run the Ray/trainer setup cells before running any rollout cell again.
ray.shutdown()