# Reproduce the example in the documentation
https://docs.ray.io/en/master/rllib-env.html

In [17]:
from ray.rllib.env.multi_agent_env import MultiAgentEnv, make_multi_agent

In [20]:
# create a two-agent environment
# make_multi_agent returns a callable that builds the environment, so keep that
# factory and the environment instance under separate names instead of
# rebinding `env` twice
MultiAgentCartPole = make_multi_agent("CartPole-v0")
env = MultiAgentCartPole({"num_agents": 2})

In [22]:
# reset the environment and show the initial observations it returns
initial_obs = env.reset()
print(initial_obs)

{0: array([-0.01186764, -0.01690307, -0.03482541,  0.00744292]), 1: array([ 0.04030211, -0.0495211 , -0.02017659, -0.01807976])}


In [25]:
# take a step: the action dict uses the same agent ids (0 and 1) that key the
# observations printed after reset(); step() hands back the new observations,
# rewards, done flags and infos
new_obs, rewards, dones, infos = env.step({0: 0, 1: 1})

In [26]:
# rewards returned by the step above
print(rewards)

{0: 1.0, 1: 1.0}


In [27]:
# whether the episodes are done
print(dones)

{0: False, 1: False, '__all__': False}


# See how the GridWorldObstaclesEnv works

In [9]:
from environments.gridworld_obstacles.simulation.environment import GridWorldObstaclesEnv
from common.config_file_handler import load_yaml

In [10]:
# Load the experiment configuration for the gridworld-obstacles run.
# NOTE(review): this is an absolute, user-specific path, so the cell only runs
# on the machine it was written on. The edge-cloud section further down loads
# its config through a relative '../configs/...' path; consider doing the same
# here (TODO confirm both configs live in the same checkout).
config = load_yaml('/mainfs/home/fb1n15/MARL-Jack/marl-disaster-relief/configs/experiments/fc_depth/ppo_fc_independent_depth1.yaml')
print(config)

{'name': 'gridworld_obstacles_independent_ppo_fc_depth1', 'env': 'gridworld_obstacles_vision_net', 'env-config': {'width': 40, 'height': 40, 'num_survivors': 20, 'num_agents': 3, 'start_world': [[]], 'sight': 5, 'battery': 500, 'rewards': {'rescue': 1, 'hit tree': 0, 'exploring': 0.01}, 'battery costs': {'rotate left': 1, 'rotate right': 1, 'advance': 2}, 'fire spread': {'starting points': 0, 'covariance': [[3, 0], [0, 3]], 'rate': 0.1}, 'autogen config': {'forest fire': {'chance': 1, 'trees': {'scale': 20.0, 'octaves': 8, 'persistence': 0.5, 'lacunarity': 2.0, 'threshold': 0.07}, 'rocks': {'scale': 6.0, 'octaves': 10, 'persistence': 0.5, 'lacunarity': 5.0, 'threshold': 0.2}, 'hq': {'size': 5}}}}, 'trainer': 'PPO', 'trainer-config': {'framework': 'torch', 'num_envs_per_worker': 1, 'num_workers': 2, 'num_gpus': 1, 'model': {'custom_model': 'FCModel', 'custom_model_config': {'layers': [256]}}, 'lr': 8e-05, 'lambda': 1.0, 'entropy_coeff': 0.0, 'rollout_fragment_length': 200, 'train_batch_

In [11]:
# Pull out the 'env-config' section, which is what the environment constructor
# is given below, and display it
env_config = config['env-config']
env_config

{'width': 40,
 'height': 40,
 'num_survivors': 20,
 'num_agents': 3,
 'start_world': [[]],
 'sight': 5,
 'battery': 500,
 'rewards': {'rescue': 1, 'hit tree': 0, 'exploring': 0.01},
 'battery costs': {'rotate left': 1, 'rotate right': 1, 'advance': 2},
 'fire spread': {'starting points': 0,
  'covariance': [[3, 0], [0, 3]],
  'rate': 0.1},
 'autogen config': {'forest fire': {'chance': 1,
   'trees': {'scale': 20.0,
    'octaves': 8,
    'persistence': 0.5,
    'lacunarity': 2.0,
    'threshold': 0.07},
   'rocks': {'scale': 6.0,
    'octaves': 10,
    'persistence': 0.5,
    'lacunarity': 5.0,
    'threshold': 0.2},
   'hq': {'size': 5}}}}

In [12]:
# Build the gridworld environment from the env-config section.
# Note: this rebinds `env`, replacing the CartPole environment created earlier.
env = GridWorldObstaclesEnv(env_config)

In [13]:
# reset the gridworld environment and show the initial per-drone observations
gridworld_initial_obs = env.reset()
print(gridworld_initial_obs)

{'drone_0': array([[[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False,  True],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, 

In [14]:
# take a step: one action per drone, keyed by the agent ids the environment
# itself uses ('drone_0', 'drone_1', 'drone_2' — see the keys of the returned
# rewards). The previous keys 'drone1'/'drone2' lacked the underscore and so
# did not match any agent id.
new_obs, rewards, dones, infos = env.step({'drone_0': 0, 'drone_1': 1, 'drone_2': 0})

In [15]:
# per-drone rewards returned by the gridworld step above
print(rewards)

{'drone_0': 0.0, 'drone_1': 0.0, 'drone_2': 0.0}


In [16]:
# whether the episodes are done
print(dones)

{'__all__': False}


In [17]:
# infos dict returned by the gridworld step above
print(infos)

{}


In [18]:
# observations returned by the gridworld step above
print(new_obs)

{'drone_0': array([[[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, 

# Test the edge cloud environment using simple algorithms from the RLlib package

In [65]:
# from environments.gridworld_obstacles.simulation.environment import GridWorldObstaclesEnv
from common.config_file_handler import load_yaml
from environments.edge_cloud.simulation.environment import EdgeCloudEnv
import pprint
# Enable IPython's autoreload extension (mode 2) so edits to the project
# modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

# Load the edge-cloud experiment config and build the environment from its
# 'env-config' section. Note: this rebinds `config` and `env_config`, which
# the gridworld section above also assigns.
config = load_yaml('../configs/experiments/edge_cloud/fc_depth/ppo_fc_independent_depth1.yaml')
env_config = config['env-config']
edge_env = EdgeCloudEnv(env_config)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [66]:
# Pretty-printer with a 4-space indent (not used by the cells in this part of
# the notebook)
pp = pprint.PrettyPrinter(indent=4)

In [67]:
# set the environment's `verbose` attribute to False
edge_env.verbose = False

In [70]:
# reset the edge-cloud environment and show the initial observations
edge_initial_obs = edge_env.reset()
print(edge_initial_obs)

{'drone_0': [584.612723292089, 4.0, 3.7545287107682563, 4.529519154027988, 4.812685467273033, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'drone_1': [584.612723292089, 4.0, 3.7545287107682563, 4.529519154027988, 4.812685467273033, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'drone_2': [584.612723292089, 4.0, 3.7545287107682563, 4.529519154027988, 4.812685467273033, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [76]:
# take a step with one action per agent and show the returned rewards
# NOTE(review): the actions are keyed 'node_0'..'node_2', while the dicts the
# environment returns are keyed 'drone_0'..'drone_2' — confirm which ids
# EdgeCloudEnv.step() expects for actions.
actions = {'node_0': 1, 'node_1': 0, 'node_2': 0}
new_obs, rewards, dones, infos = edge_env.step(actions)
print(rewards)

{'drone_0': 0, 'drone_1': 0, 'drone_2': 0}


In [77]:
# done flags returned by the edge-cloud step above
print(dones)

{'__all__': False}


In [79]:
# observations returned by the first edge-cloud step
print(new_obs)

{'drone_0': [52.28394826227, 4.0, 2.2631508296999296, 2.4764819094832706, 2.807762417735701, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0], 'drone_1': [52.28394826227, 4.0, 2.2631508296999296, 2.4764819094832706, 2.807762417735701, 1.0, 4.0, 0.9232802460771041, 0.9586244268882372, 0.9583346213211267, 0.9232802460771041, 0.9586244268882372, 0.9583346213211267, 0.9232802460771041, 0.9586244268882372, 0.9583346213211267, 0.9232802460771041, 0.9586244268882372, 0.9583346213211267, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0], 'drone_2': [52.28394826227, 4.0, 2.2631508296999296, 2.4764819094832706, 2.807762417735701, 1.0, 4.0, 0.823279145652103, 0.7547653049095929, 0.7469853791233187, 0.823279145652103, 0.75476

In [80]:
# infos dict returned by the first edge-cloud step
print(infos)

{}


In [81]:
# take a second step, this time with a different action for each agent, and
# show the returned rewards
# NOTE(review): action keys are 'node_*' while returned dicts use 'drone_*' —
# confirm which ids EdgeCloudEnv.step() expects for actions.
actions = {'node_0': 2, 'node_1': 3, 'node_2': 4}
new_obs, rewards, dones, infos = edge_env.step(actions)
print(rewards)

{'drone_0': 0, 'drone_1': 0, 'drone_2': 0}


In [83]:
# observations returned by the second edge-cloud step
print(new_obs)

{'drone_0': [99.15767226786063, 4.0, 2.568491665608129, 2.57322349107025, 2.8909739589971326, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0, 0, 0, 3, 1, 1, 1, 0, 0, 0, 4], 'drone_1': [99.15767226786063, 4.0, 2.568491665608129, 2.57322349107025, 2.8909739589971326, 1.0, 4.0, 0.9232802460771041, 0.9586244268882372, 0.9583346213211267, 0.9232802460771041, 0.9586244268882372, 0.9583346213211267, 0.9232802460771041, 0.9586244268882372, 0.9583346213211267, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0, 0, 0, 3, 1, 1, 1, 0, 0, 0, 4], 'drone_2': [99.15767226786063, 4.0, 2.568491665608129, 2.57322349107025, 2.8909739589971326, 1.0, 4.0, 0.823279145652103, 0.7547653049095929, 0.7469853791233187, 0.823279145652103, 0.7547653049095929, 0.7469853791233187, 0.8232791

In [84]:
# number of entries in drone_0's observation after the second step
len(new_obs['drone_0'])

58