# Reproduce the example in the documentation
https://docs.ray.io/en/master/rllib-env.html

In [17]:
from ray.rllib.env.multi_agent_env import MultiAgentEnv, make_multi_agent

In [20]:
# Build a two-agent CartPole environment: make_multi_agent() returns a
# callable, which is invoked right away with the env config (number of agents).
env = make_multi_agent("CartPole-v0")({"num_agents": 2})

In [22]:
# reset the environment and print the initial observations (a dict keyed by agent ID)
print(env.reset())

{0: array([-0.01186764, -0.01690307, -0.03482541,  0.00744292]), 1: array([ 0.04030211, -0.0495211 , -0.02017659, -0.01807976])}


In [25]:
# take a step: agent 0 takes action 0 and agent 1 takes action 1;
# step() returns the new observations, rewards, done flags and infos
new_obs, rewards, dones, infos = env.step({0: 0, 1: 1})

In [26]:
# rewards returned by the step above, keyed by agent ID
print(rewards)

{0: 1.0, 1: 1.0}


In [27]:
# whether the episodes are done
print(dones)

{0: False, 1: False, '__all__': False}


# See how the GridWorldObstaclesEnv works

In [9]:
from environments.gridworld_obstacles.simulation.environment import GridWorldObstaclesEnv
from common.config_file_handler import load_yaml

In [10]:
# Load the experiment config via a path relative to the notebook (the same
# '../configs/...' convention used for the edge-cloud config further down),
# instead of an absolute, user-specific home-directory path that only exists
# on one machine.
config = load_yaml('../configs/experiments/fc_depth/ppo_fc_independent_depth1.yaml')
print(config)

{'name': 'gridworld_obstacles_independent_ppo_fc_depth1', 'env': 'gridworld_obstacles_vision_net', 'env-config': {'width': 40, 'height': 40, 'num_survivors': 20, 'num_agents': 3, 'start_world': [[]], 'sight': 5, 'battery': 500, 'rewards': {'rescue': 1, 'hit tree': 0, 'exploring': 0.01}, 'battery costs': {'rotate left': 1, 'rotate right': 1, 'advance': 2}, 'fire spread': {'starting points': 0, 'covariance': [[3, 0], [0, 3]], 'rate': 0.1}, 'autogen config': {'forest fire': {'chance': 1, 'trees': {'scale': 20.0, 'octaves': 8, 'persistence': 0.5, 'lacunarity': 2.0, 'threshold': 0.07}, 'rocks': {'scale': 6.0, 'octaves': 10, 'persistence': 0.5, 'lacunarity': 5.0, 'threshold': 0.2}, 'hq': {'size': 5}}}}, 'trainer': 'PPO', 'trainer-config': {'framework': 'torch', 'num_envs_per_worker': 1, 'num_workers': 2, 'num_gpus': 1, 'model': {'custom_model': 'FCModel', 'custom_model_config': {'layers': [256]}}, 'lr': 8e-05, 'lambda': 1.0, 'entropy_coeff': 0.0, 'rollout_fragment_length': 200, 'train_batch_

In [11]:
# pull out the 'env-config' section of the experiment config and display it
env_config = config['env-config']
env_config

{'width': 40,
 'height': 40,
 'num_survivors': 20,
 'num_agents': 3,
 'start_world': [[]],
 'sight': 5,
 'battery': 500,
 'rewards': {'rescue': 1, 'hit tree': 0, 'exploring': 0.01},
 'battery costs': {'rotate left': 1, 'rotate right': 1, 'advance': 2},
 'fire spread': {'starting points': 0,
  'covariance': [[3, 0], [0, 3]],
  'rate': 0.1},
 'autogen config': {'forest fire': {'chance': 1,
   'trees': {'scale': 20.0,
    'octaves': 8,
    'persistence': 0.5,
    'lacunarity': 2.0,
    'threshold': 0.07},
   'rocks': {'scale': 6.0,
    'octaves': 10,
    'persistence': 0.5,
    'lacunarity': 5.0,
    'threshold': 0.2},
   'hq': {'size': 5}}}}

In [12]:
# build the grid-world environment from the 'env-config' section loaded above
env = GridWorldObstaclesEnv(env_config)

In [13]:
# reset the environment and print the initial observations (keyed by agent ID, e.g. 'drone_0')
print(env.reset())

{'drone_0': array([[[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False,  True],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, 

In [14]:
# take a step: one action per agent, keyed by the agent IDs the environment
# itself uses ('drone_0', 'drone_1', 'drone_2' -- the keys seen in the
# observation and reward dicts). The previous keys 'drone1'/'drone2' lacked the
# underscore and therefore did not address any agent.
new_obs, rewards, dones, infos = env.step({'drone_0': 0, 'drone_1': 1, 'drone_2': 0})

In [15]:
# rewards returned by the step above, keyed by agent ID
print(rewards)

{'drone_0': 0.0, 'drone_1': 0.0, 'drone_2': 0.0}


In [16]:
# whether the episodes are done
print(dones)

{'__all__': False}


In [17]:
# infos value returned by the step above
print(infos)

{}


In [18]:
# observations returned by the step above
print(new_obs)

{'drone_0': array([[[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, 

# Test the edge cloud environment using simple algorithms from the RLlib package

In [1]:
from environments.gridworld_obstacles.simulation.environment import GridWorldObstaclesEnv
from common.config_file_handler import load_yaml
from environments.edge_cloud.simulation.environment import EdgeCloudEnv

# load the edge-cloud experiment config (path is relative to the notebook),
# take its 'env-config' section and build the edge-cloud environment from it
config = load_yaml('../configs/experiments/edge_cloud/fc_depth/ppo_fc_independent_depth1.yaml')
env_config = config['env-config']
edge_env = EdgeCloudEnv(env_config)

In [2]:
# reset the edge-cloud environment and print the value returned by reset()
print(edge_env.reset())

tasks information
    valuation_coefficient  arrive_time  start_time  deadline  usage_time  \
0               54.855064     0.319427           1         4           4   
1               91.897245     0.343713           1         4           4   
2              548.049204     1.207613           2         5           4   
3               98.822973     1.481198           2         5           4   
4               73.432560     2.010665           3         6           4   
5               98.838054     2.437006           3         6           4   
6               80.242276     4.497445           5         8           4   
7               86.963179     6.518506           7        10           4   
8               51.959390     7.049253           8        11           4   
9               64.140348     7.202132           8        11           4   
10              56.009828     7.438983           8        11           4   
11              64.807010     7.754556           8        11          

In [3]:
# take a step: one action per node, keyed 'node_0' .. 'node_2';
# step() returns the new observations, rewards, done flags and infos
actions = {'node_0': 1, 'node_1': 0, 'node_2': 0}
new_obs, rewards, dones, infos = edge_env.step(actions)
# print the rewards from this step
print(rewards)

current time slot = 0
next task's time slot = 0
bid prices:
[54.855063789653066, 0.0, 0.0]
max usage times:
[4, 4, 4]
start times:
[1, 1, 1]
relative start times:
[0, 0, 0]
winner ID = 1
allocation scheme:
node_id       1
start_time    1
end_time      4
Name: 0, dtype: object
idle resource capacities of winner node:
[[3.         0.70343668 0.70343668 0.70343668 0.70343668]
 [3.         0.67918316 0.67918316 0.67918316 0.67918316]
 [3.         0.12464042 0.12464042 0.12464042 0.12464042]]
occupancy of future 10 time steps:
[[[0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.        ]
  [0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.        ]
  [0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.        ]]

 [[0.76552111 0.76552111 0.76552111 0.76552111 0.         0.
   0.         0.         0.         0.        ]
  [0.77360561 0.77360561 0.7736056

In [4]:
# done flags returned by the step above
print(dones)

{'__all__': False}


In [5]:
# observations returned by the step above, keyed by node ID
print(new_obs)

{'node_0': array([0.10859528, 1.        , 0.46074691, 0.5772321 , 0.59534823,
       1.        , 1.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        ]), 'node_1': array([0.10859528, 1.        , 0.46074691, 0.5772321 , 0.59534823,
       1.        , 1.        , 0.76552111, 0.77360561, 0.95845319,
       0.76552111, 0.77360561, 0.95845319, 0.76552111, 0.77360561,
       0.95845319, 0.76552111, 0.77360561, 0.95845319, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0

In [6]:
# info dict returned by the step above
print(infos)

{'social_welfare_increase': 129.50737838766017}
