In [42]:
import logging
import os
import random
import time
from ctypes import c_double, cdll

import gym
from gym.spaces import Box, Discrete, Tuple, MultiDiscrete
import numpy as np
import torch

import ray
from ray import tune
from ray.tune import function
from ray.tune.logger import pretty_print
from ray.rllib.env import MultiAgentEnv
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.agents.ppo import PPOTrainer, PPOTFPolicy, PPOTorchPolicy
import ray.rllib.agents.ppo as ppo

# NOTE(review): Camera_Env / BaslerEnv below also reference `pylon`, `cv2` and
# `Image`, which are not imported anywhere in this notebook -- add the matching
# imports for your installation before instantiating those classes.

In [None]:
class Camera_Env(gym.Env):
    """Gym environment in which each action selects a camera exposure value.

    ``step`` writes one of the candidate exposure values to the camera, grabs
    a frame, and ends the episode as soon as the frame's histogram peaks in
    the accepted range (see ``check_hist``).

    NOTE(review): the class relies on the name ``pylon`` being in scope; it is
    not imported in the cells shown in this notebook.

    Args:
        camera: Camera object exposing ``AcquisitionFrameRateAbs``,
            ``GainRaw``, ``AcquisitionMode``, ``Width``, ``Height``,
            ``ExposureTimeRaw``, ``StartGrabbing``, ``RetrieveResult`` and
            ``StopGrabbing``.
        Width: Image width written to the camera and used for the
            observation space.
        Height: Image height written to the camera and used for the
            observation space.
    """

    # Exposure value written to the camera by ``reset``.
    RESET_EXPOSURE = 20000
    # Seconds to wait after writing a new exposure value in ``basler``.
    EXPOSURE_SETTLE_SECONDS = 5
    # Timeout (ms) passed to ``RetrieveResult`` for every frame request.
    GRAB_TIMEOUT_MS = 5000

    def __init__(self, camera,
                       Width = 417,
                       Height = 404):

        self.camera = camera
        self.camera.AcquisitionFrameRateAbs.Value = 10
        self.camera.GainRaw.Value = 36
        self.camera.AcquisitionMode.SetValue('Continuous')
        self.Width = Width
        self.Height = Height
        self.camera.Width.Value = Width
        self.camera.Height.Value = Height
        print(self.camera.Width.GetValue(), self.camera.Height.GetValue())

        # see https://github.com/basler/pypylon/blob/master/samples/opencv.py
        self.converter = pylon.ImageFormatConverter()
        # converting to opencv bgr format
        self.converter.OutputPixelFormat = pylon.PixelType_BGR8packed
        self.converter.OutputBitAlignment = pylon.OutputBitAlignment_MsbAligned

        # Candidate exposure values; the discrete action is an index into
        # this array.
        self.range_ex_time = np.concatenate((np.arange(1e3, 45e3, 1000),
                                             np.arange(50e3, 500e3, 5000),
                                             np.arange(600e3, 900e3, 50000),
                                             np.arange(100e4, 500e4, 500000))).astype(np.int64)

        # ``Discrete`` expects the number of actions as an int, not a list.
        self.action_space = Discrete(len(self.range_ex_time))
        # image
        self.observation_space = Box(low=0,
                                     high=255,
                                     shape=(Height, Width, 3),
                                     dtype=np.uint8)

    def grab_image(self):
        """Grab frames until one succeeds and return it as a converted array.

        Grabbing is always stopped and every grab result is released, even
        when ``RetrieveResult`` raises (e.g. on timeout).
        """
        # see https://github.com/basler/pypylon/blob/master/samples/opencv.py
        self.camera.StartGrabbing()
        try:
            while True:
                grabResult = self.camera.RetrieveResult(
                    self.GRAB_TIMEOUT_MS, pylon.TimeoutHandling_ThrowException)
                try:
                    if grabResult.GrabSucceeded():
                        # Convert first, then release the underlying buffer.
                        return self.converter.Convert(grabResult).GetArray()
                finally:
                    grabResult.Release()
        finally:
            self.camera.StopGrabbing()

    def basler(self, action):
        """Write the exposure value selected by ``action`` and wait."""
        self.camera.ExposureTimeRaw.Value = int(self.range_ex_time[action])
        time.sleep(self.EXPOSURE_SETTLE_SECONDS)

    def check_hist(self, img):
        """Return True when the fullest histogram bin lies strictly in (50, 150).

        The histogram uses NumPy's default binning over all pixel values; the
        value compared is the left edge of the most populated bin.
        """
        hist, bin_edges = np.histogram(np.array(img).ravel())
        max_value_bin = bin_edges[np.argmax(hist)]
        return bool(50 < max_value_bin < 150)

    def reset(self):
        """Restore the reset exposure value and return a freshly grabbed frame.

        Returning the observation follows the ``gym.Env.reset`` contract; the
        original implementation returned ``None``.
        """
        self.camera.ExposureTimeRaw.Value = self.RESET_EXPOSURE
        return self.grab_image()

    def step(self, action):
        """Apply the exposure action and return ``(image, reward, done, info)``.

        The reward is ``1`` when ``check_hist`` accepts the frame (which also
        ends the episode) and ``-1`` otherwise.
        """
        self.basler(action)
        img = self.grab_image()
        done = self.check_hist(img)
        reward = 1 if done else -1
        return img, reward, done, {}
    
class Lens_Env(gym.Env):
    """Gym environment that sets the focus voltage of a lens and grabs a frame.

    The lens is driven through the ``ComCasp64`` DLL (``Casp_OpenCOM`` /
    ``Casp_SetFocusVoltage``); frames come from an internal ``Camera_Env``.

    Args:
        camera: Camera object forwarded to ``Camera_Env``.
        Width: Image width used for the observation space and camera.
        Height: Image height used for the observation space and camera.
        dll_path: Path of the lens driver DLL; defaults to
            ``DEFAULT_DLL_PATH``.

    NOTE(review): ``action_space`` is ``MultiDiscrete([69, 99])`` but
    ``step``/``lens_movement`` take a single numeric value that is wrapped in
    ``c_double``; the caller is expected to combine the two components first.
    """

    # Location of the driver DLL used when ``dll_path`` is not given.
    DEFAULT_DLL_PATH = r"C:\Users\CIG\Documents\MATLAB\ComCasp64.dll"
    # Value sent to ``Casp_SetFocusVoltage`` by ``reset``.
    RESET_VOLTAGE = 24.0
    # Seconds to wait after every ``Casp_SetFocusVoltage`` call.
    SETTLE_SECONDS = 2

    def __init__(self, camera, Width=417, Height=404, dll_path=None):

        # corning lib
        self.lib = cdll.LoadLibrary(dll_path or self.DEFAULT_DLL_PATH)
        # Open the COM connection exactly once and report the status code
        # (the original called Casp_OpenCOM twice inside one print).
        open_status = self.lib.Casp_OpenCOM()
        print('eCOMCaspErr:', open_status)

        self.action_space = MultiDiscrete([69, 99])
        # image
        self.observation_space = Box(low=0,
                                     high=255,
                                     shape=(Height, Width, 3),
                                     dtype=np.uint8)

        self.camera_env = Camera_Env(camera, Width, Height)

    def lens_movement(self, action):
        """Send ``action`` to ``Casp_SetFocusVoltage`` as a double and wait."""
        self.lib.Casp_SetFocusVoltage(c_double(action))
        time.sleep(self.SETTLE_SECONDS)

    def reset(self):
        """Apply the reset voltage and return a freshly grabbed frame.

        Returning the observation follows the ``gym.Env.reset`` contract; the
        original implementation returned ``None``.
        """
        self.lens_movement(self.RESET_VOLTAGE)
        return self.camera_env.grab_image()

    def step(self, action):
        """Apply ``action`` and return ``(image, -1, False, {})``."""
        self.lens_movement(action)
        img = self.camera_env.grab_image()
        return img, -1, False, {}
    
    
class BaslerEnv(MultiAgentEnv):
    """Hierarchical multi-agent environment: exposure (high level), focus (low level).

    The high-level agent picks a camera exposure through ``Camera_Env``; a
    low-level agent then gets up to ``LOW_LEVEL_STEPS`` attempts to set the
    lens voltage through ``Lens_Env`` so that the object found by YOLOv5 has a
    Sobel-based focus value above ``threshold``.

    NOTE(review): relies on ``cv2`` and ``Image`` being in scope; neither is
    imported in the cells shown in this notebook.

    Args:
        camera: Camera object shared by both sub-environments.
        Width: Image width forwarded to ``Camera_Env``.
        Height: Image height forwarded to ``Camera_Env``.
        threshold: Focus value above which the low-level agent is done.
        filepath: Directory in which every low-level frame is saved.
    """

    # Number of low-level steps granted after each high-level step.
    LOW_LEVEL_STEPS = 10
    # Smallest coarse lens value; smaller coarse actions are clamped to it.
    MIN_COARSE_VOLTAGE = 24
    # Base reward when the target object is not detected.
    NOT_DETECTED_PENALTY = -100
    # Weight of the per-step penalty added when nothing is detected.
    STEP_PENALTY_WEIGHT = 0.5

    def __init__(self, camera,
                       Width = 417,
                       Height = 404,
                       threshold = 85,
                       filepath = ''):
        # These two were read later (self.threshold / self.file_path) but
        # never stored by the original constructor.
        self.threshold = threshold
        self.file_path = filepath

        # Lens_Env configures the camera through its own Camera_Env, so the
        # camera env carrying the requested dimensions is created last and
        # its settings are the ones left on the device.
        self.corning_env = Lens_Env(camera)
        self.basler_env = Camera_Env(camera, Width, Height)

        # yolo
        self.model_yolo = torch.hub.load('ultralytics/yolov5', 'yolov5s',
                                         force_reload=True,
                                         pretrained=True)

        self._reset_episode_state()

    def _reset_episode_state(self):
        """Initialise the bookkeeping shared by ``reset`` and the step helpers."""
        self.done_camera = False
        self.reward_camera = 0
        self.camera_action = None
        self.steps_remaining_at_level = None
        self.num_high_level_steps = 0
        self.low_level_agent_id = "low_level_{}".format(self.num_high_level_steps)

    def reset(self):
        """Reset both sub-environments and return the first high-level observation."""
        self.basler_env.reset()
        self.corning_env.reset()
        self._reset_episode_state()
        return {"high_level_agent": self.basler_env.grab_image(),}

    def focus_value(self, img):
        """Return the mean of the combined absolute Sobel gradients of ``img``."""
        pixels = np.asarray(img, dtype=np.float32)
        # Calculate the gradient
        sobelx = cv2.Sobel(pixels, cv2.CV_64F, 1, 0, ksize=5)
        sobely = cv2.Sobel(pixels, cv2.CV_64F, 0, 1, ksize=5)

        abs_sobel_x = cv2.convertScaleAbs(sobelx)  # converting back to uint8
        abs_sobel_y = cv2.convertScaleAbs(sobely)

        # Combine the two gradients with equal weight
        dst = cv2.addWeighted(abs_sobel_x, 0.5, abs_sobel_y, 0.5, 0)

        # Average gradient of the image (the original used the undefined
        # name ``pl`` here).
        return np.asarray(dst).mean()

    def find_class(self, results, obj = 'car'):
        """Return the first prediction row whose class name equals ``obj``.

        Column 5 of each row in ``results.pred[0]`` is used as the index into
        ``results.names``. An empty array is returned when no row matches.
        """
        # .cpu() keeps this working when the model runs on a GPU.
        detections = results.pred[0].cpu().numpy()
        for row in detections:
            if results.names[int(row[5])] == obj:
                return row
        return np.array([])

    def step(self, action_dict):
        """Dispatch to the high- or low-level step depending on who acted."""
        if "high_level_agent" in action_dict:
            return self._high_level_step(action_dict["high_level_agent"])
        else:
            return self._low_level_step(list(action_dict.values())[0])

    def _high_level_step(self, action):
        """Apply the exposure action and hand control to a new low-level agent."""
        obs_camera, self.reward_camera, self.done_camera, _ = self.basler_env.step(action)
        # Remembered so the saved frames can be labelled with the exposure.
        self.camera_action = action

        self.steps_remaining_at_level = self.LOW_LEVEL_STEPS
        self.num_high_level_steps += 1

        self.low_level_agent_id = "low_level_{}".format(self.num_high_level_steps)

        obs = {self.low_level_agent_id: obs_camera}
        rew = {self.low_level_agent_id: 0}
        done = {"__all__": False}

        return obs, rew, done, {}

    def _low_level_step(self, action):
        """Move the lens, score the frame, and return ``(obs, rew, done, info)``.

        ``action`` is a (coarse, fine) pair; the lens value is
        ``max(coarse, MIN_COARSE_VOLTAGE) + fine / 100``. The original built
        it with ``"%s.%s"``, which made fine steps 5 and 50 both give ``.5``.

        When the object is in focus, or the step budget is used up, the
        low-level agent is marked done and the high-level agent receives an
        observation so it can act next.
        NOTE(review): the high-level agent is rewarded with the reward
        returned by the last ``Camera_Env.step``; the original never emitted
        a high-level reward, so confirm this is the intended signal.
        """
        self.steps_remaining_at_level -= 1
        agent_id = self.low_level_agent_id

        action_lens_coarse = max(int(action[0]), self.MIN_COARSE_VOLTAGE)
        action_lens = action_lens_coarse + int(action[1]) / 100.0

        obs_lens, _, _, _ = self.corning_env.step(action_lens)
        obs = {agent_id: obs_lens}
        done = {"__all__": False, agent_id: False}

        final_result_yolo = self.find_class(self.model_yolo(obs_lens))

        in_focus = False
        if final_result_yolo.size == 0:
            print('not detected')
            steps_used = self.LOW_LEVEL_STEPS - self.steps_remaining_at_level
            rew = {agent_id: self.NOT_DETECTED_PENALTY
                             - self.STEP_PENALTY_WEIGHT * steps_used}
        else:
            # Focus is measured on the detected bounding box only.
            crop = Image.fromarray(obs_lens).crop(tuple(final_result_yolo[:4]))
            foc_value = self.focus_value(crop)
            in_focus = foc_value > self.threshold
            if in_focus:
                rew = {agent_id: 1}
                print('------', True, action, self.steps_remaining_at_level, '------')
            else:
                rew = {agent_id: 1 - 0.001 * (foc_value - self.threshold) ** 2}
                print(False, action)

        if in_focus or self.steps_remaining_at_level <= 0:
            done[agent_id] = True
            if in_focus and self.done_camera:
                done["__all__"] = True
            rew["high_level_agent"] = self.reward_camera
            obs["high_level_agent"] = obs_lens

        # Save the frame, labelled with lens value, exposure and done flag.
        if self.camera_action is None:
            exposure = "na"
        else:
            exposure = self.basler_env.range_ex_time[self.camera_action]
        filename = os.path.join(
            self.file_path,
            "img_%s_%s_%s.png" % (action_lens, exposure, done[agent_id]))
        Image.fromarray(obs_lens).save(filename)

        return obs, rew, done, {}
        
        
        
        
        
        
        
        
            
        
    
         
        
        

In [38]:
# logger = logging.getLogger(__name__)
# ASCII grid map, one row per line. Windy.__init__ drops empty lines and
# records the cells containing "S" and "F" as start_pos and end_pos.
# presumably "#" marks walls and spaces are free cells -- confirm.
MAP_DATA = """
#########
#S      #
####### #
      # #
      # #
####### #
#F      #
#########"""

class Windy(gym.Env):
    """Grid environment stub built from ``MAP_DATA``.

    The map is parsed to locate the start ("S") and finish ("F") cells, but
    the dynamics are currently stubbed: ``reset`` and ``step`` always return
    the same fixed observation, ``step`` never terminates, and
    ``_get_new_pos`` always returns ``(0, 0)``.
    """

    def __init__(self, env_config):
        """Parse the map and declare the observation and action spaces."""
        rows = []
        for line in MAP_DATA.split("\n"):
            if line:
                rows.append(line)
        self.map = rows
        self.x_dim = len(rows)
        self.y_dim = len(rows[0])

        # Remember where the start and finish markers are.
        for x, row in enumerate(rows):
            for y in range(self.y_dim):
                cell = row[y]
                if cell == "S":
                    self.start_pos = (x, y)
                elif cell == "F":
                    self.end_pos = (x, y)

        position_space = Box(0, 100, shape=(2, ))  # (x, y)
        wind_space = Discrete(4)  # wind direction (N, E, S, W)
        self.observation_space = Tuple([position_space, wind_space])

        # whether to move or not
        self.action_space = Discrete(2)

    def reset(self):
        """Return the fixed stub observation ``[[0, 1], 1]``."""
        position, wind_direction = [0, 1], 1
        return [position, wind_direction]

    def step(self, action):
        """Ignore ``action`` and return the fixed stub transition."""
        observation = [[0, 1], 1]
        reward = -1
        done = False
        return (observation, reward, done, {})

    def _get_new_pos(self, pos, direction):
        """Stub: ignore both arguments and return ``(0, 0)``."""
        origin = (0, 0)
        return origin


class Hierarchical(MultiAgentEnv):
    """Two-level multi-agent wrapper around a flat ``Windy`` environment.

    The high-level agent's action is stored as the current goal; a freshly
    named low-level agent then acts for up to 25 steps in the flat
    environment before control returns to the high-level agent.
    """

    def __init__(self, env_config):
        """Create the wrapped flat environment."""
        self.flat_env = Windy(env_config)

    def reset(self):
        """Reset the flat env and return the high-level agent's observation."""
        print('-----reset-----')
        self.current_goal = None
        self.steps_remaining_at_level = None
        self.num_high_level_steps = 0
        self.cur_obs = self.flat_env.reset()
        # Low-level agent ids must be unique per high-level step because
        # agent ids cannot be reused.
        self.low_level_agent_id = "low_level_{}".format(self.num_high_level_steps)
        print('self.low_level_agent_id:', self.low_level_agent_id)
        print('----------')
        return {"high_level_agent": self.cur_obs}

    def step(self, action_dict):
        """Route the single submitted action to the matching level."""
        print('-----step-----')
        print('action_dict:', action_dict)
        assert len(action_dict) == 1, action_dict
        if "high_level_agent" not in action_dict:
            low_level_actions = list(action_dict.values())
            print('action_dict.values:', low_level_actions)
            return self._low_level_step(low_level_actions[0])
        return self._high_level_step(action_dict["high_level_agent"])

    def _high_level_step(self, action):
        """Store the goal and hand control to a new low-level agent."""
        print('-----high_level_step-----')
        self.current_goal = action
        self.steps_remaining_at_level = 25
        self.num_high_level_steps += 1
        agent_id = "low_level_{}".format(self.num_high_level_steps)
        self.low_level_agent_id = agent_id

        obs = {agent_id: [self.cur_obs, self.current_goal]}
        rew = {agent_id: 0}
        done = {"__all__": False}
        print('high_level_step:', action, done)
        return obs, rew, done, {}

    def _low_level_step(self, action):
        """Step the flat env and build the low-level (and hand-back) outputs."""
        print('----------low_level_step----------')
        self.steps_remaining_at_level -= 1
        print('steps_remaining_at_level:', self.steps_remaining_at_level)

        agent_id = self.low_level_agent_id
        pos_before = tuple(self.cur_obs[0])
        goal_pos = self.flat_env._get_new_pos(pos_before, self.current_goal)

        # Step in the actual env
        f_obs, f_rew, f_done, _ = self.flat_env.step(action)
        pos_after = tuple(f_obs[0])
        self.cur_obs = f_obs

        # Low-level reward: 0 if the position did not change, +1 if it
        # changed to the goal position, -1 for any other change.
        if pos_after == pos_before:
            low_level_reward = 0
        elif pos_after == goal_pos:
            low_level_reward = 1
        else:
            low_level_reward = -1
        obs = {agent_id: [f_obs, self.current_goal]}
        rew = {agent_id: low_level_reward}

        # Handle env termination & transitions back to higher level
        done = {"__all__": False}
        if f_done:
            done["__all__"] = True
            rew["high_level_agent"] = f_rew
            obs["high_level_agent"] = f_obs
        elif self.steps_remaining_at_level == 0:
            done[agent_id] = True
            rew["high_level_agent"] = 0
            obs["high_level_agent"] = f_obs
        print('low_level_step:', action, rew, done)
        return obs, rew, done, {}

In [39]:
from ray.tune.registry import register_env

def env_creator(env_config):
    """Build a new ``Hierarchical`` environment from ``env_config``."""
    hierarchical_env = Hierarchical(env_config)
    return hierarchical_env


# Make the environment available under the string id "Hierarchical".
register_env("Hierarchical", env_creator)

In [40]:
def policy_mapping_fn(agent_id):
    """Map an agent id to the name of the policy that controls it.

    Ids starting with ``"low_level_"`` use the low-level policy; every other
    id uses the high-level policy.
    """
    is_low_level = agent_id.startswith("low_level_")
    return "low_level_policy" if is_low_level else "high_level_policy"
    
    
# Stopping criteria in the format expected by ``tune.run``; the PPOTrainer
# created at the end of this cell does not read them.
stop = {
        "training_iteration": 200,
        "timesteps_total": 100,
        "episode_reward_mean": 0.0,
    }

# Flat environment instance used only to read the observation/action spaces.
maze = Windy(None)

# Each policy spec is (policy class, observation space, action space, config
# overrides).
high_level_policy_spec = (
    PPOTFPolicy,
    maze.observation_space,
    Discrete(1),
    {"gamma": 0.9},
)
# The low-level agent observes [flat observation, current goal]
# (see Hierarchical._high_level_step).
low_level_policy_spec = (
    PPOTFPolicy,
    Tuple([maze.observation_space, Discrete(1)]),
    maze.action_space,
    {"gamma": 0.0},
)

config = {
    # Same registered id that is passed to the trainer below, so the two
    # settings can no longer disagree.
    "env": "Hierarchical",
    "num_workers": 0,
    "entropy_coeff": 0.01,
    "multiagent": {
        "policies": {
            "high_level_policy": high_level_policy_spec,
            "low_level_policy": low_level_policy_spec,
        },
        # Plain callable; the deprecated ``tune.function`` wrapper is not
        # needed.
        "policy_mapping_fn": policy_mapping_fn,
    },
    "framework": "tf",#"torch", #if args.torch else "tf",
    # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
    "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
}

trainer = ppo.PPOTrainer(env="Hierarchical", config=config)

Install gputil for GPU system monitoring.


In [41]:
# Number of PPO training iterations to run in this cell.
NUM_TRAINING_ITERATIONS = 100

for _ in range(NUM_TRAINING_ITERATIONS):
    # Perform one iteration of training the policy with PPO
    result = trainer.train()
    # `pretty_print` comes from ray.tune.logger (see the import cell).
    print(pretty_print(result))

-----reset-----
self.low_level_agent_id: low_level_0
----------
-----step-----
action_dict: {'high_level_agent': 0}
-----high_level_step-----
high_level_step: 0 {'__all__': False}
-----step-----
action_dict: {'low_level_1': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 24
low_level_step: 1 {'low_level_1': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_1': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 23
low_level_step: 0 {'low_level_1': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_1': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 22
low_level_step: 0 {'low_level_1': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_1': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 21
low_level_step: 0 {'low_level_1': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_1': 0}
action_di

low_level_step: 1 {'low_level_5': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_5': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 0
low_level_step: 1 {'low_level_5': 0, 'high_level_agent': 0} {'__all__': False, 'low_level_5': True}
-----step-----
action_dict: {'high_level_agent': 0}
-----high_level_step-----
high_level_step: 0 {'__all__': False}
-----step-----
action_dict: {'low_level_6': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 24
low_level_step: 0 {'low_level_6': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_6': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 23
low_level_step: 0 {'low_level_6': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_6': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 22
low_level_step: 1 {'low_level_6': 0} {'__all__': False}
-----step-----
action

low_level_step: 1 {'low_level_9': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_9': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 19
low_level_step: 1 {'low_level_9': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_9': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 18
low_level_step: 1 {'low_level_9': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_9': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 17
low_level_step: 1 {'low_level_9': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_9': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 16
low_level_step: 1 {'low_level_9': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_9': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 15
low_level_step: 0 {'low_level_9': 0} {'__all_

-----step-----
action_dict: {'low_level_12': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 21
low_level_step: 0 {'low_level_12': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_12': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 20
low_level_step: 0 {'low_level_12': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_12': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 19
low_level_step: 1 {'low_level_12': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_12': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 18
low_level_step: 0 {'low_level_12': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_12': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 17
low_level_step: 0 {'low_level_12': 0} {'__all__': False}
-----step-----
action_dict: {'low_l

-----step-----
action_dict: {'low_level_15': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 12
low_level_step: 1 {'low_level_15': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_15': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 11
low_level_step: 0 {'low_level_15': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_15': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 10
low_level_step: 1 {'low_level_15': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_15': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 9
low_level_step: 1 {'low_level_15': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_15': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 8
low_level_step: 0 {'low_level_15': 0} {'__all__': False}
-----step-----
action_dict: {'low_lev

-----step-----
action_dict: {'low_level_18': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 13
low_level_step: 1 {'low_level_18': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_18': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 12
low_level_step: 0 {'low_level_18': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_18': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 11
low_level_step: 0 {'low_level_18': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_18': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 10
low_level_step: 0 {'low_level_18': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_18': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 9
low_level_step: 0 {'low_level_18': 0} {'__all__': False}
-----step-----
action_dict: {'low_le

-----step-----
action_dict: {'low_level_21': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 16
low_level_step: 0 {'low_level_21': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_21': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 15
low_level_step: 1 {'low_level_21': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_21': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 14
low_level_step: 0 {'low_level_21': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_21': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 13
low_level_step: 1 {'low_level_21': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_21': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 12
low_level_step: 1 {'low_level_21': 0} {'__all__': False}
-----step-----
action_dict: {'low_l

-----step-----
action_dict: {'low_level_24': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 23
low_level_step: 0 {'low_level_24': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_24': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 22
low_level_step: 0 {'low_level_24': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_24': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 21
low_level_step: 0 {'low_level_24': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_24': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 20
low_level_step: 0 {'low_level_24': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_24': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 19
low_level_step: 1 {'low_level_24': 0} {'__all__': False}
-----step-----
action_dict: {'low_l

-----step-----
action_dict: {'low_level_27': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 16
low_level_step: 1 {'low_level_27': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_27': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 15
low_level_step: 1 {'low_level_27': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_27': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 14
low_level_step: 1 {'low_level_27': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_27': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 13
low_level_step: 0 {'low_level_27': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_27': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 12
low_level_step: 1 {'low_level_27': 0} {'__all__': False}
-----step-----
action_dict: {'low_l

low_level_step: 1 {'low_level_30': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_30': 0}
action_dict.values: [0]
----------low_level_step----------
steps_remaining_at_level: 15
low_level_step: 0 {'low_level_30': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_30': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 14
low_level_step: 1 {'low_level_30': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_30': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 13
low_level_step: 1 {'low_level_30': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_30': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 12
low_level_step: 1 {'low_level_30': 0} {'__all__': False}
-----step-----
action_dict: {'low_level_30': 1}
action_dict.values: [1]
----------low_level_step----------
steps_remaining_at_level: 11
low_level_step: 1 {'low_level_30': 

KeyboardInterrupt: 

In [28]:
maze = WindyMazeEnv(None)

def policy_mapping_fn(agent_id):
    """Return the name of the policy that should control ``agent_id``.

    Agent ids that begin with ``"low_level_"`` are routed to
    ``"low_level_policy"``; every other id is routed to
    ``"high_level_policy"``.
    """
    is_low_level = agent_id.startswith("low_level_")
    return "low_level_policy" if is_low_level else "high_level_policy"
    
    
# Stop criteria handed to Tune. Per the Tune documentation the trial ends as
# soon as ANY one of these thresholds is reached, so with these values the run
# ends after at most 100 sampled timesteps (or earlier if the mean episode
# reward reaches 0.0).
stop = {
    "training_iteration": 200,
    "timesteps_total": 100,
    "episode_reward_mean": 0.0,
}

# PPO configuration for the hierarchical environment: two policies, one for
# the high-level agent and one shared by every "low_level_*" agent.
config = {
    "env": HierarchicalWindyMazeEnv,
    "num_workers": 0,
    "entropy_coeff": 0.01,
    "multiagent": {
        "policies": {
            # Each spec is (policy class, observation space, action space,
            # per-policy config overrides).
            "high_level_policy": (
                PPOTFPolicy,
                maze.observation_space,
                Discrete(1),
                {"gamma": 0.9},
            ),
            # The low-level observation pairs the maze observation with a
            # Discrete(1) component (the same space used as the high-level
            # action space above); it acts in the maze's own action space.
            "low_level_policy": (
                PPOTFPolicy,
                Tuple([maze.observation_space, Discrete(1)]),
                maze.action_space,
                {"gamma": 0.0},
            ),
        },
        # Pass the callable directly: the `tune.function(...)` wrapper is a
        # deprecated pass-through that only logs a warning and returns its
        # argument, and it no longer exists in newer Ray releases.
        "policy_mapping_fn": policy_mapping_fn,
    },
    "framework": "tf",
    # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
    "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
}

results = tune.run("PPO", stop=stop, config=config, verbose=1)

[2m[36m(pid=21231)[0m Instructions for updating:
[2m[36m(pid=21231)[0m non-resource variables are not supported in the long term
[2m[36m(pid=21231)[0m 2021-03-05 20:49:07,782	INFO trainer.py:616 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=21231)[0m 2021-03-05 20:49:07,782	INFO trainer.py:643 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=21231)[0m Instructions for updating:
[2m[36m(pid=21231)[0m If using Keras pass *_constraint arguments to layers.
[2m[36m(pid=21231)[0m Instructions for updating:
[2m[36m(pid=21231)[0m Use tf.where in 2.0, which has the same broadcast rule as np.where


[2m[36m(pid=21231)[0m high_level_step: 0 {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_1': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_1': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_1': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_1': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_1': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_1': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_1': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_1': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_1': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_1': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_1': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 

[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_2': 0, 'high_level_agent': 0} {'__all__': False, 'low_level_2': True}
[2m[36m(pid=21231)[0m high_level_step: 0 {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[

[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0, 'high_level_agent': 0} {'__all__': False, 'low_level_3': True}
[2m[36m(pid=21231)[0m high_level_step: 0 {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False}

[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0, 'high_level_agent': 0} {'__all__': False, 'low_level_3': True}
[2m[36m(pid=21231)[0m high_level_step: 0 {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': 0} {'__all__': False}


[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0

[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_2': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_2': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_2': -1, 'high_level_agent': 0} {'__all__': False, 'low_level_2': True}
[2m[36m(pid=21231)[0m high_level_step: 0 {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}

[2m[36m(pid=21231)[0m high_level_step: 0 {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'

[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m lo

[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m l

[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1, 'high_level_agent': 0} {'__all__': False, 'low_level_3': True}
[2m[36m(pid=21231)[0m high_level_step: 0 {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False

[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m l

[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_3': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_3': -1, 'high_level_agent': 0} {'__all__': False, 'low_level_3': True}
[2m[36m(pid=21231)[0m high_level_step: 0 {'__all__': False}

[2m[36m(pid=21231)[0m high_level_step: 0 {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_4': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'

[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_5': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_5': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_5': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_5': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_5': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_5': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_5': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_5': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_5': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_5': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_5': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_5': -1} {'__all__': False}
[2m[36m(pid=21231)[0m 

[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_6': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_6': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_6': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_6': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_6': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_6': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_6': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_6': 1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_6': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 0 {'low_level_6': 0} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_6': -1} {'__all__': False}
[2m[36m(pid=21231)[0m low_level_step: 1 {'low_level_6': 1} {'__all__': False}
[2m[36m(pid=21231)[0m lo

KeyboardInterrupt: 