In [1]:
import matplotlib.pyplot as plt
from pprint import pprint

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

plt.style.use('ggplot')

%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Extend the module search path with the parent directory. This has to run
# before the imports below; presumably the project modules are resolved from
# there (TODO confirm against the repository layout).
sys.path.append('..')

from gym_minigrid_navigation.utils import show_video
from navigation_policy import gen_env, get_agent, run_episode, run_episodes, get_goal_achieving_criterion

from rewards import get_reward_function
from utils import init_logger, switch_reproducibility_on, display_stats

# Call the project's logger setup for both logger names; the training output
# saved in this notebook is emitted under the 'navigation_policy' name.
init_logger('master_worker')
init_logger('navigation_policy')

In [3]:
from pyhocon import ConfigFactory

# Parse the HOCON experiment configuration; the path is relative to the
# notebook's working directory.
config = ConfigFactory.parse_file('../conf/minigrid_master_worker.hocon')
# Set the env section's video_path for this notebook; the value is later
# handed to the Monitor wrapper as its output directory argument.
config['env']['video_path'] = './video/'

In [4]:
# Pass the configured seed to the project's reproducibility helper before any
# environment or agent is created.
switch_reproducibility_on(config['seed'])

# Train worker

In [5]:
# Build the worker-training environment: gen_env receives the env sub-config
# together with the goal-achieving criterion and the reward function(s), both
# derived from the same config object.
reward_functions = get_reward_function(config)
goal_achieving_criterion = get_goal_achieving_criterion(config)
env = gen_env(config['env'], goal_achieving_criterion, reward_functions)

In [6]:
# Create the worker agent from the config via the project's get_agent helper.
worker_agent = get_agent(config)

In [7]:
# Worker-only training run: the third positional argument is None here,
# whereas the master-training call later in the notebook passes a master
# agent in that slot.
scores, steps = run_episodes(
    env,
    worker_agent,
    None,
    n_episodes=1000,
    verbose=config['training.verbose'],
)

2021-03-23 16:22:42,273 INFO    navigation_policy      : Episode: 100. scores: -24.44, steps: 92.12, achieved: 0.90
2021-03-23 16:22:55,924 INFO    navigation_policy      : Episode: 200. scores: -3.00, steps: 23.49, achieved: 1.00
2021-03-23 16:23:06,736 INFO    navigation_policy      : Episode: 300. scores: -1.53, steps: 17.85, achieved: 1.00
2021-03-23 16:23:12,889 INFO    navigation_policy      : Episode: 400. scores: 0.53, steps: 10.33, achieved: 1.00
2021-03-23 16:23:19,370 INFO    navigation_policy      : Episode: 500. scores: 0.52, steps: 10.85, achieved: 1.00
2021-03-23 16:23:24,830 INFO    navigation_policy      : Episode: 600. scores: 0.96, steps: 8.61, achieved: 1.00
2021-03-23 16:23:29,622 INFO    navigation_policy      : Episode: 700. scores: 1.37, steps: 6.97, achieved: 1.00
2021-03-23 16:23:36,868 INFO    navigation_policy      : Episode: 800. scores: 0.05, steps: 11.39, achieved: 1.00
2021-03-23 16:23:41,862 INFO    navigation_policy      : Episode: 900. scores: 1.34, s

# Train master

In [8]:
# Show the repr of the worker Q-network's `master` submodule so its layer
# layout (and the layer indices used in the next cell) can be read off.
worker_agent.qnetwork_local.master

Flattener(
  (model): MLP(
    (model): Sequential(
      (0): Linear(in_features=108, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): ReLU()
      (4): Tanh()
    )
  )
)

In [9]:
# Sizes are read off the Sequential shown in the repr above: index 0 is the
# first Linear layer (input side), index 2 is the last Linear layer.
master_layers = worker_agent.qnetwork_local.master.model.model
state_size, emb_size = master_layers[0].in_features, master_layers[2].out_features
print(state_size, emb_size)

108 64


In [10]:
from gym_minigrid_navigation.encoders import get_encoders
# get_encoders yields a pair; only the first element is kept (as the state
# encoder), the second is discarded into `_`.
state_encoder, _ = get_encoders(config)

In [11]:
from ddpg import DDPGAgentMaster, MasterNetwork

# The master network is built from the embedding size read from the worker's
# network plus the state encoder; the DDPG master agent is then constructed
# from that network and the shared config.
master_network = MasterNetwork(emb_size, state_encoder)
master_agent = DDPGAgentMaster(master_network, config)

In [12]:
import gym
from gym.wrappers import Monitor
from gym_minigrid_navigation.environments import FullyObsWrapper, FullyRenderWrapper, PosObsWrapper

# NOTE(review): this rebinds `env`, so the environment built with gen_env for
# worker training is no longer reachable under that name. This environment is
# created directly through gym.make plus the wrappers below — confirm that
# bypassing gen_env (goal criterion / reward functions) is intended.
env = Monitor(
    FullyRenderWrapper(FullyObsWrapper(gym.make('MiniGrid-Empty-8x8-v0'))),
    config.env.video_path,
    force=True,
)

In [13]:
# Master training run: same call shape as the worker run, now with
# master_agent in the third positional slot.
# NOTE(review): this overwrites `scores`/`steps` from the worker run, and the
# saved output below reports achieved: 0.00 with steps: 256.00 for every
# logged block of 100 episodes — confirm the master setup before relying on it.
scores, steps = run_episodes(
    env,
    worker_agent,
    master_agent,
    n_episodes=1000,
    verbose=config['training.verbose'],
)

2021-03-23 16:26:16,871 INFO    navigation_policy      : Episode: 100. scores: 0.00, steps: 256.00, achieved: 0.00
2021-03-23 16:29:08,464 INFO    navigation_policy      : Episode: 200. scores: 0.00, steps: 256.00, achieved: 0.00
2021-03-23 16:32:00,957 INFO    navigation_policy      : Episode: 300. scores: 0.00, steps: 256.00, achieved: 0.00
2021-03-23 16:34:55,129 INFO    navigation_policy      : Episode: 400. scores: 0.00, steps: 256.00, achieved: 0.00
2021-03-23 16:37:54,089 INFO    navigation_policy      : Episode: 500. scores: 0.00, steps: 256.00, achieved: 0.00
2021-03-23 16:40:56,633 INFO    navigation_policy      : Episode: 600. scores: 0.00, steps: 256.00, achieved: 0.00
2021-03-23 16:44:06,437 INFO    navigation_policy      : Episode: 700. scores: 0.00, steps: 256.00, achieved: 0.00
2021-03-23 16:47:33,073 INFO    navigation_policy      : Episode: 800. scores: 0.00, steps: 256.00, achieved: 0.00
2021-03-23 16:51:13,137 INFO    navigation_policy      : Episode: 900. scores: 0