In [1]:
import matplotlib.pyplot as plt
from pprint import pprint

%matplotlib inline
plt.rcParams['figure.figsize'] = (6.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

plt.style.use('ggplot')

%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Make the parent of the kernel's working directory importable so the
# project-local modules below can be resolved (presumably the repository
# root — confirm against the notebook's location).
# The membership check keeps this cell idempotent: re-running it no longer
# appends another duplicate '..' entry to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

from gym_minigrid_navigation.utils import show_video
from utils import init_logger, switch_reproducibility_on, display_stats

# Initialise the project's logging for the two logger names used by the
# worker-training part of this notebook.
init_logger('master_worker')
init_logger('train_worker')

In [3]:
from pyhocon import ConfigFactory

# Parse the HOCON experiment configuration from disk.
config_file = '../conf/minigrid_navigation_mlp_master_worker.hocon'
config = ConfigFactory.parse_file(config_file)

# Override the `video_path` entry of the `env` section for this notebook
# (presumably the directory where episode videos are written — confirm in the
# environment factory).
env_section = config['env']
env_section['video_path'] = './video/'

In [4]:
# Apply the project's reproducibility setup using the `seed` value from the
# loaded config (presumably seeds the random number generators — confirm in
# utils.switch_reproducibility_on).
switch_reproducibility_on(config['seed'])

# Train worker

In [5]:
import train_worker

In [6]:
# Build the environment passed to the worker training run below, from the
# `env` section of the config.
env = train_worker.gen_navigation_env(config['env'])

In [7]:
# Create the worker agent from the full config. It is used by the worker
# training run below and later handed to the master training run as well.
worker_agent = train_worker.get_agent(config)

In [None]:
# Run the worker episodes. The episode budget is fixed here; verbosity is read
# from the `training.verbose` entry of the config.
n_worker_episodes = 1000
worker_verbose = config['training.verbose']

scores, steps = train_worker.run_episodes(
    env,
    worker_agent,
    n_episodes=n_worker_episodes,
    verbose=worker_verbose,
)

In [None]:
# Display the `scores` and `steps` returned by the worker training run.
display_stats(scores, steps)

# Train master

In [None]:
import train_master
# Initialise logging for 'train_master' with the same project helper used
# earlier for 'master_worker' and 'train_worker'.
init_logger('train_master')

In [None]:
# Re-parse the same HOCON file to get a fresh config for the master stage.
# NOTE(review): rebinding `config` here discards the `video_path` override that
# was set on the earlier config object — confirm that is intended.
config = ConfigFactory.parse_file('../conf/minigrid_navigation_mlp_master_worker.hocon')

# Remove the goal-related entries from the `env` section before the master's
# environment is built from it. A missing key raises KeyError, as before.
for dropped_key in ('goal_achieving_criterion', 'goal_type'):
    del config['env'][dropped_key]

In [None]:
# Size the master agent from the trained worker: `emb_size` is read from
# `worker_agent.qnetwork_local.master.output_size`, so the worker agent must
# already exist in the kernel.
# NOTE(review): presumably this is the dimension of the goal embedding the
# worker consumes — confirm in train_master.get_master_agent.
emb_size = worker_agent.qnetwork_local.master.output_size
master_agent = train_master.get_master_agent(emb_size, config)

In [None]:
# Build the environment for the master training run using `gen_env` (not
# `gen_navigation_env`) from the current `config['env']`.
# NOTE: this rebinds `env`, replacing the worker environment created earlier
# under the same name.
env = train_worker.gen_env(config['env'])

In [None]:
# Run the master episodes with both agents. The episode budget is fixed here;
# verbosity is read from the `training.verbose` entry of the config.
n_master_episodes = 1000
master_verbose = config['training.verbose']

scores, steps = train_master.run_episodes(
    env,
    worker_agent,
    master_agent,
    n_episodes=n_master_episodes,
    verbose=master_verbose,
)

In [None]:
# Display the `scores` and `steps` returned by the master training run.
# NOTE: these names are shared with the worker run, so the master training
# cell must have been executed first or the worker's results are shown instead.
display_stats(scores, steps)