In [1]:
# Import from the public IPython.display module: importing `display` from
# IPython.core.display is deprecated and emits a DeprecationWarning.
from IPython.display import display, HTML

# Inject a CSS rule that sets the `.container` element to 90% width.
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
import matplotlib.pyplot as plt
from pprint import pprint

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

plt.style.use('ggplot')

%load_ext autoreload
%autoreload 2

In [3]:
import sys
# Make the parent directory importable for the imports that follow.
# Guarded so that re-running this cell does not keep appending duplicates.
if '..' not in sys.path:
    sys.path.append('..')

from gym_minigrid_navigation.utils import show_video

from utils import init_logger, switch_reproducibility_on, display_stats

# Initialise a logger for each of the named modules via the project helper.
for logger_name in (
    'train_master',
    'train_worker',
    'environments',
    'gym_minigrid_navigation.environments',
):
    init_logger(logger_name)

In [4]:
from pyhocon import ConfigFactory

# Parse the HOCON configuration file for this experiment.
config = ConfigFactory.parse_file('../conf/minigrid_second_step.hocon')
# Override the `video_path` entry of the `env` section with a notebook-local directory.
config['env']['video_path'] = './video/'

In [5]:
# Hand the configured seed to the project helper.
# NOTE(review): presumably this seeds the random number generators so runs are
# repeatable — confirm in utils.switch_reproducibility_on.
switch_reproducibility_on(config['seed'])

## Environment

In [19]:
from gym_minigrid_navigation.environments import gen_wrapped_env

# Settings handed to gen_wrapped_env.
env_config = dict(
    env_task='MiniGrid-Dynamic-Obstacles',
    grid_size=8,
    rgb_image=True,
    tile_size=4,
)

# Grid size scaled by the tile size (the tile size falls back to 1 when the key is absent).
grid_size = env_config.get('tile_size', 1) * env_config['grid_size']
env = gen_wrapped_env(env_config)

## Load `worker_agent` from the first step

In [25]:
import pickle
import torch
from dqn import get_dqn_agent

# Location of the pickled first-step worker and the device to run it on.
# Prefer the first GPU, but fall back to the CPU so that this cell does not
# request a CUDA device on machines that have none.
worker_config = {
    'path': '../outputs/models/minigrid_worker_draft.p',
    'device': 'cuda:0' if torch.cuda.is_available() else 'cpu',
}

# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load checkpoints that come from a trusted source.
# NOTE(review): if the checkpoint stores CUDA tensors, unpickling may still
# fail on a CPU-only machine — confirm how the file was saved.
with open(worker_config['path'], 'rb') as f:
    worker_agent = pickle.load(f)

# Move the worker's local Q-network onto the selected device.
device = torch.device(worker_config['device'])
worker_agent.qnetwork_local.to(device)

# Output size of the worker network's `master` sub-module; passed to
# MasterNetwork further down in the notebook.
emb_size = worker_agent.qnetwork_local.master.output_size

## Master agent

In [20]:
from gym_minigrid_navigation.encoders import get_encoder

# Encoder settings passed to get_encoder together with the grid size.
master_config = dict(
    state_encoder_type='simple_cnn',
    n_channels=[6, 16],
    kernel_sizes=[4, 3],
    max_pools=[4, 1],
)

state_encoder = get_encoder(grid_size, master_config)

In [21]:
from ddpg import DDPGAgentMaster, MasterNetwork

# Settings forwarded to DDPGAgentMaster; the meaning of each key is defined there.
# The device prefers the first GPU but falls back to the CPU so that this cell
# does not request a CUDA device on machines that have none.
master_agent_config = {
    'buffer_size': 1000000,
    'batch_size': 512,
    'noise_decay': 0.995,
    'min_noise': 0.01,
    'explore': True,
    'update_step': 4,
    'device': 'cuda:0' if torch.cuda.is_available() else 'cpu',
}

# Build the master network from the worker's embedding size and the state
# encoder, then wrap it in the DDPG master agent.
master_network = MasterNetwork(emb_size, state_encoder)
master_agent = DDPGAgentMaster(master_network, master_agent_config)

## Master training

In [None]:
from train_master import run_episodes

# Keyword arguments for run_episodes.
training_config = dict(n_episodes=1000, verbose=100)

# The keys of training_config are exactly the keyword names run_episodes is
# called with, so the dict is unpacked directly.
scores, steps = run_episodes(env, worker_agent, master_agent, **training_config)