In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
import matplotlib.pyplot as plt
from pprint import pprint

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

plt.style.use('ggplot')

%load_ext autoreload
%autoreload 2

In [3]:
import sys
sys.path.append('..')

from gym_minigrid_navigation.utils import show_video
from train_worker import gen_navigation_env, get_agent, run_episode, run_episodes, get_goal_achieving_criterion

from rewards import get_reward_function
from utils import init_logger, switch_reproducibility_on, display_stats

init_logger('dqn')
init_logger('train_worker')
init_logger('environments')
init_logger('gym_minigrid_navigation.environments')

### config 

In [4]:
from pyhocon import ConfigFactory

config = ConfigFactory.parse_file('../conf/minigrid_first_step.hocon')
config['env']['video_path'] = './video/'

In [5]:
switch_reproducibility_on(config['seed'])

### environment 

In [6]:
config['env']['state_distance_network_path'] = '../outputs/models/state_distance_encoder.p'

In [7]:
env = gen_navigation_env(config['env'], verbose=True)

### agent 

In [8]:
from utils import init_logger

agent = get_agent(config)

2021-03-25 13:52:49,773 INFO    dqn                    : Running on device: cuda:0


### trainings 

In [None]:
scores, steps = run_episodes(env, agent, n_episodes=500, verbose=config['training.verbose'])

2021-03-25 13:55:23,022 INFO    train_worker           : Episode: 100. scores: -10.47, steps: 112.53, achieved: 0.71
2021-03-25 13:58:42,586 INFO    train_worker           : Episode: 200. scores: -12.29, steps: 130.14, achieved: 0.66


In [None]:
display_stats(scores, steps)

In [12]:
env.threshold

0.8

In [None]:
env = gen_env(config['env'], goal_achieving_criterion, reward_functions)

In [66]:
config = ConfigFactory.parse_file('../conf/minigrid_dqn_navigation_mlp.hocon')

In [67]:
from gym_minigrid_navigation import environments as minigrid_envs

In [68]:
from gym_minigrid_navigation.environments import gen_wrapped_env

In [69]:
import numpy as np

def random_grid_goal_generator(conf, verbose=False):
    env = gen_wrapped_env(conf)
    grid_size = env.unwrapped.grid.encode().shape[0]
    init_pos = np.array([1, 1])
    
    while True:
        goal_pos = None
        while goal_pos is None or (init_pos == goal_pos).all():
            goal_pos = np.random.randint(1, grid_size - 2, 2)
        if verbose and (goal_pos == np.array([1, 1])).all():
            print(f"Goal: {goal_pos}")

        env.unwrapped.agent_pos = goal_pos
        goal_state = env.observation(env.unwrapped.gen_obs())
        yield goal_state

In [70]:
a = random_grid_goal_generator(config['env'], True)

In [71]:
for x in range(1000):
    next(a)

In [41]:
np.random.randint(1, 5, 2)

array([3, 1])

In [59]:
env = gen_wrapped_env(config['env'])

In [73]:
s = next(a)
s.keys()

dict_keys(['mission', 'image', 'position'])

In [74]:
s['position']

array([2, 1])

### visualisation 

In [None]:
from gym_minigrid_navigation import environments as minigrid_envs

env = gen_env(config['env'], goal_achieving_criterion, reward_functions, verbose = True)
env = minigrid_envs.visualisation_wrapper(env, config['env.video_path'])

print(run_episode(env, agent, train_mode=False))

In [None]:
show_video()