In [1]:
#automatically reload modules when they have changed
%reload_ext autoreload
from pathlib import Path
import random
import os

from yaml_config_wrapper import Configuration
from RLcraft import MalmoMazeEnv

In [2]:
def create_env(config):
    """Build a MalmoMazeEnv from a settings mapping.

    Args:
        config: mapping providing the keys "mission_file" (path to the
            mission XML file), "width", "height" and "millisec_per_tick".

    Returns:
        The newly constructed MalmoMazeEnv instance.
    """
    # The mission definition is passed to the environment as text, so the
    # file is read up front.
    mission_xml = Path(config["mission_file"]).read_text()
    # Collect the remaining constructor arguments straight from the mapping.
    env_kwargs = {
        key: config[key]
        for key in ("width", "height", "millisec_per_tick")
    }
    return MalmoMazeEnv(xml=mission_xml, **env_kwargs)

# Location of the YAML file that describes this experiment
yml_path = 'configs/lava_maze.yml'
# Parse the YAML file into a Configuration object
c = Configuration(config_src=yml_path)
# Fetch the 'general' and 'tuner' sections; each lookup is indexed at [0]
# to take its first entry
general_entry = c.get_config('general')[0]
c_tuner = c.get_config('tuner')[0]
# Pull the 'config' payload out of each entry
run_config = c_tuner['config']
c_general = general_entry['config']
# No environment has been created yet; later cells test this for None
env = None

Configuration file loaded successfully from path: /home/ubuntu/minerl/configs/lava_maze.yml
Configuration Tag: lava-maze-v1


In [3]:
# (Re)create the Malmo maze environment. Safe to re-run: an environment left
# over from a previous run of this cell is closed first.
if env is not None:
    env.close()
    # Drop the reference immediately. If anything below raises, `env` must not
    # keep pointing at the already-closed environment, otherwise the next run
    # of this cell would call close() on it a second time.
    env = None
# Set Schema path
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
os.environ['MALMO_XSD_PATH'] = '/home/ubuntu/minerl/MalmoPlatform/Schemas'
# Generate a seed for maze
print("Generating new seed ...")
maze_seed = random.randint(1, 9999)
print("Loading environment ...")
# The mission definition is passed to the environment as XML text.
xml = Path(run_config["env_config"]["mission_file"]).read_text()
# NOTE(review): width/height/millisec_per_tick are hard-coded here, while
# create_env() reads them from its config argument — confirm which is intended.
env = MalmoMazeEnv(
            xml=xml,
            width=800,
            height=600,
            millisec_per_tick=50,
            mazeseed=maze_seed,
            step_reward=c_general['step_reward'],
            win_reward=c_general['win_reward'],
            lose_reward=c_general['lose_reward'],
            action_space=c_general['action_space'],
            client_port=c_general['client_port'],
            time_wait=c_general['time_wait'],
            max_loop=c_general['max_loop'])
print("Resetting environment ...")
# The value returned by reset() is not needed in this cell.
_ = env.reset()
print("Environment Loaded!")

Generating new seed ...
Loading environment ...
Resetting environment ...
Environment Loaded!


In [5]:
# Header for the listing that follows
print("Possible actions and action space actions for current env:")
# Show, for each of the six action indices, the entry the action space
# holds at that index
for action in (0, 1, 2, 3, 4, 5):
    print(f"Action: {action} -> {env.action_space[action]}")

Possible actions and action space actions for current env:
Action: 0 -> move 1
Action: 1 -> move -1
Action: 2 -> strafe 1
Action: 3 -> strafe -1
Action: 4 -> turn 1
Action: 5 -> turn -1


In [None]:
# Draw one action index from the environment's action space and show the
# entry stored at that index.
print("Sample an action")
action = env.action_space.sample()
print(f'{action} -> {env.action_space[action]}')

Sample an action
3 -> strafe 1


In [9]:
# Take an action and get the observation, reward, done, and info.
# The argument is an action index; 0 is the first entry of the action space.
obs, reward, done, info = env.step(0)

In [6]:
# Report the scalar results of the last step, one labelled line each
for label, value in (("Reward: ", reward), ("Done: ", done), ("Info: ", info)):
    print(label, value)
# The observation is summarised by its type and shape rather than printed whole
print(f"Observation: type={type(obs)}, shape={obs.shape}")

Reward:  -1
Done:  False
Info:  {}
Observation: type=<class 'numpy.ndarray'>, shape=(600, 800, 3)


In [14]:
# Use the internal actions to move the agent by sending a command string
# directly through the environment's agent host, instead of env.step().
# move: walks forwards/backwards
#   0 -> Nothing, 1 -> Forward, -1 -> Backward
# strafe: walks left/right
#   0 -> Nothing, 1 -> Right, -1 -> Left
# turn: turns the camera left/right without moving
#   0 -> Nothing, 1 -> Right, -1 -> Left
# Here: "turn 0", i.e. the "Nothing" value for turning.
env.agent_host.sendCommand("turn 0")