In [1]:
#automatically reload modules when they have changed
%reload_ext autoreload
from pathlib import Path
import random
import os

from yaml_config_wrapper import Configuration
from RLcraft import MalmoMazeEnv

In [2]:
# Location of the YAML file that drives this notebook
yml_path = 'configs/mazes.yml'
# Parse the YAML file into a Configuration object
c = Configuration(config_src=yml_path)
# Take the first entry of the 'general' and 'tuner' sections (in that order)
c_general, c_tuner = (c.get_config(section)[0] for section in ('general', 'tuner'))
# Pull the run settings (and the nested environment settings) out of the tuner entry
run_config = c_tuner['config']
env_config = run_config['env_config']
# Keep only the 'config' mapping of the general entry
c_general = c_general['config']

Configuration file loaded successfully from path: /home/ubuntu/minerl/configs/mazes.yml
Configuration Tag: mazes-v1


In [7]:
# Close an environment left over from a previous run of this cell before
# creating a new one.
try:
    env.close()
except NameError:
    # First run in this kernel: `env` has not been created yet.
    pass
except Exception as close_error:
    # Best effort: report the problem, then carry on and build a fresh env.
    print(f"Warning: could not close previous environment: {close_error}")

# Set Schema path
# NOTE(review): machine-specific absolute path; adjust for other hosts.
os.environ['MALMO_XSD_PATH'] = '/home/ubuntu/minerl/MalmoPlatform/Schemas'

# Generate a seed for maze
print("Generating new seed ...")
maze_seed = random.randint(1, 9999)

print("Loading environment ...")
# Disabled alternatives for choosing a mission file at random
# (presumably for when the configured path contains a "*" wildcard - TODO confirm):
# num = random.randint(0, 8)
# mission_file = run_config["env_config"]["mission_file"].replace("*", str(num))
# from glob import glob
# mission_files = glob(run_config["env_config"]["mission_file"])
# mission_file = random.choice(mission_files)
env = MalmoMazeEnv(
    # `env_config` is `run_config["env_config"]`, so every setting below
    # comes from the same mapping.
    xml=env_config["mission_file"],
    width=600,
    height=600,
    millisec_per_tick=50,
    mazeseed=maze_seed,
    mission_timeout_ms=env_config['mission_timeout_ms'],
    step_reward=env_config['step_reward'],
    win_reward=env_config['win_reward'],
    lose_reward=env_config['lose_reward'],
    action_space=env_config['action_space'],
    client_port=env_config['client_port'],
    time_wait=env_config['time_wait'],
    max_loop=env_config['max_loop'],
)

print("Resetting environment ...")
# The value returned by reset() is not needed here.
_ = env.reset()
print("Environment Loaded!")

Generating new seed ...
Loading environment ...
client_port: 7771
Resetting environment ...
Environment Loaded!


In [8]:
# Print action indices 0-5 next to the entry the action space holds for each.
# NOTE(review): 6 is hard-coded; presumably the size of the configured
# action space - TODO confirm.
print("Possible actions and action space actions for current env:")
for action in range(6):
    mapped_command = env.action_space[action]
    print(f"Action: {action} -> {mapped_command}")

Possible actions and action space actions for current env:
Action: 0 -> move 1
Action: 1 -> move -1
Action: 2 -> strafe 1
Action: 3 -> strafe -1
Action: 4 -> turn 1
Action: 5 -> turn -1


In [9]:
# Draw one action from the action space and show the entry it indexes.
print("Sample an action")
action = env.action_space.sample()
sampled_command = env.action_space[action]
print(f'{action} -> {sampled_command}')

Sample an action
0 -> move 1


In [10]:
# Take an action and get the observation, reward, done, and info.
# The argument is an action index; index 3 is printed as "strafe -1" in the
# action listing output earlier in this notebook.
obs, reward, done, info = env.step(3)

In [16]:
# Report the values unpacked from the env.step() call.
step_summary = (("Reward: ", reward), ("Done: ", done), ("Info: ", info))
for label, value in step_summary:
    print(label, value)
# The observation is described by its type and shape rather than printed in full.
obs_type, obs_shape = type(obs), obs.shape
print(f"Observation: type={obs_type}, shape={obs_shape}")

Reward:  0.0
Done:  True
Info:  WorldState (ended): 0 obs, 1 rewards, 20 frames since last state.
Observation: type=<class 'numpy.ndarray'>, shape=(600, 800, 3)


In [11]:
# Show the first observation attached to the info returned by the last step.
# The info object can carry zero observations (the world state printed in
# this notebook reports "0 obs" once the mission has ended); indexing [0]
# would then raise, so guard the access.
if len(info.observations) > 0:
    print(info.observations[0])
else:
    print("No observations available in the last world state.")

TimestampedString: 2022-Dec-07 23:26:13.161756, {"DistanceTravelled":0,"TimeAlive":356,"MobsKilled":0,"PlayersKilled":0,"DamageTaken":0,"DamageDealt":0,"Life":20.0,"Score":0,"Food":20,"XP":0,"IsAlive":true,"Air":300,"Name":"Survivor","XPos":4.5,"YPos":227.0625,"ZPos":1.5,"Pitch":0.0,"Yaw":0.0,"WorldTime":6000,"TotalTime":378,"floor10x10":["air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","air","beacon","beacon","beacon","beacon","beacon","beacon","beacon","air","air","beacon","fire","carpet","carpet","carpet","carpet","beacon","air","air","beacon","fire","sea_lantern","carpet","sea_lantern","carpet","beacon","air","air","beacon","emerald_block","carpet","carpet","carpet","carpet","beacon","air","air","beacon","carpet","sea_lantern","carpet","sea_lantern","carpet","beacon","air","air","beacon","carpet","carpet","carpet","sea_lantern","carpet","beacon","air","air"]}


In [4]:
# Use the internal actions to move the agent
# (the command string is sent straight to the env's agent_host instead of
# going through env.step(); the call's return value is not captured here).
# move: walks forwards/backwards
#   0 -> Nothing, 1 -> Forward, -1 -> Backward
# strafe: walks left/right
#   0 -> Nothing, 1 -> Right, -1 -> Left
# turn: turns the camera left/right without moving
#   0 -> Nothing, 1 -> Right, -1 -> Left
# Per the table above, "turn 1" turns the camera to the right.
env.agent_host.sendCommand("turn 1")

In [6]:
# Close the environment once finished with it.
# NOTE(review): the execution counts show this cell was last run (In [6])
# before the environment-creation cell (In [7]); on a fresh kernel it should
# be run last.
env.close()