From 16e41980079f05fc38cb7246c14ed3fc2b50a77d Mon Sep 17 00:00:00 2001
From: ucacaxm
Date: Fri, 5 Oct 2018 09:47:54 +0200
Subject: [PATCH] Update Python-API.md

Hi,

First, thank you for your great work on ml-agents!

Since it was not obvious how to create a simple random agent with the Python
API, I would like to improve docs/Python-API.md with an example. The patch also
fixes the import to `from mlagents.envs.environment import UnityEnvironment`.

## A simple example of a random agent

```python
import numpy as np

from mlagents.envs.environment import UnityEnvironment


class RandomAgent(object):
    """The world's simplest agent: it samples uniformly random actions."""

    def __init__(self, action_space_size):
        print("Action size=" + str(action_space_size))
        self.action = np.empty(action_space_size, dtype=float)

    def act(self, observation, reward, done):
        # Ignore the observation and sample each action uniformly in [-1, 1].
        for j in range(self.action.shape[0]):
            self.action[j] = -1.0 + 2.0 * np.random.random_sample()
        return self.action


if __name__ == '__main__':
    # The 3DBall executable must be next to this script (or give a full path).
    env = UnityEnvironment(file_name="3DBall", worker_id=0, seed=1)

    episode_count = 100
    brain_name = 'BrainRoller'  # use the brain name shown in the Unity Editor
    brain = env.brains[brain_name]
    print("brain=" + str(brain))
    print("number of action branches=" + str(len(brain.vector_action_space_size)))
    print("action space type=" + str(brain.vector_action_space_type))
    print("action space size=" + str(brain.vector_action_space_size))
    print("observation space size=" + str(brain.vector_observation_space_size))
    print("=================================")

    agent = RandomAgent(brain.vector_action_space_size)

    agent_id = 0  # assume only one agent
    for i in range(episode_count):
        info = env.reset()
        game_over = False
        total_reward = 0
        while not game_over:
            brain_info = info[brain_name]
            action = agent.act(brain_info.vector_observations[agent_id],
                               brain_info.rewards[agent_id],
                               brain_info.local_done[agent_id])
            info = env.step(action)
            brain_info = info[brain_name]
            total_reward += brain_info.rewards[agent_id]
            game_over = brain_info.local_done[agent_id]

        print("Game over, total reward=" + str(total_reward))
    env.close()
```

---
 docs/Python-API.md | 65 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/docs/Python-API.md b/docs/Python-API.md
index a75bde9764..d3fb854a96 100644
--- a/docs/Python-API.md
+++ b/docs/Python-API.md
@@ -50,7 +50,7 @@ file, put the file in the same directory as `envs`. For example, if the
 filename of your Unity environment is 3DBall.app, in python, run:
 
 ```python
-from mlagents.envs import UnityEnvironment
+from mlagents.envs.environment import UnityEnvironment
 env = UnityEnvironment(file_name="3DBall", worker_id=0, seed=1)
 ```
 
@@ -143,6 +143,69 @@ variable named `env` in this example, can be used in the following way:
   Sends a shutdown signal to the environment and closes the communication
   socket.
+
+## A simple example of a random agent
+
+```python
+import numpy as np
+
+from mlagents.envs.environment import UnityEnvironment
+
+
+class RandomAgent(object):
+    """The world's simplest agent: it samples uniformly random actions."""
+
+    def __init__(self, action_space_size):
+        print("Action size=" + str(action_space_size))
+        self.action = np.empty(action_space_size, dtype=float)
+
+    def act(self, observation, reward, done):
+        # Ignore the observation and sample each action uniformly in [-1, 1].
+        for j in range(self.action.shape[0]):
+            self.action[j] = -1.0 + 2.0 * np.random.random_sample()
+        return self.action
+
+
+if __name__ == '__main__':
+    # The 3DBall executable must be next to this script (or give a full path).
+    env = UnityEnvironment(file_name="3DBall", worker_id=0, seed=1)
+
+    episode_count = 100
+    brain_name = 'BrainRoller'  # use the brain name shown in the Unity Editor
+    brain = env.brains[brain_name]
+    print("brain=" + str(brain))
+    print("number of action branches=" + str(len(brain.vector_action_space_size)))
+    print("action space type=" + str(brain.vector_action_space_type))
+    print("action space size=" + str(brain.vector_action_space_size))
+    print("observation space size=" + str(brain.vector_observation_space_size))
+    print("=================================")
+
+    agent = RandomAgent(brain.vector_action_space_size)
+
+    agent_id = 0  # assume only one agent
+    for i in range(episode_count):
+        info = env.reset()
+        game_over = False
+        total_reward = 0
+        while not game_over:
+            brain_info = info[brain_name]
+            action = agent.act(brain_info.vector_observations[agent_id],
+                               brain_info.rewards[agent_id],
+                               brain_info.local_done[agent_id])
+            info = env.step(action)
+            brain_info = info[brain_name]
+            total_reward += brain_info.rewards[agent_id]
+            game_over = brain_info.local_done[agent_id]
+
+        print("Game over, total reward=" + str(total_reward))
+    env.close()
+```
+
 
 ## mlagents-learn
 
 For more detailed documentation on using `mlagents-learn`, check out