From 16e41980079f05fc38cb7246c14ed3fc2b50a77d Mon Sep 17 00:00:00 2001
From: ucacaxm
Date: Fri, 5 Oct 2018 09:47:54 +0200
Subject: [PATCH] Update Python-API.md

Hi,

First, thank you for your great work on ml-agents!

Since it was not obvious how to create a simple random agent with the Python
API, I would like to improve docs/Python-API.md with an example. The patch also
fixes the import to `from mlagents.envs.environment import UnityEnvironment`.

## A simple example of a random agent

```python
import numpy as np

from mlagents.envs.environment import UnityEnvironment


class RandomAgent(object):
    """The world's simplest agent: it samples uniformly random actions."""

    def __init__(self, action_space_size):
        print("Action size=" + str(action_space_size))
        self.action = np.empty(action_space_size, dtype=float)

    def act(self, observation, reward, done):
        # Ignore the observation and sample each action uniformly in [-1, 1].
        for j in range(self.action.shape[0]):
            self.action[j] = -1.0 + 2.0 * np.random.random_sample()
        return self.action


if __name__ == '__main__':
    # The 3DBall executable must be next to this script (or give a full path).
    env = UnityEnvironment(file_name="3DBall", worker_id=0, seed=1)

    episode_count = 100
    brain_name = 'BrainRoller'  # use the brain name shown in the Unity Editor
    brain = env.brains[brain_name]
    print("brain=" + str(brain))
    print("number of action branches=" + str(len(brain.vector_action_space_size)))
    print("action space type=" + str(brain.vector_action_space_type))
    print("action space size=" + str(brain.vector_action_space_size))
    print("observation space size=" + str(brain.vector_observation_space_size))
    print("=================================")

    agent = RandomAgent(brain.vector_action_space_size)

    agent_id = 0  # assume only one agent
    for i in range(episode_count):
        info = env.reset()
        game_over = False
        total_reward = 0
        while not game_over:
            brain_info = info[brain_name]
            action = agent.act(brain_info.vector_observations[agent_id],
                               brain_info.rewards[agent_id],
                               brain_info.local_done[agent_id])
            info = env.step(action)
            brain_info = info[brain_name]
            total_reward += brain_info.rewards[agent_id]
            game_over = brain_info.local_done[agent_id]

        print("Game over, total reward=" + str(total_reward))
    env.close()
```

---
 docs/Python-API.md | 65 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/docs/Python-API.md b/docs/Python-API.md
index a75bde9764..d3fb854a96 100644
--- a/docs/Python-API.md
+++ b/docs/Python-API.md
@@ -50,7 +50,7 @@ file, put the file in the same directory as `envs`. For example, if the
 filename of your Unity environment is 3DBall.app, in python, run:
 
 ```python
-from mlagents.envs import UnityEnvironment
+from mlagents.envs.environment import UnityEnvironment
 env = UnityEnvironment(file_name="3DBall", worker_id=0, seed=1)
 ```
 
@@ -143,6 +143,69 @@ variable named `env` in this example, can be used in the following way:
   Sends a shutdown signal to the environment and closes the communication
   socket.
+
+## A simple example of a random agent
+
+```python
+import numpy as np
+
+from mlagents.envs.environment import UnityEnvironment
+
+
+class RandomAgent(object):
+    """The world's simplest agent: it samples uniformly random actions."""
+
+    def __init__(self, action_space_size):
+        print("Action size=" + str(action_space_size))
+        self.action = np.empty(action_space_size, dtype=float)
+
+    def act(self, observation, reward, done):
+        # Ignore the observation and sample each action uniformly in [-1, 1].
+        for j in range(self.action.shape[0]):
+            self.action[j] = -1.0 + 2.0 * np.random.random_sample()
+        return self.action
+
+
+if __name__ == '__main__':
+    # The 3DBall executable must be next to this script (or give a full path).
+    env = UnityEnvironment(file_name="3DBall", worker_id=0, seed=1)
+
+    episode_count = 100
+    brain_name = 'BrainRoller'  # use the brain name shown in the Unity Editor
+    brain = env.brains[brain_name]
+    print("brain=" + str(brain))
+    print("number of action branches=" + str(len(brain.vector_action_space_size)))
+    print("action space type=" + str(brain.vector_action_space_type))
+    print("action space size=" + str(brain.vector_action_space_size))
+    print("observation space size=" + str(brain.vector_observation_space_size))
+    print("=================================")
+
+    agent = RandomAgent(brain.vector_action_space_size)
+
+    agent_id = 0  # assume only one agent
+    for i in range(episode_count):
+        info = env.reset()
+        game_over = False
+        total_reward = 0
+        while not game_over:
+            brain_info = info[brain_name]
+            action = agent.act(brain_info.vector_observations[agent_id],
+                               brain_info.rewards[agent_id],
+                               brain_info.local_done[agent_id])
+            info = env.step(action)
+            brain_info = info[brain_name]
+            total_reward += brain_info.rewards[agent_id]
+            game_over = brain_info.local_done[agent_id]
+
+        print("Game over, total reward=" + str(total_reward))
+    env.close()
+```
+
 
 ## mlagents-learn
 
 For more detailed documentation on using `mlagents-learn`, check out