Unity-Technologies · pderichai · Aug 31, 2018 · Aug 30, 2018 · Aug 31, 2018
diff --git a/Dockerfile b/Dockerfile
@@ -122,16 +122,11 @@ RUN apt-get update && apt-get -y upgrade
 # xvfb is used to do CPU based rendering of Unity
 RUN apt-get install -y xvfb
 
-
-COPY ml-agents/requirements.txt .
-RUN pip install --trusted-host pypi.python.org -r requirements.txt
-
-COPY README.md .
 COPY ml-agents /ml-agents
 WORKDIR /ml-agents
 RUN pip install .
 
 # port 5005 is the port used in in Editor training.
 EXPOSE 5005
 
-ENTRYPOINT ["python", "mlagents/learn.py"]
+ENTRYPOINT ["mlagents-learn"]
diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md
@@ -19,7 +19,7 @@ project to decide the best course of action for an agent.
 
 Use the command `mlagents-learn` to train your agents. This command is installed
 with the `mlagents` package and its implementation can be found at
-`ml-agents/learn.py`. The [configuration file](#training-config-file),
+`ml-agents/mlagents/trainers/learn.py`. The [configuration file](#training-config-file),
 `config/trainer_config.yaml` specifies the hyperparameters used during training.
 You can edit this file with a text editor to add a specific configuration for
 each brain.

diff --git a/docs/Using-Docker.md b/docs/Using-Docker.md
@@ -26,10 +26,11 @@ agents using camera-based visual observations might be slower.
 
 - Since Docker runs a container in an environment that is isolated from the host
   machine, a mounted directory in your host machine is used to share data, e.g.
-  the Unity executable, curriculum files and TensorFlow graph. For convenience,
-  we created an empty `unity-volume` directory at the root of the repository for
-  this purpose, but feel free to use any other directory. The remainder of this
-  guide assumes that the `unity-volume` directory is the one used.
+  the trainer configuration file, Unity executable, curriculum files and
+  TensorFlow graph. For convenience, we created an empty `unity-volume`
+  directory at the root of the repository for this purpose, but feel free to use
+  any other directory. The remainder of this guide assumes that the
+  `unity-volume` directory is the one used.
 
 ## Usage
 
@@ -84,7 +85,7 @@ docker run --name <container-name> \
            -p 5005:5005 \
            <image-name>:latest \
            --docker-target-name=unity-volume \
-           <trainer-config-path> \
+           <trainer-config-file> \
            --env=<environment-name> \
            --train \
            --run-id=<run-id>
@@ -108,8 +109,8 @@ Notes on argument values:
 - `docker-target-name`: Tells the ML-Agents Python package what the name of the
   disk where it can read the Unity executable and store the graph. **This should
   therefore be identical to `target`.**
-- `trainer-config-path`, `train`, `run-id`: ML-Agents arguments passed to
-  `mlagents-learn`. `trainer-config-path` is the filepath of the trainer config
+- `trainer-config-file`, `train`, `run-id`: ML-Agents arguments passed to
+  `mlagents-learn`. `trainer-config-file` is the filename of the trainer config
   file, `train` trains the algorithm, and `run-id` is used to tag each
   experiment with a unique identifier. We recommend placing the trainer-config
   file inside `unity-volume` so that the container has access to the file.
@@ -122,7 +123,8 @@ docker run --name 3DBallContainer.first.trial \
            -p 5005:5005 \
            balance.ball.v0.1:latest 3DBall \
            --docker-target-name=unity-volume \
-           <trainer-config-path> \
+           trainer_config.yaml \
+           --env=3DBall \
            --train \
            --run-id=3dball_first_trial
 ```

diff --git a/ml-agents/mlagents/learn.py → ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/learn.py → ml-agents/mlagents/trainers/learn.py
@@ -7,8 +7,8 @@
 import numpy as np
 from docopt import docopt
 
-from mlagents.trainers.trainer_controller import TrainerController
-from mlagents.trainers.exception import TrainerError
+from .trainer_controller import TrainerController
+from .exception import TrainerError
 
 
 def run_training(sub_id, run_seed, run_options):
@@ -19,24 +19,20 @@ def run_training(sub_id, run_seed, run_options):
     :param run_options: Command line arguments for training.
     """
     # Docker Parameters
-    if run_options['--docker-target-name'] == 'Empty':
-        docker_target_name = ''
-    else:
-        docker_target_name = run_options['--docker-target-name']
+    docker_target_name = (run_options['--docker-target-name']
+        if run_options['--docker-target-name'] != 'None' else None)
 
     # General parameters
-    env_path = run_options['--env']
-    if env_path == 'None':
-        env_path = None
+    env_path = (run_options['--env']
+        if run_options['--env'] != 'None' else None)
     run_id = run_options['--run-id']
     load_model = run_options['--load']
     train_model = run_options['--train']
     save_freq = int(run_options['--save-freq'])
     keep_checkpoints = int(run_options['--keep-checkpoints'])
     worker_id = int(run_options['--worker-id'])
-    curriculum_file = str(run_options['--curriculum'])
-    if curriculum_file == 'None':
-        curriculum_file = None
+    curriculum_file = (run_options['--curriculum']
+        if run_options['--curriculum'] != 'None' else None)
     lesson = int(run_options['--lesson'])
     fast_simulation = not bool(run_options['--slow'])
     no_graphics = run_options['--no-graphics']
@@ -72,7 +68,7 @@ def main():
     except:
         print('\n\n\tUnity Technologies\n')
 
-    logger = logging.getLogger('mlagents.learn')
+    logger = logging.getLogger('mlagents.trainers')
     _USAGE = '''
     Usage:
       mlagents-learn <trainer-config-path> [options]
@@ -91,7 +87,7 @@ def main():
       --slow                     Whether to run the game at training speed [default: False].
       --train                    Whether to train model, or only run inference [default: False].
       --worker-id=<n>            Number to add to communication port (5005) [default: 0].
-      --docker-target-name=<dt>  Docker volume to store training-specific files [default: Empty].
+      --docker-target-name=<dt>  Docker volume to store training-specific files [default: None].
       --no-graphics              Whether to run the environment in no-graphics mode [default: False].
     '''
 
@@ -112,7 +108,3 @@ def main():
         p = multiprocessing.Process(target=run_training, args=(i, run_seed, options))
         jobs.append(p)
         p.start()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py
@@ -45,8 +45,6 @@ def __init__(self, env_path, run_id, save_freq, curriculum_folder,
         :param no_graphics: Whether to run the Unity simulator in no-graphics
                             mode.
         """
-        self.trainer_config_path = trainer_config_path
-
         if env_path is not None:
             # Strip out executable extensions if passed
             env_path = (env_path.strip()
@@ -56,13 +54,18 @@ def __init__(self, env_path, run_id, save_freq, curriculum_folder,
                         .replace('.x86', ''))
 
         # Recognize and use docker volume if one is passed as an argument
-        if docker_target_name == '':
+        if not docker_target_name:
             self.docker_training = False
+            self.trainer_config_path = trainer_config_path
             self.model_path = './models/{run_id}'.format(run_id=run_id)
             self.curriculum_folder = curriculum_folder
             self.summaries_dir = './summaries'
         else:
             self.docker_training = True
+            self.trainer_config_path = \
+                '/{docker_target_name}/{trainer_config_path}'.format(
+                    docker_target_name=docker_target_name,
+                    trainer_config_path = trainer_config_path)
             self.model_path = '/{docker_target_name}/models/{run_id}'.format(
                 docker_target_name=docker_target_name,
                 run_id=run_id)

diff --git a/ml-agents/setup.py b/ml-agents/setup.py
@@ -43,7 +43,7 @@
 
     entry_points={
         'console_scripts': [
-            'mlagents-learn=mlagents.learn:main',
+            'mlagents-learn=mlagents.trainers.learn:main',
         ],
     },
 )