In [1]:
from animalai.envs.arena_config import Vector3, RGB, Item, Arena, ArenaConfig
from animalai.envs.environment import AnimalAIEnvironment
from mlagents_envs.exception import UnityCommunicationException

from typing import List
from animalai.communicator_objects import (
    ArenasConfigurationsProto,
    ArenaConfigurationProto,
    ItemToSpawnProto,
    VectorProto,
)

from typing import List
import yaml

In [2]:
import json
import jsonpickle
def save_config(arenas, json_path: str) -> None:
    """Serialize ``arenas`` with jsonpickle and write it to ``json_path`` as indented JSON.

    :param arenas: object to serialize; it is passed straight to ``jsonpickle.encode``.
    :param json_path: path of the JSON file to create or overwrite.
    """
    # Round-trip through json so the jsonpickle string can be re-emitted with indentation.
    encoded = json.loads(jsonpickle.encode(arenas))
    # The context manager closes (and flushes) the file even if json.dump raises;
    # previously the handle was left open for the garbage collector to clean up.
    with open(json_path, "w") as json_file:
        json.dump(encoded, json_file, indent=4)

## 1. Learning how to generate our own environments

In [3]:
## We redefine ArenaConfig (shadowing the class imported above) so that, instead of
## reading a .yaml file, it wraps the Arena object we create in code.

class ArenaConfig(yaml.YAMLObject):
    """Arena configuration built from an in-memory ``Arena`` rather than a YAML file.

    The single arena handed to the constructor is stored under key ``-1`` of
    ``self.arenas``. If the default ``None`` is kept, ``to_proto`` will fail
    because it calls ``to_proto()`` on every stored arena.
    """

    yaml_tag = u"!ArenaConfig"

    def __init__(self, my_arena: Arena = None):
        self.arenas = {-1: my_arena}

    def to_proto(self, seed: int = -1) -> ArenasConfigurationsProto:
        """Return an ``ArenasConfigurationsProto`` holding ``seed`` and every stored arena."""
        proto = ArenasConfigurationsProto()
        proto.seed = seed
        for arena_key, arena in self.arenas.items():
            proto.arenas[arena_key].CopyFrom(arena.to_proto())
        return proto

In [7]:
# Print the raw contents of the first curriculum configuration file.
with open('configurations/curriculum/0.yml') as f:
    print(f.read())

!ArenaConfig
arenas:
  -1: !Arena
    pass_mark: 0
    t: 250
    items:
    - !Item
      name: Agent
      positions:
      - !Vector3 {x: 20, y: 0, z: 20}
      rotations: [0]
    - !Item
      name: GoodGoal
      positions:
      - !Vector3 {x: 20, y: 0, z: 22}
      sizes:
      - !Vector3 {x: 1, y: 1, z: 1}


In [None]:
## Recreate the curriculum printed above in code instead of YAML.
## NOTE(review): the YAML places GoodGoal at z = 22, while this cell uses z = 35;
## confirm whether the difference is intentional.

# First we create the agent
position_agent = Vector3(x = 20, y = 0, z = 20)
rotation_agent = 0
agent = Item(name = 'Agent', positions = [position_agent], rotations = [rotation_agent])

# Then we create the goal
position_goal = Vector3(x = 20, y = 0, z = 35)
sizes_goal = Vector3(x = 1, y = 1, z = 1)
goal = Item(name = 'GoodGoal', positions = [position_goal], sizes = [sizes_goal])

# Define list of items
items = [agent, goal]

# Create Arena
my_arena = Arena(t=250, items=items, pass_mark = 0, blackouts = None)

# create arena configuration
my_config = ArenaConfig(my_arena)
# Convert to the protobuf message; as the last expression of the cell its repr is displayed.
my_config.to_proto()

In [None]:
# visualize environment just created

try:
    environment = AnimalAIEnvironment(
            file_name='env/AnimalAI',
            base_port=5007,
            arenas_configurations=my_config,
            play=True,
        )
except UnityCommunicationException:
    # you'll end up here if you close the environment window directly
    # always try to close it from script
    #
    # The constructor raised before `environment` was assigned, so there is no
    # handle from this run to close: calling environment.close() here would hit
    # a NameError on a fresh kernel or act on a stale object from an earlier run.
    # Bind None instead so the cleanup cell's `if environment:` check is safe.
    environment = None

In [None]:
# Close the Unity window from the script, then drop the handle so that
# re-running this cell is a no-op instead of closing the same environment twice.
if environment:
    environment.close() # takes a few seconds
    environment = None

## 2. Function to create environment config given position of goals

In [4]:
## For simplicity, we only modify the position of the goal (position of agent is fixed)

def create_arena(x, y, t=250, pass_mark=0):
    '''
    Create an arena configuration with only one agent and one goal.

    The agent is always placed at (x=20, y=0, z=20) with rotation 0; only the
    goal moves. The goal has size (1, 1, 1) and its own y coordinate is fixed
    at 0, so the ``y`` argument of this function is used as the goal's z
    coordinate.

    :param x: float. x position of the goal
    :param y: float. passed as the z position of the goal
    :param t: value forwarded to ``Arena(t=...)``; defaults to 250
    :param pass_mark: value forwarded to ``Arena(pass_mark=...)``; defaults to 0
    :return: ArenaConfig wrapping the created arena
    '''

    # Create agent (fixed position and rotation)
    position_agent = Vector3(x = 20, y = 0, z = 20)
    rotation_agent = 0
    agent = Item(name = 'Agent', positions = [position_agent], rotations = [rotation_agent])

    # Create the goal; note that the caller's `y` lands on the z axis
    position_goal = Vector3(x = x, y = 0, z = y)
    sizes_goal = Vector3(x = 1, y = 1, z = 1)
    goal = Item(name = 'GoodGoal', positions = [position_goal], sizes = [sizes_goal])

    # Define list of items
    items = [agent, goal]

    # Create Arena
    my_arena = Arena(t=t, items=items, pass_mark = pass_mark, blackouts = None)

    # create arena configuration
    my_config = ArenaConfig(my_arena)

    return my_config

In [5]:
# Quick check that the helper returns a configuration object (its repr is displayed).
create_arena(x=4, y=5)

<__main__.ArenaConfig at 0x7f586b731750>

In [None]:
# visualize environment just created
try:
    environment = AnimalAIEnvironment(
            file_name='env/AnimalAI',
            base_port=5007,
            arenas_configurations=create_arena(x = 30, y = 5),
            play=True,
        )
except UnityCommunicationException:
    # you'll end up here if you close the environment window directly
    # always try to close it from script
    #
    # The constructor raised before `environment` was assigned, so there is no
    # handle from this run to close: calling environment.close() here would hit
    # a NameError on a fresh kernel or act on a stale object from an earlier run.
    # Bind None instead so the cleanup cell's `if environment:` check is safe.
    environment = None

In [None]:
# Close the Unity window from the script, then drop the handle so that
# re-running this cell is a no-op instead of closing the same environment twice.
if environment:
    environment.close() # takes a few seconds
    environment = None

## 3. Train agents in environments created

In [6]:
import warnings
# Silence Python warnings from here on, including those raised while the
# packages below are imported.
warnings.filterwarnings('ignore')
import tensorflow as tf
# Restrict TensorFlow's logger to ERROR-level messages.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Trailing semicolons removed: they are no-ops on import statements.
from mlagents.trainers.trainer_util import load_config
from animalai_train.run_options_aai import RunOptionsAAI
from animalai_train.run_training_aai import run_training_aai

# The loop!

In [7]:
import os
import pandas as pd

In [None]:
# define configurations
trainer_config_path = (
    "configurations/training_configurations/train_ml_agents_config_ppo_10fs.yaml"
)
environment_path = "env/AnimalAI"
run_id_protagonist = "protagonist"
base_port_protagonist = 5005
# NOTE(review): "antatagonist" looks like a misspelling of "antagonist"; the same
# spelling appears in the summaries CSV path read later in this notebook, so any
# rename has to be applied consistently.
run_id_antagonist = "antatagonist"
base_port_antagonist = 5006

# first we create an easy arena
arena_config = create_arena(x=20, y=22)

# train protagonist
# (load_model is left unset here; the later loop cell passes load_model=True)
args = RunOptionsAAI(
    trainer_config=load_config(trainer_config_path),
    env_path=environment_path,
    run_id=run_id_protagonist,
    base_port=base_port_protagonist,
    #load_model=False,
    train_model=True,
    arena_config=arena_config 
)
# Make sure the log directory exists and point TensorBoard at it before training starts.
logs_dir = "summaries/"
os.makedirs(logs_dir, exist_ok=True)
%load_ext tensorboard
%tensorboard --logdir {logs_dir}
run_training_aai(0, args)

# train antagonist
# Same arena and trainer configuration as the protagonist; only run_id, base_port
# and the first argument of run_training_aai differ.
args = RunOptionsAAI(
    trainer_config=load_config(trainer_config_path),
    env_path=environment_path,
    run_id=run_id_antagonist,
    base_port=base_port_antagonist,
    #load_model=False,
    train_model=True,
    arena_config=arena_config 
)
# NOTE(review): the four lines below repeat the log-directory/TensorBoard setup
# already done for the protagonist above.
logs_dir = "summaries/"
os.makedirs(logs_dir, exist_ok=True)
%load_ext tensorboard
%tensorboard --logdir {logs_dir}
run_training_aai(1, args)

In [8]:
def update_trainer(trainer_config_path, steps_add=1.0e5):
    """Raise the ``max_steps`` value stored in the trainer configuration file.

    Reads the YAML file at ``trainer_config_path``, adds ``steps_add`` to
    ``['AnimalAI']['max_steps']`` and writes the document back to the same
    path, so every call increases the value stored on disk.

    :param trainer_config_path: path of the YAML trainer configuration, edited in place.
    :param steps_add: number of steps added to the current ``max_steps``.
    """
    with open(trainer_config_path) as f:
        # safe_load instead of yaml.load(f): calling yaml.load without a Loader
        # is deprecated since PyYAML 5.1 and is rejected by PyYAML 6.
        list_doc = yaml.safe_load(f)

    # float() normalizes the stored value, whether YAML loaded it as a number
    # or as a numeric string.
    #list_doc['AnimalAI']['max_steps']  = 1.0e5
    list_doc['AnimalAI']['max_steps'] = float(list_doc['AnimalAI']['max_steps']) + steps_add

    with open(trainer_config_path, "w") as f:
        yaml.dump(list_doc, f)

In [9]:
# enter the loop 
# define configurations
trainer_config_path = (
    "configurations/training_configurations/train_ml_agents_config_ppo_10fs.yaml"
)
environment_path = "env/AnimalAI"
run_id_protagonist = "protagonist"
base_port_protagonist = 5007
run_id_antagonist = "antatagonist"
base_port_antagonist = 6008
nb_new_environments = 4

from random import randint


def _print_training_summary(title, df):
    """Print ``title`` followed by the first, middle and last rows of a training summary.

    :param title: header line printed before the three rows.
    :param df: DataFrame read from a summaries CSV; it must contain the columns
        'Steps', 'Environment/Cumulative Reward' and 'Environment/Episode Length'
        and be indexed 0..len(df)-1 (the default index of ``pd.read_csv``).
    """
    print(title)
    # Same three rows that were previously printed by copy-pasted statements.
    for row in (0, len(df) // 2, len(df) - 1):
        print('Steps: ', df.loc[row, 'Steps'], ' Cumulative reward: ', df.loc[row, 'Environment/Cumulative Reward'],
              ' Episode Length: ', df.loc[row, 'Environment/Episode Length'])


for i in range(nb_new_environments):

    # Draw a random goal position; a coordinate equal to 20 (the agent's x and z)
    # is moved to 22.
    x = randint(1, 39)
    y = randint(1, 39)
    if x == 20:
        x = 22
    if y == 20:
        y = 22
    print('x: ' + str(x) + ', y: ' + str(y))

    # generate random arena
    arena_config = create_arena(x = x, y = y)

    # add steps to the trainer
    update_trainer(trainer_config_path)

    # train protagonist (a different port is used on every iteration)
    args = RunOptionsAAI(
        trainer_config=load_config(trainer_config_path),
        env_path=environment_path,
        run_id=run_id_protagonist,
        base_port=base_port_protagonist + i,
        load_model=True,
        train_model=True,
        arena_config=arena_config 
    )
    run_training_aai(0, args)

    data_path = 'summaries/protagonist_AnimalAI.csv'
    df = pd.read_csv(data_path)
    _print_training_summary('PROTAGONIST: ', df)

    # train antagonist on the same arena
    args = RunOptionsAAI(
        trainer_config=load_config(trainer_config_path),
        env_path=environment_path,
        run_id=run_id_antagonist,
        base_port=base_port_antagonist + i,
        load_model=True,
        train_model=True,
        arena_config=arena_config 
    )
    run_training_aai(1, args)

    data_path = 'summaries/antatagonist_AnimalAI.csv'
    df = pd.read_csv(data_path)
    _print_training_summary('ANTAGONIST: ', df)

x: 4, y: 17
Converting ./models/protagonist/AnimalAI/frozen_graph_def.pb to ./models/protagonist/AnimalAI.nn
GLOBALS: 'is_continuous_control', 'version_number', 'memory_size', 'action_output_shape'
IN: 'visual_observation_0': [-1, 84, 84, 3] => 'policy/main_graph_0_encoder0/conv_1/BiasAdd'
IN: 'vector_observation': [-1, 1, 1, 3] => 'policy/main_graph_0/hidden_0/BiasAdd'
IN: 'action_masks': [-1, 1, 1, 6] => 'policy_1/strided_slice'
IN: 'action_masks': [-1, 1, 1, 6] => 'policy_1/strided_slice_1'
OUT: 'policy/concat/concat', 'action'
DONE: wrote ./models/protagonist/AnimalAI.nn file.
PROTAGONIST: 
Steps:  310000  Cumulative reward:  -0.9651249320362696  Episode Length:  245.775
Steps:  410000  Cumulative reward:  -0.5592836298524065  Episode Length:  207.53191489361703
Steps:  500000  Cumulative reward:  -0.4974761332375739  Episode Length:  199.42857142857144
Converting ./models/antatagonist/AnimalAI/frozen_graph_def.pb to ./models/antatagonist/AnimalAI.nn
GLOBALS: 'is_continuous_control