# Worm


<img src="https://docs.unity3d.com/Packages/com.unity.ml-agents@4.0/manual/images/worm.png" alt="Worm environment" width="30%" height="30%"/>

- Set-up: A worm with a head and 3 body segments.
- Goal: The agent must move its body in the goal direction.
- Agents: The environment contains 10 agents with the same Behavior Parameters.
- Agent Reward Function (independent): The reward function is now geometric, meaning the reward at each step is the product of all the reward terms instead of their sum; this encourages the agent to maximize all of the rewards rather than only the easiest ones.
    - Body velocity matches goal velocity. (normalized between (0,1))
    - Body direction alignment with goal direction. (normalized between (0,1))
- Behavior Parameters:
    - Vector Observation space: 64 variables corresponding to position, rotation, velocity, and angular velocities of each limb plus the acceleration and angular acceleration of the body.
    - Actions: 9 continuous actions, corresponding to target rotations for joints.
    - Visual Observations: None
- Float Properties: None
- Benchmark Mean Reward: 800


In [1]:
import os
import platform

# Global settings -- every location is derived from the notebook's working directory.
cur_dir = os.getcwd()

# Folder that the Unity build path is resolved against: a sibling of cur_dir.
env_dir = os.path.join(cur_dir, "..", "Unity6000_Envs")
env_dir = os.path.abspath(env_dir)

# Folder handed to mlagents-learn as --results-dir.
output_dir = os.path.join(cur_dir, "temp", "mlagents_learn_output")
output_dir = os.path.abspath(output_dir)

In [2]:
# Unity Environment
# Select the prebuilt player matching the host OS. Builds are looked up in
# env_dir under the name "<game>_<platform.system()><suffix>".
game = "Worm"
os_name = platform.system()

if os_name == 'Linux':
    env_name = os.path.join(env_dir, f"{game}_{os_name}.x86_64")
elif os_name == 'Darwin':
    env_name = os.path.join(env_dir, f"{game}_{os_name}.app")
else:
    # Without this branch env_name is never assigned on other platforms and
    # the cell dies later with an unrelated-looking NameError.
    raise RuntimeError(
        f"No {game} build is configured for platform {os_name!r}; "
        "supported platforms are 'Linux' and 'Darwin'."
    )

# env_name is already rooted at env_dir, so it is used as-is; joining it with
# env_dir a second time would duplicate the prefix if env_dir were relative.
env_fp = env_name
print(env_fp)

# Port passed to mlagents-learn via --base-port.
baseport = 1991

/Users/hyunjae.k/110_HyunJae_Git/2025_Playgrounds/Unity_Robotics_Playgrounds/Unity6000_Envs/Worm_Darwin.app


## Training PPO

In [3]:
config_ppo_fp = os.path.join(cur_dir, "config", "Worm_ppo.yaml")
run_ppo_id = "Worm_PPO"
print(config_ppo_fp)
print(run_ppo_id)

!mlagents-learn $config_ppo_fp \
               --env=$env_fp \
               --results-dir=$output_dir \
               --run-id=$run_ppo_id --base-port=$baseport

/Users/hyunjae.k/110_HyunJae_Git/2025_Playgrounds/Unity_Robotics_Playgrounds/Agent_Scripts/config/Worm_ppo.yaml
Worm_PPO

            ┐  ╖
        ╓╖╬│╡  ││╬╖╖
    ╓╖╬│││││┘  ╬│││││╬╖
 ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗
 ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣
 ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣
 ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣
 ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜
 ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣
 ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣
   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣
      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜
          ╙╬╬╬╣╣╣╜
             ╙
        
 Version information:
  ml-agents: 1.1.0,
  ml-agents-envs: 1.1.0,
  Communicator API: 1.5.0,
  PyTorch: 2.8.0
[INFO] Connected to Unity environment wit

In [4]:
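# Optional: reload the "Worm_PPO" run with --resume --inference --time-scale=1
# instead of training it. Uncomment the lines below to use it.
# config_ppo_fp = os.path.join(cur_dir, "config", "Worm_ppo.yaml")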
# run_ppo_id = "Worm_PPO"

# !mlagents-learn $config_ppo_fp \
#                --env=$env_fp \
#                --results-dir=$output_dir \
#                --run-id=$run_ppo_id --base-port=$baseport --resume --inference --time-scale=1

## Training SAC

In [5]:
config_sac_fp = os.path.join(cur_dir, "config", "Worm_sac.yaml")
run_sac_id = "Worm_SAC"
print(config_sac_fp)
print(run_sac_id)

!mlagents-learn $config_sac_fp \
               --env=$env_fp \
               --results-dir=$output_dir \
               --run-id=$run_sac_id --base-port=$baseport


/Users/hyunjae.k/110_HyunJae_Git/2025_Playgrounds/Unity_Robotics_Playgrounds/Agent_Scripts/config/Worm_sac.yaml
Worm_SAC

            ┐  ╖
        ╓╖╬│╡  ││╬╖╖
    ╓╖╬│││││┘  ╬│││││╬╖
 ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗
 ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣
 ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣
 ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣
 ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜
 ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣
 ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣
   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣
      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜
          ╙╬╬╬╣╣╣╜
             ╙
        
 Version information:
  ml-agents: 1.1.0,
  ml-agents-envs: 1.1.0,
  Communicator API: 1.5.0,
  PyTorch: 2.8.0
[INFO] Connected to Unity environment wit

## Training POCA

In [7]:
config_poca_fp = os.path.join(cur_dir, "config", "Worm_poca.yaml")
run_poca_id = "Worm_POCA"
print(config_poca_fp)
print(run_poca_id)

!mlagents-learn $config_poca_fp \
               --env=$env_fp \
               --results-dir=$output_dir \
               --run-id=$run_poca_id --base-port=$baseport


/Users/hyunjae.k/110_HyunJae_Git/2025_Playgrounds/Unity_Robotics_Playgrounds/Agent_Scripts/config/Worm_poca.yaml
Worm_POCA

            ┐  ╖
        ╓╖╬│╡  ││╬╖╖
    ╓╖╬│││││┘  ╬│││││╬╖
 ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗
 ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣
 ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣
 ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣
 ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜
 ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣
 ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣
   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣
      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜
          ╙╬╬╬╣╣╣╜
             ╙
        
 Version information:
  ml-agents: 1.1.0,
  ml-agents-envs: 1.1.0,
  Communicator API: 1.5.0,
  PyTorch: 2.8.0
[INFO] Connected to Unity environment w