# Walker


<img src="https://docs.unity3d.com/Packages/com.unity.ml-agents@4.0/manual/images/walker.png" alt="Walker environment" width="30%" height="30%"/>

- Set-up: Physics-based Humanoid agents with 26 degrees of freedom. These DOFs correspond to articulation of the following body-parts: hips, chest, spine, head, thighs, shins, feet, arms, forearms and hands.
- Goal: Each agent must move its body toward the goal direction without falling.
- Agents: The environment contains 10 independent agents with the same Behavior Parameters.
- Agent Reward Function (independent): The reward function is now geometric, meaning the reward at each step is the product of all the reward terms rather than their sum. This encourages the agent to maximize every term instead of only the easiest ones.
    - Body velocity matches goal velocity. (normalized between (0,1))
    - Head direction alignment with goal direction. (normalized between (0,1))
- Behavior Parameters:
    - Vector Observation space: 243 variables corresponding to position, rotation, velocity, and angular velocities of each limb, along with goal direction.
    - Actions: 39 continuous actions, corresponding to target rotations and strength applicable to the joints.
    - Visual Observations: None
- Float Properties: Four
    - gravity: Magnitude of gravity
        - Default: 9.81
        - Recommended Minimum:
        - Recommended Maximum:
    - hip_mass: Mass of the hip component of the walker
        - Default: 8
        - Recommended Minimum: 7
        - Recommended Maximum: 28
    - chest_mass: Mass of the chest component of the walker
        - Default: 8
        - Recommended Minimum: 3
        - Recommended Maximum: 20
    - spine_mass: Mass of the spine component of the walker
        - Default: 8
        - Recommended Minimum: 3
        - Recommended Maximum: 20
- Benchmark Mean Reward: 2500

In [5]:
import os
import platform

# Global settings
# All locations are resolved relative to the directory the notebook runs from.
cur_dir = os.getcwd()
# Sibling "Unity6000_Envs" folder, one level above the working directory.
env_dir = os.path.abspath(os.path.join(cur_dir, os.pardir, "Unity6000_Envs"))
# Passed to mlagents-learn as --results-dir by the training cells.
output_dir = os.path.abspath(
    os.path.join(cur_dir, "temp", "mlagents_learn_output")
)

In [6]:
# Unity Environment
# Select the pre-built Walker player that matches the host operating system.
game = "Walker"
os_name = platform.system()

# File extension of the player build for each supported platform.system() value.
env_extensions = {"Linux": "x86_64", "Darwin": "app"}
if os_name not in env_extensions:
    # Without this guard an unsupported OS leaves env_name unbound and the
    # cell dies later with an opaque NameError; fail early with a clear message.
    raise RuntimeError(
        f"No {game} build is configured for platform {os_name!r}; "
        f"supported platforms: {sorted(env_extensions)}"
    )

env_name = os.path.join(env_dir, f"{game}_{os_name}.{env_extensions[os_name]}")
# env_name is already rooted at env_dir, so it is the full path to the player;
# joining it with env_dir a second time is unnecessary.
env_fp = env_name
print(env_fp)
# Handed to mlagents-learn through --base-port in the training cells.
baseport = 1091

/Users/hyunjae.k/110_HyunJae_Git/2025_Playgrounds/Unity_Robotics_Playgrounds/Unity6000_Envs/Walker_Darwin.app


## Training PPO

In [None]:
config_ppo_fp = os.path.join(cur_dir, "config", "Walker_ppo.yaml")
run_ppo_id = "Walker_PPO"
print(config_ppo_fp)
print(run_ppo_id)

!mlagents-learn $config_ppo_fp \
               --env=$env_fp \
               --results-dir=$output_dir \
               --run-id=$run_ppo_id --base-port=$baseport

/Users/hyunjae.k/110_HyunJae_Git/2025_Playgrounds/Unity_Robotics_Playgrounds/Agent_Scripts/config/temp_Walker_ppo.yaml
Walker_PPO

            ┐  ╖
        ╓╖╬│╡  ││╬╖╖
    ╓╖╬│││││┘  ╬│││││╬╖
 ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗
 ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣
 ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣
 ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣
 ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜
 ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣
 ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣
   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣
      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜
          ╙╬╬╬╣╣╣╜
             ╙
        
 Version information:
  ml-agents: 1.1.0,
  ml-agents-envs: 1.1.0,
  Communicator API: 1.5.0,
  PyTorch: 2.8.0
[INFO] Connected to Unity enviro

In [None]:
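# Optional: resume the saved Walker_PPO run in inference mode (--resume --inference) at time scale 1.
# Uncomment the lines below to run it.
# config_ppo_fp = os.path.join(cur_dir, "config", "Walker_ppo.yaml")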
# run_ppo_id = "Walker_PPO"

# !mlagents-learn $config_ppo_fp \
#                --env=$env_fp \
#                --results-dir=$output_dir \
#                --run-id=$run_ppo_id --base-port=$baseport --resume --inference --time-scale=1


            ┐  ╖
        ╓╖╬│╡  ││╬╖╖
    ╓╖╬│││││┘  ╬│││││╬╖
 ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗
 ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣
 ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣
 ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣
 ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜
 ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣
 ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣
   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣
      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜
          ╙╬╬╬╣╣╣╜
             ╙
        
 Version information:
  ml-agents: 1.1.0,
  ml-agents-envs: 1.1.0,
  Communicator API: 1.5.0,
  PyTorch: 2.8.0
[INFO] Connected to Unity environment with package version 4.0.0 and communication version 1.5.0
[INFO] Connected new brain: Walker?team=0
[INFO] Hyperparameters 

## Training SAC

In [None]:
config_sac_fp = os.path.join(cur_dir, "config", "Walker_sac.yaml")
run_sac_id = "Walker_SAC"
print(config_sac_fp)
print(run_sac_id)

!mlagents-learn $config_sac_fp \
               --env=$env_fp \
               --results-dir=$output_dir \
               --run-id=$run_sac_id --base-port=$baseport


/Users/hyunjae.k/110_HyunJae_Git/2025_Playgrounds/Unity_Robotics_Playgrounds/Agent_Scripts/config/temp_Walker_sac.yaml
Walker_SAC

            ┐  ╖
        ╓╖╬│╡  ││╬╖╖
    ╓╖╬│││││┘  ╬│││││╬╖
 ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗
 ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣
 ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣
 ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣
 ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜
 ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣
 ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣
   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣
      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜
          ╙╬╬╬╣╣╣╜
             ╙
        
 Version information:
  ml-agents: 1.1.0,
  ml-agents-envs: 1.1.0,
  Communicator API: 1.5.0,
  PyTorch: 2.8.0
[INFO] Connected to Unity enviro

## Training POCA

In [8]:
config_poca_fp = os.path.join(cur_dir, "config", "Walker_poca.yaml")
run_poca_id = "Walker_POCA"
print(config_poca_fp)
print(run_poca_id)

!mlagents-learn $config_poca_fp \
               --env=$env_fp \
               --results-dir=$output_dir \
               --run-id=$run_poca_id --base-port=$baseport


/Users/hyunjae.k/110_HyunJae_Git/2025_Playgrounds/Unity_Robotics_Playgrounds/Agent_Scripts/config/Walker_poca.yaml
Walker_POCA

            ┐  ╖
        ╓╖╬│╡  ││╬╖╖
    ╓╖╬│││││┘  ╬│││││╬╖
 ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗
 ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣
 ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣
 ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣
 ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜
 ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣
 ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣
   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣
      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜
          ╙╬╬╬╣╣╣╜
             ╙
        
 Version information:
  ml-agents: 1.1.0,
  ml-agents-envs: 1.1.0,
  Communicator API: 1.5.0,
  PyTorch: 2.8.0
[INFO] Connected to Unity environme