# GECCO 2025
# MAPLE: Multi-Action Program through Linear Evolution for Continuous Multi-Action Reinforcement Learning
## Quentin Vacher, Stephen Kelly, Ali Naqvi, Nicolas Beuve, Tanya Djavaherpour, Mickaël Dardaillon and Karol Desnos

This notebook reproduces the results of the paper cited above for the MAPLE, MATPG and TPG algorithms.

To run this notebook, make sure you have compiled the project first (on the same OS as the one running the notebook!).

In [None]:
import os
import subprocess
import sys

# Directory the commands are launched from (relative to this notebook).
main_dir = "gegelati-apps/mujoco/"
# "bin" sub-directory of main_dir.
bin_dir = f"{main_dir}bin"

def run_command(command, cwd=None, verbose=True):
    """Run an external command and wait for it to finish.

    Args:
        command: Program and its arguments, as a list of strings.
        cwd: Directory to run the command from; ``None`` keeps the current one.
        verbose: When true, echo the command line and let the child process
            write to this process's stdout/stderr; otherwise discard its
            output.

    Raises:
        RuntimeError: If the command exits with a non-zero return code.
    """
    printable = " ".join(command)
    if verbose:
        print(f"\n> Executing: {printable}")

    # None lets the child inherit our streams; DEVNULL silences it.
    sink = None if verbose else subprocess.DEVNULL

    result = subprocess.run(command, cwd=cwd, stdout=sink, stderr=sink)
    if result.returncode != 0:
        # Raise rather than sys.exit(): the callers in this notebook guard the
        # call with `except RuntimeError` so that one failed run is reported
        # without aborting the remaining ones.
        raise RuntimeError(
            f"Command failed (exit code {result.returncode}): {printable}"
        )

### To train on a single environment and single algorithm with N seeds

In [None]:
# Number of seeds to run
nbSeed = 10

# Environment and algorithm
environment = "hopper"
algo = "MAPLE"

# Initial seed; run number i is launched with seed testSeed + i
testSeed = 123456789

# Parameter file
# NOTE(review): this path is not passed to the training command below, which
# reads the exported parameters under logs/ instead - confirm which is intended.
parameterFile = "params/{}/{}/params_{}.p0.json".format(algo, environment, environment)

# Log folder (created if it does not exist yet)
outputFolder = "retrained_logs/{}/{}/".format(algo, environment)
os.makedirs(outputFolder, exist_ok=True)

# If True, a file with all agents will be saved at each generation, else only the file of the current generation is saved replacing the former generation.
saveAllFiles = False

# Number of training runs launched so far (must exist before the loop increments it)
nbRunned = 0

for i in range(nbSeed):

    print(f"Training {i + 1}/{nbSeed}: {algo} - {environment} - seed {i}")

    # Paths are prefixed with "../../" because the command runs from main_dir.
    cmd = [
        "./bin/Release/mujoco",
        "-l", "../../" + outputFolder,
        "-p", f"../../logs/{algo}/{environment}/exported_params.{environment}.p0.json",
        "-u", environment,
        "-s", f"{testSeed + i}",
        "-g", f"{int(saveAllFiles)}"
    ]

    try:
        run_command(cmd, cwd=main_dir, verbose=True)
    except RuntimeError as err:
        print(f"\n{err}")
        print(f"Error with algo={algo}, env={environment}, seed={i}")

    nbRunned += 1


### To reproduce all the training runs of the paper

Running everything sequentially would take a very long time, so the cell below is only an example. We advise using a computation cluster to run all the trainings in parallel.

The number of CPUs can be specified in the parameter files

In [None]:
# Number of seeds to run
nbSeed = 10

# Environments and algorithms
allEnvironments = [
    "inverted_double_pendulum",
    "hopper",
    "half_cheetah",
    "walker2d",
    "ant",
    "humanoid"
]

allAlgos = [
    "MAPLE",
    "MATPG",
    "TPG"
]

# Initial seed; run number i is launched with seed testSeed + i
testSeed = 123456789

# If True, a file with all agents will be saved at each generation, else only the file of the current generation is saved replacing the former generation.
saveAllFiles = False

# Number of training runs launched so far (must exist before the loops increment it)
nbRunned = 0

for algo in allAlgos:
    for environment in allEnvironments:

        # Parameter file and log folder depend on the current algorithm and
        # environment, so they are computed here rather than once before the
        # loops (where they would use stale values and send every run to the
        # same folder).
        # NOTE(review): parameterFile is not passed to the training command
        # below, which reads the exported parameters under logs/ instead -
        # confirm which is intended.
        parameterFile = "params/{}/{}/params_{}.p0.json".format(algo, environment, environment)

        outputFolder = "retrained_logs/{}/{}/".format(algo, environment)
        os.makedirs(outputFolder, exist_ok=True)

        for i in range(nbSeed):

            print(f"Training {i + 1}/{nbSeed}: {algo} - {environment} - seed {i}")

            # Paths are prefixed with "../../" because the command runs from main_dir.
            cmd = [
                "./bin/Release/mujoco",
                "-l", "../../" + outputFolder,
                "-p", f"../../logs/{algo}/{environment}/exported_params.{environment}.p0.json",
                "-u", environment,
                "-s", f"{testSeed + i}",
                "-g", f"{int(saveAllFiles)}"
            ]

            try:
                run_command(cmd, cwd=main_dir, verbose=True)
            except RuntimeError as err:
                print(f"\n{err}")
                print(f"Error with algo={algo}, env={environment}, seed={i}")

            nbRunned += 1


### To visualise the results

The best option is to run the best agent saved at the end of a training (out_best....dot).

You can also use the file containing all the agents of a generation (out_lastGen....dot); however, since it holds multiple agents, one generation will first be run in order to keep only the best agent.

In [None]:
# Configuration of the training run to visualise
seed = 0
environment = "hopper"
algo = "MAPLE"

# Seed used for the visualisation run
testSeed = 987654321

# Agent and parameter files
agent_file_location = f"logs/{algo}/{environment}/out_best.{seed}.p0.{environment}.dot"
params_file_location = f"logs/{algo}/{environment}/exported_params.{environment}.p0.json"

# If you want a really fast visualisation
fastVisualisation = True

# NOTE(review): presumably this script starts the display server used for
# rendering - confirm against scripts/scriptServerMujoco.sh.
run_command(["./scripts/scriptServerMujoco.sh"], cwd=main_dir)

os.environ["DISPLAY"] = ":0"

# Paths are prefixed with "../../" because the command runs from main_dir.
cmd = [
    "./bin/Release/renderMujoco",
    "-d", "../../" + agent_file_location,
    "-p", "../../" + params_file_location,
    "-u", f"{environment}",
    "-s", f"{testSeed}",
    "-v", f"{int(fastVisualisation)}"
]

try:
    run_command(cmd, cwd=main_dir, verbose=True)
except RuntimeError as err:
    print(f"\n{err}")
    print(f"Error with algo={algo}, env={environment}, seed={seed}")
