In [1]:
from og_marl.vault_utils.download_vault import *
from og_marl.vault_utils.analyse_vault import *
import os
import json
from IPython.display import display, HTML
import shutil

In [6]:
def make_one_card(source, env, task, datacard_dict, verbose=True):
    """Build the HTML snippet for one dataset card.

    A card covers one task and contains a histogram image, a one-row
    metadata table, the generation procedure text and a summary-statistics
    table with one row per dataset uid.

    Args:
        source: Top-level key of ``datacard_dict``; also selects the
            environment-version blurb and the image directory.
        env: Environment key under ``source``; also selects the action type.
        task: Task key under ``env``; also used in the image file name.
        datacard_dict: Nested mapping ``source -> env -> task -> uid -> info``
            where ``info`` provides the fields read below.
        verbose: When true (the default, which matches the earlier behaviour)
            the generated HTML is also printed.

    Returns:
        The card as an HTML string.

    Raises:
        KeyError: If ``source``/``env`` has no entry in the version or
            action-type tables, or an expected field is missing.
        IndexError: If the task holds no datasets, or the stored ``actions``
            structure has fewer than three entries.
    """
    datasets = datacard_dict[source][env][task]

    # Version blurb shown in the metadata table, keyed by source then env.
    version_dict = {
        "og_marl": {
            "smac_v1": "<a href='https://github.com/oxwhirl/smac/releases/tag/v1'>SMAC V1</a>, from OxWhiRL",
            "smac_v2": "<a href='https://github.com/oxwhirl/smacv2'>SMAC V2</a>, from OxWhiRL",
            "mamujoco": "<a href='https://github.com/schroederdewitt/multiagent_mujoco/releases/tag/v1.1'>V1.1</a>, Mujoco v210",
        },
        "cfcql": {
            "smac_v1": "<a href='https://github.com/oxwhirl/smac/releases/tag/v1'>SMAC V1</a>, from OxWhiRL",
        },
        "alberdice": {
            "rware": "<a href='https://github.com/dematsunaga/alberdice/tree/main/marl_env/marl/env'>Code included in Alberdice repository</a>",
        },
        "omar": {
            # The closing tag used to be "<a>", which left the anchor unclosed.
            "mpe": "<a href='https://github.com/ling-pan/OMAR/tree/master/multiagent-particle-envs'>Code included in OMAR repository</a>",
            "mamujoco": "<a href='https://github.com/schroederdewitt/multiagent_mujoco/releases/tag/v1.0'>V1.0</a>, Mujoco v200",
        },
        "omiga": {
            "smac_v1": "Modified version of SMAC v1, popularised by <a href='https://github.com/sanmuyang/multi-agent-PPO-on-SMAC?tab=readme-ov-file'>MAPPO </a>",
            "mamujoco": "<a href='https://github.com/schroederdewitt/multiagent_mujoco/releases/tag/v1.0'>V1.0</a>, Mujoco v200",
        },
    }

    action_type_dict = {
        "smac_v1": "Discrete",
        "smac_v2": "Discrete",
        "mamujoco": "Continuous",
        "mpe": "Discrete",
        "rware": "Discrete",
    }

    dataset_uids = list(datasets.keys())
    # The metadata table is filled from the first dataset of the task.
    first_info = datasets[dataset_uids[0]]
    # NOTE(review): the card-level fields (scenario name, download link,
    # generation procedure) have always come from the LAST dataset, because
    # the summary loop used to rebind the same ``uid`` variable. That is kept
    # so the output is unchanged; confirm whether the first was intended.
    last_info = datasets[dataset_uids[-1]]

    # --- metadata table (single row) ---
    metadata_headers = ["Environment name", "Version", "Agents", "Action type", "Observation size", "Reward type"]
    metadata_cells = [
        first_info['Environment name'],
        version_dict[source][env],
        # presumably index 2 of the stored actions shape is the agent axis — TODO confirm
        first_info['Structure']['actions'][2],
        action_type_dict[env],
        # presumably everything after the first three entries is the per-agent observation shape — TODO confirm
        first_info['Structure']['observations'][3:],
        "Dense",
    ]
    tab_1 = (
        "<table><tr>"
        + "".join(f"<th>{header}</th>" for header in metadata_headers)
        + "</tr><tr>"
        + "".join(f"<td>{cell}</td>" for cell in metadata_cells)
        + "</tr></table>"
    )

    # --- summary statistics table (one row per dataset uid) ---
    summary_headers = ["Uid", "Episode return mean", "Min return", "Max return", "Transitions", "Trajectories", "Joint SACo"]
    summary_rows = ["<tr>" + "".join(f"<th>{header}</th>" for header in summary_headers) + "</tr>"]
    for dataset_uid, info in datasets.items():
        row_cells = [
            dataset_uid,
            # &#177; is the HTML entity for the plus-minus sign
            f"{info['Mean episode return']:.2f} &#177; {info['Standard deviation episode return']:.2f}",
            f"{info['Min return']:.2f}",
            f"{info['Max return']:.2f}",
            info['Transitions'],
            info['Trajectories'],
            f"{info['Joint SACo']:.2f}",
        ]
        summary_rows.append("<tr>" + "".join(f"<td>{cell}</td>" for cell in row_cells) + "</tr>")
    html_table = "<table>" + "".join(summary_rows) + "</table>"

    # Histogram path, relative to the page the card is embedded in.
    image_url = f"../assets/vault_plots/{source}/{env}/{task}_histogram.png"

    scenario_name = last_info['Scenario name']
    download_link = last_info['Download link']
    generation_procedure = last_info['Generation procedure']

    one_datacard = (
        f"<div class=\"card\"><img src=\"{image_url}\" alt=\"{scenario_name}\" class=\"card-img\">"
        f"<div class=\"card-content\"><h2>{scenario_name} - <a href='{download_link}'>Download</a></h2>"
        f"<h3>Metadata</h3><p>{tab_1}</p>"
        f"<h3>Generation procedure for each dataset</h3><p>{generation_procedure}</p>"
        f"<h3>Summary statistics</h3><p>{html_table}</p></div></div>"
    )

    if verbose:
        print(one_datacard)

    return one_datacard

In [3]:

# Load the datacard metadata. Decoding the file yields a string that is itself
# JSON, so a second decoding pass turns it into the nested dictionary.
with open("./datacard_info.json") as current_info:
    datacard_str = json.load(current_info)

datacard_dict = json.loads(datacard_str)

In [7]:
# source = "og_marl"

# Human-readable page titles for each dataset source key.
pretty_source_names = {
    "og_marl": "OG MARL",
    "cfcql": "CFCQL",
    "omiga": "OMIGA",
    "omar":"OMAR",
    "alberdice":"Alberdice",
}

# Re-read the metadata so this cell does not depend on an earlier cell having
# been run. The file decodes to a string that is itself JSON, hence two passes.
# The encoding is explicit so the result does not vary with the platform default.
with open("./datacard_info.json", encoding="utf-8") as current_info:
    datacard_str = json.load(current_info)

datacard_dict = json.loads(datacard_str)

# Build, display and save one HTML page of dataset cards per source.
for source in datacard_dict.keys():
    # Fall back to the raw key so a newly added source does not abort the run.
    pretty_name = pretty_source_names.get(source, source)

    # The page is assembled from pieces and joined once at the end. The CSS
    # piece is a plain (non-f) string so its braces need no escaping.
    page_parts = [
        f"<!DOCTYPE html><html lang=\"en\"><head>    <meta charset=\"UTF-8\">    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">    <title>Dataset Cards - {pretty_name}</title>    <link rel=\"stylesheet\" href=\"styles.css\">    <style>    * ",
        "{    margin: 0;    padding: 0;    box-sizing: border-box;} body {    font-family: Arial, sans-serif;    background-color: #f4f4f4;} .container {    max-width: 1200px;    margin: 0 auto;    padding: 20px;} h1 {    text-align: center;    margin-bottom: 20px;} .card-grid {    display: grid;    grid-template-columns: repeat(auto-fill, minmax(500px, 1fr));    gap: 20px;} .card {    background-color: white;    border-radius: 8px;    box-shadow: 0 2px 5px rgba(0,0,0,0.1);    overflow: hidden;} .card-img {    width: 100%;    height: auto;} .card-content {    padding: 15px;} .card-content h2 {    font-size: 1.5em;    margin-bottom: 10px;} .card-content p {    color: #555;}    </style></head><body>    <div class=\"container\">        ",
        f"<h1>Dataset Cards - {pretty_name}</h1>        <div class=\"card-grid\">",
    ]

    for env in datacard_dict[source].keys():
        # gymnasium_mamujoco is skipped; presumably because make_one_card has
        # no version / action-type entry for it — TODO confirm.
        if env != "gymnasium_mamujoco":
            for task in datacard_dict[source][env].keys():
                page_parts.append(make_one_card(source,env,task,datacard_dict))

    page_parts.append("</div></div></body></html>")
    start = "".join(page_parts)

    display(HTML(start))

    # The page declares charset UTF-8, so write it as UTF-8 regardless of locale.
    with open(f'{source}.md', 'w', encoding="utf-8") as f:
        f.write(start)


<div class="card"><img src="../assets/vault_plots/omiga/smac_v1/2c_vs_64zg_histogram.png" alt="2c_vs_64zg" class="card-img"><div class="card-content"><h2>2c_vs_64zg - <a href='https://huggingface.co/datasets/InstaDeepAI/og-marl/resolve/main/prior_work/omiga/smac_v1/2c_vs_64zg.zip'>Download</a></h2><h3>Metadata</h3><p><table><tr><th>Environment name</th><th>Version</th><th>Agents</th><th>Action type</th><th>Observation size</th><th>Reward type</th></tr><tr><td>SMAC (v1)</td><td>Modified version of SMAC v1, popularised by <a href='https://github.com/sanmuyang/multi-agent-PPO-on-SMAC?tab=readme-ov-file'>MAPPO </a></td><td>2</td><td>Discrete</td><td>[478]</td><td>Dense</td></tr></table></p><h3>Generation procedure for each dataset</h3><p>Converted from omiga format to a Vault.</p><h3>Summary statistics</h3><p><table><tr><th>Uid</th><th>Episode return mean</th><th>Min return</th><th>Max return</th><th>Transitions</th><th>Trajectories</th><th>Joint SACo</th></tr><tr><td>Poor</td><td>8.91 &#1

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"Modified version of SMAC v1, popularised by MAPPO",2,Discrete,[478],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Poor,8.91 ± 1.01,2.53,10.0,10830,348,1.0
Medium,13.00 ± 1.39,10.01,15.0,37940,1001,1.0
Good,19.94 ± 1.26,15.18,21.61,59215,1001,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"Modified version of SMAC v1, popularised by MAPPO",6,Discrete,[172],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Poor,9.12 ± 0.81,4.8,9.99,24255,1001,1.0
Medium,11.97 ± 1.26,10.0,14.99,29511,1001,1.0
Good,17.84 ± 2.15,15.01,20.02,38040,1001,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"Modified version of SMAC v1, popularised by MAPPO",5,Discrete,[124],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Poor,8.50 ± 1.19,1.81,9.89,22747,1001,0.96
Medium,11.03 ± 0.58,10.08,11.96,27717,1001,0.95
Good,20.00 ± 0.00,20.0,20.0,27734,1001,0.96

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"Modified version of SMAC v1, popularised by MAPPO",6,Discrete,[346],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Poor,4.93 ± 1.71,0.0,9.99,51268,1001,1.0
Medium,13.07 ± 1.27,10.02,14.99,126012,1001,1.0
Good,19.88 ± 1.01,15.01,20.49,100170,1001,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
MAMuJoCo,"V1.0, Mujoco v200",6,Continuous,[23],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Medium-Replay,655.76 ± 590.40,-198.77,2132.6,1001000,1000,1.0
Medium-Expert,2105.38 ± 1073.24,251.94,3866.09,2002000,2000,1.0
Medium,1425.66 ± 520.12,251.94,2113.52,1001000,1000,1.0
Expert,2785.10 ± 1053.14,317.94,3866.09,1001000,1000,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
MAMuJoCo,"V1.0, Mujoco v200",2,Continuous,[113],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Medium-Replay,1029.51 ± 141.27,895.37,1517.06,1751750,1750,0.66
Medium-Expert,1736.88 ± 319.64,840.77,2124.15,2002000,2000,1.0
Medium,1418.70 ± 37.04,840.77,1473.86,1001000,1000,1.0
Expert,2055.07 ± 22.07,1994.03,2124.15,1001000,1000,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
MAMuJoCo,"V1.0, Mujoco v200",3,Continuous,[14],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Medium-Replay,746.42 ± 671.89,70.76,2801.15,1314826,4160,1.0
Medium-Expert,1190.61 ± 973.40,95.27,3762.69,1919782,5481,1.0
Medium,723.57 ± 211.66,128.38,2776.49,919391,4000,1.0
Expert,2452.02 ± 1097.86,95.27,3762.69,1000391,1481,1.0


<div class="card"><img src="../assets/vault_plots/og_marl/smac_v1/3m_histogram.png" alt="3m" class="card-img"><div class="card-content"><h2>3m - <a href='https://huggingface.co/datasets/InstaDeepAI/og-marl/resolve/main/core/smac_v1/3m.zip'>Download</a></h2><h3>Metadata</h3><p><table><tr><th>Environment name</th><th>Version</th><th>Agents</th><th>Action type</th><th>Observation size</th><th>Reward type</th></tr><tr><td>SMAC (v1)</td><td><a href='https://github.com/oxwhirl/smac/releases/tag/v1'>SMAC V1</a>, from OxWhiRL</td><td>3</td><td>Discrete</td><td>[30]</td><td>Dense</td></tr></table></p><h3>Generation procedure for each dataset</h3><p>A QMIX system was trained to target level of performance. The learnt policy was then rolled out to collect approximately 250k transitions. An epsilon greedy policy with eps=0.05 was used. This procedure was repeated 4 times and the data was combined.</p><h3>Summary statistics</h3><p><table><tr><th>Uid</th><th>Episode return mean</th><th>Min return</t

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"SMAC V1, from OxWhiRL",3,Discrete,[30],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Poor,4.69 ± 2.14,0.0,20.0,997370,48779,0.81
Medium,9.96 ± 6.06,0.0,20.0,995313,41619,0.85
Good,16.49 ± 5.92,0.0,20.0,996366,43559,0.8

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"SMAC V1, from OxWhiRL",8,Discrete,[80],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Poor,5.28 ± 0.56,0.0,7.62,995144,20629,0.64
Medium,10.14 ± 3.34,0.0,20.0,996501,39208,0.96
Good,16.86 ± 4.33,0.19,20.0,997785,30638,0.86

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"SMAC V1, from OxWhiRL",5,Discrete,[55],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Poor,7.45 ± 1.48,0.0,20.0,934505,45501,0.85
Medium,12.62 ± 5.06,0.0,20.0,996856,39284,0.87
Good,16.58 ± 4.69,0.0,20.0,996727,36311,0.84

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"SMAC V1, from OxWhiRL",5,Discrete,[80],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Poor,6.88 ± 2.06,0.0,13.61,996418,9942,0.96
Medium,12.57 ± 3.14,0.0,21.3,996256,18605,0.98
Good,18.32 ± 2.95,0.0,21.62,995829,18616,0.98

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"SMAC V1, from OxWhiRL",8,Discrete,[136],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Poor,5.90 ± 2.22,0.19,11.93,996474,17807,0.96
Medium,10.69 ± 1.49,0.0,17.67,996699,18866,0.97
Good,16.56 ± 3.72,6.3,24.46,996528,7315,0.97

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v2),"SMAC V2, from OxWhiRL",5,Discrete,[82],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Replay,10.05 ± 5.84,0.0,36.34,898164,17958,1.0
Random,2.43 ± 1.73,0.0,16.18,1500000,37874,0.91

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v2),"SMAC V2, from OxWhiRL",10,Discrete,[162],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Replay,6.32 ± 3.62,0.0,23.01,749850,13588,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v2),"SMAC V2, from OxWhiRL",5,Discrete,[82],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Replay,7.34 ± 3.60,0.0,24.0,863281,23294,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
MAMuJoCo,"V1.1, Mujoco v210",2,Continuous,[13],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Poor,400.45 ± 333.96,-191.49,905.03,1000000,1000,1.0
Medium,1485.00 ± 469.14,689.43,2332.17,1000000,1000,1.0
Good,6924.11 ± 1270.39,803.12,9132.25,1000000,1000,1.0


<div class="card"><img src="../assets/vault_plots/cfcql/smac_v1/6h_vs_8z_histogram.png" alt="6h_vs_8z" class="card-img"><div class="card-content"><h2>6h_vs_8z - <a href='https://huggingface.co/datasets/InstaDeepAI/og-marl/resolve/main/prior_work/cfcql/smac_v1/6h_vs_8z.zip'>Download</a></h2><h3>Metadata</h3><p><table><tr><th>Environment name</th><th>Version</th><th>Agents</th><th>Action type</th><th>Observation size</th><th>Reward type</th></tr><tr><td>SMAC (v1)</td><td><a href='https://github.com/oxwhirl/smac/releases/tag/v1'>SMAC V1</a>, from OxWhiRL</td><td>6</td><td>Discrete</td><td>[78]</td><td>Dense</td></tr></table></p><h3>Generation procedure for each dataset</h3><p>Converted from cfcql format to a Vault.</p><h3>Summary statistics</h3><p><table><tr><th>Uid</th><th>Episode return mean</th><th>Min return</th><th>Max return</th><th>Transitions</th><th>Trajectories</th><th>Joint SACo</th></tr><tr><td>Mixed</td><td>17.81 &#177; 2.88</td><td>9.14</td><td>20.17</td><td>217723</td><td>5

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"SMAC V1, from OxWhiRL",6,Discrete,[78],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Mixed,17.81 ± 2.88,9.14,20.17,217723,5000,0.24
Medium-Replay,12.97 ± 2.22,0.81,20.03,182403,5000,1.0
Medium,16.63 ± 3.03,9.8,20.0,207008,5000,0.12
Expert,19.01 ± 2.11,9.14,20.17,228120,5000,0.12

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"SMAC V1, from OxWhiRL",3,Discrete,[48],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Mixed,21.04 ± 2.51,5.58,29.0,888375,5000,0.23
Medium-Replay,18.85 ± 4.20,4.03,28.53,1082739,5000,0.99
Medium,20.86 ± 3.47,5.58,29.0,1174576,5000,0.11
Expert,21.19 ± 0.70,9.21,24.87,600520,5000,0.12

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"SMAC V1, from OxWhiRL",5,Discrete,[55],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Mixed,15.11 ± 5.11,6.38,20.0,131703,5000,0.22
Medium-Replay,9.02 ± 2.59,4.57,20.0,118405,5000,0.96
Medium,12.05 ± 4.36,6.38,20.0,135256,5000,0.1
Expert,18.17 ± 3.79,7.13,20.0,128536,5000,0.12

Environment name,Version,Agents,Action type,Observation size,Reward type
SMAC (v1),"SMAC V1, from OxWhiRL",5,Discrete,[80],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Mixed,16.39 ± 4.33,7.96,20.27,232528,5000,0.25
Medium-Replay,7.94 ± 3.41,2.0,20.12,100121,1976,1.0
Medium,12.76 ± 3.32,7.96,20.27,253992,5000,0.12
Expert,19.97 ± 0.37,13.9,20.08,211832,5000,0.12


<div class="card"><img src="../assets/vault_plots/alberdice/rware/small-2ag_histogram.png" alt="small-2ag" class="card-img"><div class="card-content"><h2>small-2ag - <a href='https://huggingface.co/datasets/InstaDeepAI/og-marl/resolve/main/prior_work/alberdice/rware/small-2ag.zip'>Download</a></h2><h3>Metadata</h3><p><table><tr><th>Environment name</th><th>Version</th><th>Agents</th><th>Action type</th><th>Observation size</th><th>Reward type</th></tr><tr><td>RWARE</td><td><a href='https://github.com/dematsunaga/alberdice/tree/main/marl_env/marl/env'>Code included in Alberdice repository</a></td><td>2</td><td>Discrete</td><td>[71]</td><td>Dense</td></tr></table></p><h3>Generation procedure for each dataset</h3><p>Converted from alberdice format to a Vault.</p><h3>Summary statistics</h3><p><table><tr><th>Uid</th><th>Episode return mean</th><th>Min return</th><th>Max return</th><th>Transitions</th><th>Trajectories</th><th>Joint SACo</th></tr><tr><td>Expert</td><td>7.12 &#177; 2.07</td><t

Environment name,Version,Agents,Action type,Observation size,Reward type
RWARE,Code included in Alberdice repository,2,Discrete,[71],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Expert,7.12 ± 2.07,1.13,12.37,500000,1000,0.99

Environment name,Version,Agents,Action type,Observation size,Reward type
RWARE,Code included in Alberdice repository,4,Discrete,[71],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Expert,9.49 ± 0.84,3.93,12.08,500000,1000,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
RWARE,Code included in Alberdice repository,6,Discrete,[71],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Expert,10.76 ± 0.68,7.59,12.69,500000,1000,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
RWARE,Code included in Alberdice repository,2,Discrete,[71],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Expert,12.77 ± 1.56,1.97,16.81,500000,1000,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
RWARE,Code included in Alberdice repository,4,Discrete,[71],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Expert,15.67 ± 1.20,10.4,18.63,500000,1000,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
RWARE,Code included in Alberdice repository,6,Discrete,[71],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Expert,17.45 ± 1.01,11.88,19.97,500000,1000,1.0


<div class="card"><img src="../assets/vault_plots/omar/mpe/simple_spread_histogram.png" alt="simple_spread" class="card-img"><div class="card-content"><h2>simple_spread - <a href='https://huggingface.co/datasets/InstaDeepAI/og-marl/resolve/main/prior_work/omar/mpe/simple_spread.zip'>Download</a></h2><h3>Metadata</h3><p><table><tr><th>Environment name</th><th>Version</th><th>Agents</th><th>Action type</th><th>Observation size</th><th>Reward type</th></tr><tr><td>MPE</td><td><a href='https://github.com/ling-pan/OMAR/tree/master/multiagent-particle-envs'>Code included in OMAR repository<a></td><td>3</td><td>Discrete</td><td>[18]</td><td>Dense</td></tr></table></p><h3>Generation procedure for each dataset</h3><p>Converted from omar format to a Vault.</p><h3>Summary statistics</h3><p><table><tr><th>Uid</th><th>Episode return mean</th><th>Min return</th><th>Max return</th><th>Transitions</th><th>Trajectories</th><th>Joint SACo</th></tr><tr><td>Random</td><td>159.57 &#177; 60.46</td><td>-5.43

Environment name,Version,Agents,Action type,Observation size,Reward type
MPE,Code included in OMAR repository,3,Discrete,[18],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Random,159.57 ± 60.46,-5.43,510.05,1000000,40000,1.0
Medium-Replay,203.74 ± 80.49,35.69,582.09,97500,3900,1.0
Medium,273.39 ± 92.06,27.35,649.51,1000000,40000,1.0
Expert,530.95 ± 71.41,54.96,743.89,1000000,40000,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
MPE,Code included in OMAR repository,4,Discrete,[16],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Random,-4.13 ± 10.81,-20.18,117.09,1000000,40000,1.0
Medium-Replay,3.90 ± 20.28,-17.11,146.12,62500,2500,1.0
Medium,116.36 ± 58.86,-12.66,418.25,1000000,40000,1.0
Expert,207.90 ± 77.51,-16.04,549.2,1000000,40000,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
MPE,Code included in OMAR repository,4,Discrete,[24],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Random,-6.83 ± 5.74,-17.81,54.41,1000000,40000,1.0
Medium-Replay,1.23 ± 13.49,-17.56,112.9,80000,3200,1.0
Medium,65.86 ± 29.55,-9.15,198.82,1000000,40000,1.0
Expert,85.21 ± 31.11,-11.55,238.7,1000000,40000,1.0

Environment name,Version,Agents,Action type,Observation size,Reward type
MAMuJoCo,"V1.0, Mujoco v200",2,Continuous,[6],Dense

Uid,Episode return mean,Min return,Max return,Transitions,Trajectories,Joint SACo
Random,-282.89 ± 77.50,-516.9,-62.62,1000000,1000,1.0
Medium-Replay,423.49 ± 655.68,-509.1,1993.0,460000,460,1.0
Medium,1568.87 ± 273.38,20.49,1904.56,1000000,1000,1.0
Expert,3338.69 ± 252.58,852.45,3605.42,1000000,1000,1.0
