In [15]:
from natural20.session import Session
from natural20.gym.dndenv import dndenv, action_type_to_int
from gymnasium import register, envs, make
from natural20.gym.llm_helpers.prompting_utils import action_to_prompt
from natural20.gym.dndenv import embedding_loader
from natural20.event_manager import EventManager
from natural20.gym.dqn.policy import ModelPolicy
from llm_interface import GPT4Interfacer
import torch
import random
import numpy as np
from tqdm.autonotebook import tqdm

In [16]:
ROUND_PER_MATCH = 30
# setup VLLM endpoints
LLAMA3_URL = "http://localhost:8001/v1"
MISTRAL_URL = "http://localhost:8000/v1"
GPT4_TOKEN = "OPENAI_GPT_TOKEN_HERE"
WEIGHTS_FOLDER = "model_weights_all"

In [17]:
event_manager = EventManager()
event_manager.standard_cli()
session = Session(root_path="map_with_obstacles", event_manager=event_manager)

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [19]:
class Agent:
    def action(self, observation, info):
        return random.choice(info['available_moves'])

class CustomAgent(Agent):
    def __init__(self, llm_interface):
        self.llm_interface = llm_interface

    def action(self, observation, info):
        return self.llm_interface.select_action_for_state(observation, info)
    def __str__(self) -> str:
        return "Custom LLM Agent"

class ModelAgent(Agent):
    def __init__(self, model_policy):
        self.model_policy = model_policy

    def action(self, observation, info):
        return self.model_policy.action(observation, info)


Setup the appropriate URLs to your vLLM instances

In [20]:
def prompt_for_variant(variant):
    if variant == "llama3":
        prompt = GPT4Interfacer(debug=False, tools=False, base_url=LLAMA3_URL, api_key="token1234", variant='NousResearch/Meta-Llama-3.1-8B-Instruct')
    elif variant == "gpt4":
        prompt = GPT4Interfacer(debug=False, tools=True, api_key=GPT4_TOKEN, variant='gpt-4o-mini')
    elif variant == "mistral":
        prompt = GPT4Interfacer(debug=False, tools=False, base_url=MISTRAL_URL, api_key="token1234", variant='mistralai/Mistral-7B-Instruct-v0.3')
    else:
        raise ValueError(f"Invalid variant: {variant}")

    return prompt

In [21]:
def increment_result_by_class(type, info, wins_or_loss_by_class=None):
    for p in info[type]:
        if p.class_descriptor() not in wins_or_loss_by_class:
            wins_or_loss_by_class[p.class_descriptor()] = 0
        wins_or_loss_by_class[p.class_descriptor()] += 1


def start_game(player="rl_rules_trained", adversary="llm_llama3", output_file=None):
    player_agent = None
    if player == "rl_rules_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_dnd_egreedy.pt", device=device, debug=False)
        player_agent = ModelAgent(model)
    elif player == "rl_llama3_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary.pt", device=device, debug=False)
        player_agent = ModelAgent(model)
    elif player == "rl_mistral_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary_mistral.pt", device=device, debug=False)
        player_agent = ModelAgent(model)
    elif player == "rl_gpt4_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary_gpt4.pt", device=device, debug=False)
        player_agent = ModelAgent(model)
    elif player.startswith("llm"):
        prompt = prompt_for_variant(player.split("_")[1])
        player_agent = CustomAgent(prompt)
    elif player == "random":
        player_agent = Agent()
    else:
        raise ValueError(f"Invalid player: {player}")

    # Setup Adversary
    if adversary == "rl_rules_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_dnd_egreedy.pt", device=device, debug=False)
        adversary_agent = ModelAgent(model)
    elif adversary == "rl_llama3_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary.pt", device=device, debug=False)
        adversary_agent = ModelAgent(model)
    elif adversary == "rl_mistral_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary_mistral.pt", device=device, debug=False)
        adversary_agent = ModelAgent(model)
    elif adversary == "rl_gpt4_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary_gpt4.pt", device=device, debug=False)
        adversary_agent = ModelAgent(model)
    elif adversary.startswith("llm"):
        prompt = prompt_for_variant(adversary.split("_")[1])
        adversary_agent = CustomAgent(prompt)
    elif adversary == "ai":
        adversary_agent = None
    elif adversary == "random":
        adversary_agent = Agent()
    else:
        raise ValueError(f"Invalid adversary: {adversary}")

    def reaction_callback(state, reward, done, truncated, info):
        """
        Callback function to be called when the environment is waiting for a reaction from the agent.
        Reactions in DnD are typically reactions to enemy actions, such as opportunity attacks.
        """
        print(f"{info['reactor']}: Reaction for {info['trigger']}:")
        action = player_agent.action(state, info)
        return action

    env = make("dndenv-v0", root_path="map_with_obstacles",
           render_mode="ansi",
           custom_agent=adversary_agent,
           output_file=output_file,
           profiles=lambda: random.choice(['high_elf_fighter','halfling_rogue','high_elf_mage','dwarf_cleric']),
                enemies=lambda: random.choice(['high_elf_fighter','halfling_rogue','high_elf_mage','dwarf_cleric']),
                map_file=lambda: random.choice(['maps/simple_map',\
                                                'maps/complex_map', \
                                                'maps/game_map', \
                                                'maps/walled_map']),
           debug=False,
           show_logs=False)
    wins = 0
    losses = 0
    ties = 0
    errors = 0

    total_rounds = []
    wins_by_class = {}
    loss_by_class = {}
    for round in tqdm(range(ROUND_PER_MATCH), leave=False):
        try:
            print(f"Round {round + 1}")

            observation, info = env.reset(reaction_callback=reaction_callback)
            last_info = info
            action = player_agent.action(observation, info)
            terminal = False
            steps = 0
            while not terminal and steps < 512:
                steps += 1
                observation, reward, terminal, truncated, info = env.step(action)
                last_info = info
                if not terminal and not truncated:
                    action = player_agent.action(observation, info)

                if terminal or truncated:
                    print(f"Reward: {reward}")
                    if reward == 10:
                        wins += 1
                        increment_result_by_class('players', info, wins_by_class)
                        increment_result_by_class('enemies', info, loss_by_class)
                    elif reward == -10:
                        losses += 1
                        increment_result_by_class('players', info, loss_by_class)
                        increment_result_by_class('enemies', info, wins_by_class)
                    else:
                        ties += 1

                    break

            total_rounds.append(info['round'])
        except Exception as e:
            print(f"Error: {e}")
            errors += 1
            continue

    print(f"Wins: {wins}, Losses: {losses}, Ties: {ties}")
    return wins, losses, ties, np.mean(total_rounds), wins_by_class, loss_by_class, errors

Create a match grid

In [22]:
match_grid = {}

Setup pairings for the tournament

In [23]:
# setup available players and adversaries, note that duplicate pairings are automatically removed

players = ["random", "llm_mistral", "llm_llama3", "llm_gpt4", "rl_rules_trained", "rl_llama3_trained", "rl_gpt4_trained", "rl_mistral_trained"]
adversaries = ["random", "llm_mistral", "llm_llama3", "llm_gpt4", "ai", "rl_rules_trained", "rl_llama3_trained", "rl_gpt4_trained", "rl_mistral_trained"]

In [24]:
class_grid = {}
match_paring = []

# create a cartesian product of all players and adversaries
for player in players:
    for adversary in adversaries:
        if (player, adversary) in match_grid:
            continue
        if (adversary, player) in match_grid:
            continue
        if adversary == player:
            continue
        match_paring.append((player, adversary))

for player, adversary in tqdm(match_paring):
    try:
        wins, losses, ties, avg_rounds, wins_by_class, loss_by_class, errors = start_game(player=player, adversary=adversary)
        match_grid[(player, adversary)] = (wins, losses, ties, avg_rounds)
        match_grid[(adversary, player)] = (losses, wins, ties, avg_rounds)

        class_grid[(player, adversary)] = (wins_by_class, loss_by_class)
        class_grid[(adversary, player)] = (loss_by_class, wins_by_class)
    except Exception as e:
        print(f"Error: {e} on {player} vs {adversary}")
        match_grid[(player, adversary)] = (0, 0, 0, 0)
        match_grid[(adversary, player)] = (0, 0, 0, 0)


  0%|          | 0/64 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
list index out of range
unusual response:  2: Move 5ft closer to the enemy (while still prone)
3: Cast Magic Missile (1st level spell) on the enemy
4: Stand up from prone
5: Use a Bonus Action to take the Disengage action
6: Use a Reaction to dodge an attack from the enemy
7: Do nothing and end turn (remaining in prone position)

Choose the number corresponding to the action you would like to take:
2: Move 5ft closer to the enemy (while still prone)
(If the move places you in the same space as the enemy, use the opportunity attack rules to determine if the enemy attacks you)


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


list index out of range
unusual response:  2: Move 5ft (use reaction to stand up from prone)
Then, once standing:
3: Cast Mage Armor (Bonus Action)
4: Cast Shield (Bonus Action)
5: Attack enemy with Cantrip (Action)
list index out of range
unusual response:  3: Move (use a bonus action to stand up from prone) and Cast Shield (1st level spell) with a reaction. Then, Cast Magic Missile (1st level spell) with an action for the remaining turn.

Here's the reasoning:

1. Move (bonus action): Since you are currently prone and have no available movement, you need to use a bonus action to stand up.

2. Cast Shield (Reaction): By casting Shield on yourself, you will gain a bonus to your AC (Armor Class) for the next minute, which will help mitigate damage from the enemy wizard.

3. Cast Magic Missile (Action): Magic Missile is a ranged spell attack that targets up to 3 creatures within 120 feet of you. Given the layout of the map, you can target the enemy wizard with this spell. It deals 3d4 + 

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
list index out of range
unusual response: 4: stand up
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 2
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 3
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 4
list index out of range
unusual response: 3: end my turn
Reward: 10
Round 5
list index out of range
unusual response: 2: cast a spell
Reward: 10
Round 6
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction fo

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 0
Round 2
Reward: -10
Round 3
Reward: -10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: -10
Round 8
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: 10
Round 13
Reward: 0
Round 14
Reward: -10
Round 15
Reward: 10
Round 16
Reward: -10
Round 17
Reward: 0
Round 18
Reward: 10
Round 19
Reward: -10
Round 20
Reward: -10
Round 21
Reward: 10
Round 22
Reward: -10
Round 23
Reward: -10
Round 24
Reward: -10
Round 25
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 26
Reward: 10
Round 27
Reward: -10
Round 28
Reward: -10
Round 29
Reward: 10
Round 30
Reward: -10
Wins: 10, Losses: 17, Ties: 3


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 2
Reward: -10
Round 3
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 4
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 5
Reward: -10
Round 6
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 7
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 8
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 9
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_at

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: -10
Round 3
Reward: -10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
Reward: -10
Round 14
Reward: -10
Round 15
Reward: -10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
Reward: -10
Round 19
Reward: -10
Round 20
Reward: -10
Round 21
Reward: 10
Round 22
Reward: 0
Round 23
Reward: -10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: -10
Round 28
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 29
Reward: -10
Round 30
gomerin: Reaction for opportunity_attack:
Reward: 10
Wins: 9, Losses: 20, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: -10
Round 3
Reward: -10
Round 4
Reward: 10
Round 5
Reward: -10
Round 6
Reward: 10
Round 7
Reward: -10
Round 8
Reward: -10
Round 9
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 14
Reward: -10
Round 15
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 16
Reward: 10
Round 17
Reward: -10
Round 18
Reward: -10
Round 19
Reward: -10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
Reward: -10
Round 23
Reward: -10
Round 24
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: 10
Round 28
Reward: -10
Round 29
Reward: -10
Round 30
Reward: -10
Wins: 5, Losses: 25, Ties: 0


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: -10
Round 3
Reward: -10
Round 4
Reward: -10
Round 5
Reward: -10
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
Reward: -10
Round 9
Reward: -10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 14
Reward: -10
Round 15
Reward: -10
Round 16
Reward: -10
Round 17
Reward: 10
Round 18
Reward: -10
Round 19
Reward: -10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 23
Reward: 10
Round 24
Reward: -10
Round 25
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 26
Reward: -10
Round 27
Reward: -10
Round 28
Reward: -10
Round 29
Reward: 10
Round 30
Reward: -10
Wins: 5, Losses: 25, Ties: 0


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: -10
Round 3
Reward: -10
Round 4
Reward: -10
Round 5
Reward: -10
Round 6
Reward: -10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
Reward: -10
Round 10
Reward: 10
Round 11
Reward: 0
Round 12
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 13
Reward: -10
Round 14
Reward: 10
Round 15
Reward: -10
Round 16
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 17
Reward: -10
Round 18
Reward: -10
Round 19
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 20
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 21
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 22
Reward: 10
Round 23
Reward: -10
Round 24
Reward: 10
Round 25
Reward: 10
Round 26
Reward: -10
Round 27
Reward: 10
Round 28
Reward: -10
Round 29
Reward: -10
Round 30
Reward: -10
Wins: 10, Losses: 18, Ties: 2


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 6
Reward: 10
Round 7
Reward: -10
Round 8
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
Reward: 10
Round 9
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
Reward: 10
Round 10
Reward: 10
Round 11
Reward: 10
Round 12
Reward: 10
Round 13
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 14
Reward: -10
Round 15
Reward: 10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 20
Reward: -10
Round 21
Reward: 10
Round 22
Reward: 10
Round 23
Reward: 10
Round 24
Reward: 10
Round 25
gomerin: Reaction for opportunity_a

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
list index out of range
unusual response: 2: Cast Spell (using a Level 1 slot)
list index out of range
unusual response: 2: Cast a 1st level spell
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 3
Reward: 10
Round 4
list index out of range
unusual response: 2: Cast a spell
list index out of range
unusual response: 4: Stand up
gomerin: Reaction for opportunity_attack:
list index out of range
unusual response: 3: cast healing word
gomerin: Reaction for opportunity_attack:
list index out of range
unusual response: 2: Cast Healing Word on myself
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 5
Reward: -10
Round 6
list index out of range
unusual response: 3: End my turn
list index out of range
unusual response: 3: Cast Aid (to stand up)
Reward: -10
Round 7
list index out of range
unusual response: 8: Move 5ft down
list

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: 10
Round 3
Reward: -10
Round 4
Reward: -10
Round 5
Reward: -10
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
Reward: 0
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
Reward: 10
Round 13
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 14
Reward: 10
Round 15
Reward: -10
Round 16
Reward: -10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 22
Reward: -10
Round 23
Reward: -10
Round 24
Reward: -10
Round 25
Reward: 10
Round 26
Reward: -10
Round 27
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 28
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
Reward: 10
Round 29
Reward: 10
Round 30
Reward: -10
Wins

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 3
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 4
Reward: 10
Round 5
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 6
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 7
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 8
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 9
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 10
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: -10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 9
Reward: -10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
Reward: 10
Round 13
Reward: -10
Round 14
Reward: 10
Round 15
Reward: 10
Round 16
Reward: 10
Round 17
list index out of range
unusual response:  12: move 5ft down and to the left : move 5ft down and to the left
Reward: -10
Round 18
Reward: -10
Round 19
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
Reward: 10
Round 23
Reward: -10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 28
Reward: 10
Round 29
Reward: -10
Round 30
Reward: 0
Wins: 13, Losses: 16, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: 10
Round 3
Reward: -10
Round 4
Reward: -10
Round 5
Reward: -10
Round 6
Reward: -10
Round 7
Reward: 0
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
Reward: -10
Round 14
Reward: -10
Round 15
Reward: 10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
Reward: -10
Round 20
Reward: 10
Round 21
Reward: 10
Round 22
Reward: 10
Round 23
Reward: 10
Round 24
Reward: -10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: 0
Round 28
Reward: 0
Round 29
Reward: 10
Round 30
Reward: -10
Wins: 11, Losses: 16, Ties: 3


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 5
Reward: -10
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
Reward: 10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: -10
Round 16
Reward: 10
Round 17
Reward: -10
Round 18
Reward: -10
Round 19
list index out of range
unusual response:  3: Move 5ft to the left, then as a bonus action, Stand Up (to get out of prone condition) and then as a reaction, Attack of Opportunity against the enemy since it is also prone. If I have a ranged weapon ready, I'll use it for the Attack of Opportunity, otherwise I'll use a spell if I have one prepared and available.
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
Reward: -10
Round 23
Reward: 10
Round 24
Reward: -10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: 0
Round 28


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: -10
Round 3
Reward: 10
Round 4
Reward: -10
Round 5
Reward: 10
Round 6
Reward: -10
Round 7
Reward: 10
Round 8
Reward: 10
Round 9
Reward: -10
Round 10
Reward: 10
Round 11
Reward: 0
Round 12
Reward: 10
Round 13
Reward: -10
Round 14
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 15
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 16
Reward: -10
Round 17
Reward: -10
Round 18
Reward: -10
Round 19
Reward: -10
Round 20
Reward: 10
Round 21
Reward: 10
Round 22
Reward: -10
Round 23
Reward: 10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: 10
Round 27
Reward: -10
Round 28
Reward: -10
Round 29
Reward: -10
Round 30
Reward: 10
Wins: 14, Losses: 15, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 2
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 3
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 4
Reward: -10
Round 5
Reward: -10
Round 6
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 7
Reward: 10
Round 8
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
list index out of range
unusual response: 8: end my turn
Reward: -10
Round 12
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 13
Reward: 10
Round 14
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 15
Reward: -10
Round 16
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 17
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportu

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
list index out of range
unusual response:  2: Action: Move 5ft, Reaction: Stand up (from prone)

After standing up, I will move 5ft towards the enemy (P to E movement on the map). This places me in a position to attack the enemy on my next turn.
list index out of range
unusual response:  2: Stand up (Dash action to stand up from prone)
3: Move 5ft towards enemy (Move action)
4: Attack enemy with melee weapon (Action Surge to use an additional action)

The chosen sequence would be:
2: Stand up
3: Move 5ft towards enemy
4: Attack enemy with melee weapon
list index out of range
unusual response:  2: Action: Stand up (use reaction)
Bonus Action: Attack enemy with melee weapon (Action Surge from Fighter's second wind feature)

Explanation: As P is currently prone, the first action will be to stand up using a reaction. Then, using the Action Surge feature, P can make an additional attack with his melee weapon. This way, P can attack the enemy twice in one turn, increasing the chances

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: 10
Round 13
Reward: -10
Round 14
Reward: -10
Round 15
Reward: -10
Round 16
Reward: -10
Round 17
Reward: -10
Round 18
list index out of range
unusual response: 4: cast Bless on myself
Reward: -10
Round 19
Reward: 10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
list index out of range
unusual response: 17: cast the  guiding_bolt spell
Reward: 10
Round 23
list index out of range
unusual response: 4: end my turn
Reward: -10
Round 24
Reward: 10
Round 25
Reward: 10
Round 26
Reward: -10
Round 27
Reward: -10
Round 28
Reward: 10
Round 29
Reward: -10
Round 30
Reward: -10
Wins: 11, Losses: 19, Ties: 0


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 2
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 3
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 4
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 5
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 6
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 7
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 8
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 9
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 10
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 11
gomerin: Reaction for opportunity_attac

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: -10
Round 3
Reward: 10
Round 4
list index out of range
unusual response: 8: Cast Magic Missile
Reward: 10
Round 5
Reward: -10
Round 6
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 7
Reward: -10
Round 8
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 9
Reward: -10
Round 10
Reward: -10
Round 11
Reward: 10
Round 12
Reward: -10
Round 13
Reward: -10
Round 14
Reward: 10
Round 15
Reward: -10
Round 16
Reward: 10
Round 17
Reward: -10
Round 18
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 19
Reward: 10
Round 20
Reward: -10
Round 21
Reward: 0
Round 22
Reward: -10
Round 23
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 24
Reward: -10
Round 25
Reward: 10
Round 26
Reward: 10
Round 27
Reward: -10
Round 28
Reward: -10
Round 29
Reward: -10
Round 30
Reward: -10
Wins: 9, Losses: 20, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: -10
Round 3
Reward: -10
Round 4
list index out of range
unusual response: 3: go prone
Reward: 10
Round 5
Reward: 0
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
Reward: -10
Round 9
Reward: -10
Round 10
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
Reward: -10
Round 14
Reward: -10
Round 15
list index out of range
unusual response: 3: move 3ft towards enemy
Reward: -10
Round 16
Reward: -10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: -10
Round 22
Reward: -10
Round 23
Reward: -10
Round 24
list index out of range
unusual response: 3: move 3ft towards enemy
Reward: 0
Round 25
Reward: 10
Round 26
Reward: 10
Round 27
gomerin: Reaction for opportunity_attack:
list index out of range
unusual response: 3: cast Bless on myself
Reward: 10
Round 28
Reward: -10
Round 29
Reward: -10
Round 30
Reward: -10
Wins: 9, Losses: 19, Ties: 2


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
list index out of range
unusual response: 3: end my turn
Reward: 0
Round 2
Reward: -10
Round 3
list index out of range
unusual response: 8: move 5ft to the right
Reward: -10
Round 4
list index out of range
unusual response: 3: Cast a spell (using a level 1 spell slot)
Reward: -10
Round 5
Reward: 10
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 9
Reward: -10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
list index out of range
unusual response: 3: end my turn
Reward: -10
Round 13
Reward: -10
Round 14
list index out of range
unusual response: 8: stand action
Reward: -10
Round 15
Reward: -10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
Reward: -10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
Reward: 10
Round 23
Reward: -10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: 10
Round 28
Reward: 10
Round 29
Reward: -10
Rou

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: -10
Round 3
Reward: 10
Round 4
Reward: -10
Round 5
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 6
Reward: -10
Round 7
list index out of range
unusual response: 3: Cast Bless on self
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 8
Reward: 10
Round 9
Reward: -10
Round 10
Reward: -10
Round 11
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 12
Reward: -10
Round 13
Reward: 10
Round 14
Reward: -10
Round 15
Reward: 10
Round 16
Reward: -10
Round 17
Reward: 10
Round 18
Reward: -10
Round 19
list index out of range
unusual response: 12: end my turn
Reward: -10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
Reward: 10
Round 23
Reward: 10
Round 24
Reward: -10
Round 25
Reward: -10
Round 26
list index out of range
unusual response: 3: Cast Magic Missile
Reward: -10
Round 27
Reward: 10
Round 28
Reward: -10
Round 29
Reward: -10
Round 30
Reward: -10
Wins: 8, Losses: 22, Ties: 0


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 2
Reward: -10
Round 3
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: -10
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
Reward: 10
Round 11
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 12
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 13
Reward: 10
Round 14
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 15
Reward: 10
Round 16
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 20
Reward: 10
Round 21
Reward: -10
Round 22
Reward: 10
Round 23
gomerin: Reaction for opportunity_attack:
R

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
list index out of range
unusual response:  3: Move 5ft (to stand up)
1: Attack enemy with Healing Word (bonus action)
2: Attack enemy with Staff (action)
list index out of range
unusual response:  3: Move 3ft towards the enemy
1: Attack enemy with Channel Divinity (Bonus Action)
list index out of range
unusual response:  3: Move 3ft to get out of the prone condition, then use a bonus action to stand up.
1: Attack enemy with Light Crossbow (1d10+3) as a regular action. If I have a bolt in my inventory, use that. If not, use my crossbow's ammunition.
1 (Reaction): Use a Channel Divinity (Turn Undead) reaction to weaken the enemy if there's an undead within 30 feet of the enemy or if the enemy is an undead.
1 (Bonus): Use a Healing Word to regain 1d4 + 2 hit points for myself. If I have a spell slot of 2nd level or higher, I can cast Cure Wounds instead (1d8+2 hit points).
list index out of range
unusual response:  3: Move 5 feet up, get up from prone
4: Use Healing Word (bonus ac

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 3
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 4
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 5
list index out of range
unusual response: 4: Stand up
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 6
Reward: 10
Round 7
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 8
Reward: 10
Round 9
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 10
Reward: 10
Round 11
Reward: 10
Round 12
gomerin: Reaction for opportunity_attack:
gomerin: R

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 3
Reward: -10
Round 4
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 5
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 6
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 7
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 8
Reward: -10
Round 9
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_atta

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
Reward: -10
Round 5
Reward: -10
Round 6
Reward: -10
Round 7
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 8
Reward: 10
Round 9
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 10
Reward: 10
Round 11
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 12
Reward: -10
Round 13
Reward: -10
Round 14
Reward: -10
Round 15
Reward: 10
Round 16
Reward: -10
Round 17
Reward: 10
Round 18
Reward: -10
Round 19
Reward: -10
Round 20
Reward: -10
Round 21
Reward: 10
Round 22
Reward: 10
Round 23
Reward: -10
Round 24
Reward: 0
Round 25
Reward: 10
Round 26
Reward: 10
Round 27
Reward: -10
Round 28
Reward: 10
Round 29
Reward: 10
Round 30
Reward: 10
Wins: 17, Losses: 12, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: -10
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 14
Reward: -10
Round 15
Reward: -10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
Reward: -10
Round 19
Reward: 10
Round 20
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 21
Reward: -10
Round 22
Reward: -10
Round 23
Reward: 10
Round 24
Reward: -10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: 10
Round 28
Reward: -10
Round 29
Reward: 10
Round 30
Reward: -10
Wins: 14, Losses: 16, Ties: 0


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: -10
Round 4
Reward: -10
Round 5
Reward: 10
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
Reward: -10
Round 9
Reward: -10
Round 10
Reward: 10
Round 11
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 12
Reward: 10
Round 13
Reward: -10
Round 14
Reward: -10
Round 15
Reward: -10
Round 16
Reward: -10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 23
Reward: 10
Round 24
Reward: -10
Round 25
Reward: 10
Round 26
Reward: -10
Round 27
Reward: -10
Round 28
Reward: 10
Round 29
Reward: 10
Round 30
Reward: 10
Wins: 14, Losses: 16, Ties: 0


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: -10
Round 4
Reward: -10
Round 5
Reward: 10
Round 6
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
Reward: -10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 14
Reward: 10
Round 15
Reward: 10
Round 16
Reward: -10
Round 17
Reward: -10
Round 18
Reward: -10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: 10
Round 22
Reward: 10
Round 23
Reward: 10
Round 24
Reward: 10
Round 25
Reward: 10
Round 26
Reward: -10
Round 27
Reward: 10
Round 28
Reward: 10
Round 29
Reward: 0
Round 30
Reward: 10
Wins: 18, Losses: 11, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 2
Reward: -10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 7
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 8
Reward: 10
Round 9
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 10
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 11
Reward: 10
Round 12
Reward: -10
Round 13
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 14
Reward: 10
Round 15
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 16
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
gomerin: Reaction for opportunity_attack:
gomerin

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
list index out of range
unusual response:  2: Stand up (Dash action to stand up from prone)
3: Healing Word (1st level Cleric spell slot)
4: Attack enemy with melee weapon (Assuming you have one)
list index out of range
unusual response:  2: Stand up (Dexterity saving throw to end prone condition), then 3: Cast "Cure Wounds" (Level 1 spell slot), then 5: Move 5ft towards enemy (closing the distance), then 6: Attack enemy with melee weapon.
list index out of range
unusual response:  3: Move 5ft towards the enemy and use a Level 1 spell slot (Cure Wounds) to regain health

This action will allow me to move closer to the enemy, positioning me for a potential melee attack in the next round, while also regaining some health to ensure my survival. The spell Cure Wounds can heal up to 8+2 (due to my Wisdom modifier) hit points, which should be sufficient to bring my health back to full. If the enemy is still dodging, there is a chance that the attack may miss, but 

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: -10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
list index out of range
unusual response: 3: end my turn
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 6
Reward: 10
Round 7
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 8
Reward: 10
Round 9
Reward: 10
Round 10
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 11
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 12
Reward: 10
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: -10
Round 16
Reward: 10
Round 17
gomerin: Reaction for shield:
gomerin: Reaction for shield:
gomerin: Reaction for shield:
Reward: 10
Round 18
Reward: -10
Round 19
Reward: -10
Round 20
Reward: 10
Round 21
Reward: -10
Round 22
Reward: 10
Round 23
list index out of range
unusual response: 3: end my turn
Reward: -10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
gomerin: Reaction for opportunity_at

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
Reward: -10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
Reward: 10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
Reward: 10
Round 12
Reward: -10
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: 10
Round 16
Reward: -10
Round 17
Reward: 10
Round 18
Reward: -10
Round 19
Reward: -10
Round 20
Reward: 10
Round 21
Reward: 10
Round 22
list index out of range
unusual response: {"action":12}
Reward: 10
Round 23
Reward: -10
Round 24
Reward: 10
Round 25
Reward: 10
Round 26
Reward: 0
Round 27
Reward: -10
Round 28
Reward: 10
Round 29
Reward: 10
Round 30
Reward: 10
Wins: 21, Losses: 8, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 3
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 4
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 5
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 6
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 7
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_at

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: -10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: 0
Round 12
Reward: 10
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: 10
Round 16
Reward: 10
Round 17
Reward: -10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: 10
Round 22
Reward: -10
Round 23
Reward: 10
Round 24
Reward: -10
Round 25
Reward: 0
Round 26
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 27
Reward: 10
Round 28
Reward: -10
Round 29
Reward: -10
Round 30
Reward: 10
Wins: 20, Losses: 8, Ties: 2


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: -10
Round 3
Reward: 10
Round 4
Reward: -10
Round 5
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 6
Reward: 0
Round 7
Reward: 10
Round 8
Reward: 0
Round 9
Reward: -10
Round 10
Reward: -10
Round 11
Reward: 10
Round 12
Reward: 10
Round 13
Reward: -10
Round 14
Reward: 10
Round 15
Reward: -10
Round 16
Reward: 0
Round 17
Reward: -10
Round 18
Reward: 10
Round 19
Reward: -10
Round 20
Reward: -10
Round 21
Reward: 10
Round 22
Reward: -10
Round 23
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: 10
Round 27
Reward: -10
Round 28
Reward: 10
Round 29
Reward: 10
Round 30
Reward: -10
Wins: 14, Losses: 13, Ties: 3


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: -10
Round 3
Reward: 0
Round 4
Reward: -10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: -10
Round 8
Reward: -10
Round 9
Reward: 0
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
Reward: 10
Round 14
Reward: -10
Round 15
Reward: -10
Round 16
Reward: -10
Round 17
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 18
Reward: 10
Round 19
Reward: -10
Round 20
Reward: 10
Round 21
Reward: -10
Round 22
Reward: 10
Round 23
Reward: 10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: 10
Round 27
Reward: 0
Round 28
Reward: 10
Round 29
Reward: 10
Round 30
Reward: -10
Wins: 12, Losses: 15, Ties: 3


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 0
Round 4
Reward: 10
Round 5
Reward: -10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 9
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 10
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 11
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 12
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 13
Reward: 10
Round 14
Reward: -10
Round 15
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 16
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 17
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: 10
Round 22
Reward: -10
Round 23
Reward: 10
Round 24
Reward: 10
Round 25
Reward: 0
Round 26
gomerin: Reaction for opportunity_at

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
list index out of range
unusual response:  2: Move 5ft and stand up (since currently Prone)
3: Cast Shield spell (using a level 1 slot)
4: Attack enemy with Magic Missile (3 darts)
list index out of range
unusual response:  2: Move 5ft towards the enemy (using a bonus action)
3: Cast Mage Armor (using a bonus action)
4: Cast Shield (using a reaction)
5: Attack enemy with a spell (using an action)
6: Attack enemy with a cantrip (using an action)
Please choose the number corresponding to the action you would like to take:

3: Cast Shield (using a reaction)
list index out of range
unusual response:  3: Move using a bonus action and cast Mage Armor

Explanation: Given the character is currently prone and doesn't have any movement, I would suggest using a bonus action to stand up (which is usually considered part of the movement) and then casting Mage Armor to regain some lost hit points. After that, I'd choose the next most suitable action, such as attacking the 

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 3
Reward: 10
Round 4
gomerin: Reaction for opportunity_attack:
list index out of range
unusual response: 2: stand up and move 5ft towards enemy
list index out of range
unusual response: 4: Stand Up
Reward: 0
Round 5
Reward: -10
Round 6
Reward: 10
Round 7
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: 10
Round 12
Reward: 10
Round 13
list index out of range
unusual response: 2: stand up
Reward: 10
Round 14
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 15
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 16
list index out of range
unusual response: 8: stand action
Reward: -10
Round 17
Reward: -10
Round 18
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 19
list index out of range
unusual response: 4: Cast a level 1 spell
Reward: 10
Round 20
Reward: 10
Round 21
Reward: -10
Round 22
Reward: -1

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: 0
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
Reward: 10
Round 13
Reward: -10
Round 14
Reward: -10
Round 15
Reward: 10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: -10
Round 22
Reward: -10
Round 23
Reward: 10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: 10
Round 27
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 28
Reward: 10
Round 29
Reward: -10
Round 30
Reward: 10
Wins: 20, Losses: 9, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 3
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 4
Reward: 10
Round 5
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 6
Reward: -10
Round 7
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 8
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 9
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_atta

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 4
Reward: 10
Round 5
Reward: -10
Round 6
Reward: 0
Round 7
Reward: 0
Round 8
Reward: 10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: 0
Round 12
Reward: -10
Round 13
Reward: 10
Round 14
Reward: -10
Round 15
Reward: 0
Round 16
Reward: 10
Round 17
Reward: -10
Round 18
Reward: 10
Round 19
Reward: -10
Round 20
Reward: 10
Round 21
Reward: -10
Round 22
Reward: -10
Round 23
Reward: 10
Round 24
Reward: 10
Round 25
Reward: 10
Round 26
Reward: 0
Round 27
Reward: -10
Round 28
Reward: -10
Round 29
Reward: -10
Round 30
Reward: 10
Wins: 15, Losses: 10, Ties: 5


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: -10
Round 4
Reward: -10
Round 5
Reward: -10
Round 6
Reward: 0
Round 7
Reward: -10
Round 8
Reward: 10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
Reward: 10
Round 12
Reward: -10
Round 13
Reward: -10
Round 14
Reward: 10
Round 15
Reward: -10
Round 16
Reward: -10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: -10
Round 22
Reward: 10
Round 23
Reward: -10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: 10
Round 28
Reward: -10
Round 29
Reward: -10
Round 30
Reward: -10
Wins: 13, Losses: 16, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: -10
Round 6
Reward: -10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
Reward: -10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
Reward: 10
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: -10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
Reward: -10
Round 19
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 20
Reward: 10
Round 21
Reward: 0
Round 22
Reward: 10
Round 23
Reward: 0
Round 24
Reward: -10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: -10
Round 28
Reward: 10
Round 29
Reward: -10
Round 30
Reward: 10
Wins: 16, Losses: 12, Ties: 2


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: -10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
Reward: 10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 12
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: 10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 19
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 20
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 21
Reward: 10
Round 22
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 23
Reward: 10


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
list index out of range
unusual response:  3: Move 5ft (to get out of prone)
1: Attack enemy with my level 1 spell (Cure Wounds)
Reward: 10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: -10
Round 8
list index out of range
unusual response:  2: Action: Move 5ft, Reaction: Dash action to move 10ft total, Bonus Action: Attack with a melee weapon (Action Surge if available)
list index out of range
unusual response:  2: Action: Stand up (Dexterity saving throw to end prone condition)
Bonus Action: Attack enemy with melee weapon (Action Surge if available)
Reaction: Sentinel reaction to make an opportunity attack if enemy moves out of my reach
gomerin: Reaction for opportunity_attack:
list index out of range
unusual response:  3: Move 5ft towards the enemy (since I'm prone, I can move half my speed without using movement)
2: Action: Stand Up (to no longer be prone)
4: Action: Attack enemy with melee weapon (since I'm no

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
Reward: -10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
list index out of range
unusual response: 3: Cast Light to illuminate the area and gain advantage on attack rolls
list index out of range
unusual response: 3: Cast Healing Word to regain some health
Reward: 0
Round 10
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 11
Reward: 10
Round 12
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 13
Reward: -10
Round 14
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 15
Reward: 10
Round 16
Reward: 10
Round 17
Reward: -10
Round 18
Reward: -10
Round 19
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 20
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 21
Reward: -10
Round 22
Reward: -10
Round 23
Reward: 10
Round 24
Reward: 10
Round 25
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 26
R

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: 10
Round 3
Reward: -10
Round 4
Reward: 10
Round 5
Reward: -10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
Reward: 10
Round 9
Reward: -10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
Reward: 10
Round 13
Reward: 10
Round 14
Reward: -10
Round 15
Reward: 10
Round 16
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 17
Reward: -10
Round 18
Reward: -10
Round 19
Reward: -10
Round 20
Reward: 10
Round 21
Reward: 10
Round 22
Reward: 10
Round 23
Reward: 10
Round 24
Reward: -10
Round 25
Reward: -10
Round 26
Reward: 10
Round 27
Reward: -10
Round 28
Reward: 10
Round 29
Reward: 10
Round 30
gomerin: Reaction for opportunity_attack:
Reward: 10
Wins: 17, Losses: 13, Ties: 0


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 3
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 4
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 5
Reward: -10
Round 6
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 7
Reward: 10
Round 8
Reward: 0
Round 9
Reward: -10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 14
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 1

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: -10
Round 3
Reward: 0
Round 4
Reward: -10
Round 5
Reward: 10
Round 6
Reward: -10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
Reward: 10
Round 12
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 13
Reward: -10
Round 14
Reward: -10
Round 15
Reward: -10
Round 16
Reward: -10
Round 17
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: -10
Round 22
Reward: 10
Round 23
Reward: -10
Round 24
Reward: -10
Round 25
Reward: 10
Round 26
Reward: -10
Round 27
Reward: -10
Round 28
Reward: -10
Round 29
Reward: 10
Round 30
Reward: -10
Wins: 12, Losses: 17, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: 0
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: -10
Round 8
Reward: 10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: 10
Round 12
Reward: 10
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: 0
Round 16
Reward: 10
Round 17
Reward: -10
Round 18
Reward: -10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: 10
Round 22
Reward: -10
Round 23
Reward: 10
Round 24
Reward: 10
Round 25
Reward: 10
Round 26
Reward: -10
Round 27
Reward: 10
Round 28
Reward: -10
Round 29
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 30
Reward: 10
Wins: 21, Losses: 7, Ties: 2


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: -10
Round 3
Reward: -10
Round 4
Reward: 10
Round 5
Reward: 0
Round 6
Reward: -10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
Reward: 10
Round 13
Reward: 10
Round 14
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 15
Reward: 10
Round 16
Reward: 10
Round 17
Reward: -10
Round 18
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 19
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
Reward: -10
Round 23
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: -10
Round 28
Reward: 10
Round 29
Reward: 10
Round 30
Reward: -10
Wins: 13, Losses: 16, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
Reward: 10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: 10
Round 12
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: 10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 20
Reward: -10
Round 21
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 22
Reward: -10
Round 23
Reward: -10
Round 24
Reward: -10
Round 25
Reward: 10
Round 26
Reward: 10
Round 27
Reward: 10
Round 28
Reward: -10
Round 29
Reward: 10
Round 30
Reward: 10
Wins: 23, Losses: 7, Ties: 0


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
list index out of range
unusual response:  3: Action Surge (use Action Surge to take an additional action)
2: Dash (Dash to stand up and move 20 ft. in a straight line, ignoring difficult terrain)
Attack action (Attack the enemy with a melee weapon)

Actions: Dash, Attack (Action Surge)
Bonus actions: 1
Reactions: 1

First, I will use my Dash action to stand up and move closer to the enemy, ignoring any difficult terrain. Then, I will use my Action Surge to take an additional action and attack the enemy with a melee weapon. Finally, I will use my remaining bonus action and/or reaction as needed.
list index out of range
unusual response:  2: Move 5ft to a position where I have line of sight to the enemy, then use my reaction to stand up from prone.

Then, for my bonus action, I will take the Dash action to move another 5ft closer to the enemy.

Finally, for my action, I will use my Action Surge (as a 2nd level fighter) to attack the enem

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
list index out of range
unusual response: 3: cast Bless
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: 0
Round 7
Reward: 10
Round 8
list index out of range
unusual response: 2: end my turn
list index out of range
unusual response: 5: stand up
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 9
Reward: -10
Round 10
Reward: 10
Round 11
list index out of range
unusual response: 2: Cast Healing Word
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 0
Round 12
list index out of range
unusual response: 3: Cast a Light spell (1st level spell)
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
list index out of range
unusual response: 3: cast a spell (I'd like to use Cure Wounds on my

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: -10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: -10
Round 7
Reward: 10
Round 8
Reward: 10
Round 9
Reward: -10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
Reward: -10
Round 14
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 15
Reward: -10
Round 16
Reward: 10
Round 17
Reward: -10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
Reward: -10
Round 23
Reward: -10
Round 24
Reward: 10
Round 25
Reward: 0
Round 26
Reward: 10
Round 27
Reward: -10
Round 28
Reward: 10
Round 29
Reward: 10
Round 30
Reward: 10
Wins: 14, Losses: 15, Ties: 1


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 2
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 6
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 7
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 8
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 9
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 10
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 11
Reward: 10
Round 12
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 13
Rewa

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: -10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: -10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
Reward: 10
Round 14
Reward: -10
Round 15
Reward: -10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: 0
Round 22
Reward: 10
Round 23
Reward: -10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: 0
Round 27
Reward: 10
Round 28
Reward: 10
Round 29
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 30
Reward: 10
Wins: 18, Losses: 10, Ties: 2


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 0
Round 4
Reward: -10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
Reward: 10
Round 9
Reward: 10
Round 10
Reward: 0
Round 11
Reward: 10
Round 12
Reward: 10
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: 10
Round 16
Reward: 10
Round 17
Reward: 10
Round 18
Reward: -10
Round 19
Reward: 10
Round 20
Reward: -10
Round 21
Reward: -10
Round 22
Reward: 10
Round 23
Reward: -10
Round 24
Reward: 10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: -10
Round 28
Reward: 10
Round 29
Reward: -10
Round 30
Reward: 10
Wins: 19, Losses: 9, Ties: 2


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: -10
Round 3
Reward: -10
Round 4
Reward: 10
Round 5
Reward: -10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
Reward: -10
Round 14
Reward: 10
Round 15
Reward: -10
Round 16
Reward: -10
Round 17
Reward: -10
Round 18
Reward: -10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: 10
Round 22
Reward: -10
Round 23
Reward: -10
Round 24
Reward: 10
Round 25
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 26
Reward: -10
Round 27
Reward: 10
Round 28
Reward: 10
Round 29
Reward: 10
Round 30
Reward: -10
Wins: 13, Losses: 17, Ties: 0


Generate Report about the matches

In [60]:
import pandas as pd

# setup a pandas table to plot the wins, losses, and ties for each matchup
df = pd.DataFrame(match_grid).T
df.columns = ['Wins', 'Losses', 'Ties', 'AVG Rounds']
df = df.sort_values('Wins', ascending=False)
df

Unnamed: 0,Unnamed: 1,Wins,Losses,Ties,AVG Rounds
rl_gpt4_trained,llm_mistral,24.0,5.0,1.0,2.966667
rl_mistral_trained,ai,24.0,6.0,0.0,1.200000
rl_llama3_trained,random,23.0,5.0,2.0,6.533333
llm_mistral,random,23.0,7.0,0.0,5.533333
rl_mistral_trained,random,23.0,7.0,0.0,3.500000
...,...,...,...,...,...
random,rl_gpt4_trained,6.0,22.0,2.0,7.133333
random,rl_rules_trained,6.0,22.0,2.0,7.633333
ai,rl_mistral_trained,6.0,24.0,0.0,1.200000
llm_mistral,rl_gpt4_trained,5.0,24.0,1.0,2.966667


In [61]:
leaderboard = df.groupby(level=0).sum().sort_values('Wins', ascending=False)

with open("leaderboard_all.latex", "w") as f:
    header =  r"""
\begin{table}[h]
\centering
\begin{tabular}{|l|c|c|c|c|}
\hline
\textbf{Agent} & \textbf{Wins} & \textbf{Losses} & \textbf{Ties} & \textbf{AVG Rounds} \\
\hline
"""
    f.write(header + "\n")
    for index, row in leaderboard.iterrows():
        player_str = index.replace('_', '\_')
        f.write(f"{player_str} & {row['Wins']} & {row['Losses']} & {row['Ties']} & {row['AVG Rounds']:.2f} \\\\\n")
    footer = r"""
\hline
\end{tabular}
\caption{Leaderboard for D\&D Four Classes Tournament: LLMs vs RL Agents}
\label{tab:dnd-four-classes-leaderboard}
\end{table}
"""
    f.write(footer)

In [62]:
# create a leaderboard on the most wins

leaderboard = df.groupby(level=0).sum().sort_values('Wins', ascending=False)
leaderboard.to_csv("leaderboard_all.csv")



leaderboard

Unnamed: 0,Wins,Losses,Ties,AVG Rounds
rl_gpt4_trained,146.0,86.0,8.0,49.3
rl_mistral_trained,142.0,85.0,13.0,66.5
rl_rules_trained,135.0,93.0,12.0,77.566667
rl_llama3_trained,131.0,94.0,15.0,86.933333
ai,123.0,115.0,2.0,28.6
llm_gpt4,116.0,121.0,3.0,42.866667
llm_mistral,107.0,128.0,5.0,37.9
llm_llama3,79.0,149.0,12.0,52.066667
random,61.0,169.0,10.0,53.8


Dump results to CSV

In [27]:
df.to_csv("match_results_all.csv")

In [28]:
# create a table of how many times an agent has "won" against another agent
win_table = {}
lost_against = {}
for (player, adversary) in match_grid.keys():
    wins, losses, _, _ = match_grid[(player, adversary)]
    win_table[player] = win_table.get(player, 0)
    if wins > losses:
        win_table[player] += 1
    else:
        losses = lost_against.get(player, [])
        losses.append(adversary)
        lost_against[player] =  losses

lost_against
win_table

# create a table ranking the agents by how many times they have won
df = pd.DataFrame(win_table.items(), columns=["Agent", "Wins"])
df = df.sort_values('Wins', ascending=False)
df.to_csv("agent_ranking_all.csv")
df


Unnamed: 0,Agent,Wins
5,rl_rules_trained,6
6,rl_llama3_trained,6
7,rl_gpt4_trained,6
4,ai,5
8,rl_mistral_trained,5
3,llm_gpt4,4
1,llm_mistral,3
2,llm_llama3,1
0,random,0


Dump CSV with class matchups

In [29]:
# show the class grid

print("Class Grid")
print(class_grid)
# Class Grid: {('random', 'llm_mistral'): ({'wizard-2': 19, 'fighter-2': 9, 'rogue-2': 10}, {'fighter-2': 32, 'rogue-2': 6, 'wizard-2': 8}), ('llm_mistral', 'random'): ({'fighter-2': 32, 'rogue-2': 6, 'wizard-2': 8}, {'wizard-2': 19, 'fighter-2': 9, 'rogue-2': 10}), ('random', 'llm_llama3'): ({'wizard-2': 17, 'fighter-2': 3, 'rogue-2': 26}, {'rogue-2': 10, 'fighter-2': 19, 'wizard-2': 15}), ('llm_llama3', 'random'): ({'rogue-2': 10, 'fighter-2': 19, 'wizard-2': 15}, {'wizard-2': 17, 'fighter-2': 3, 'rogue-2': 26}), ('random', 'llm_gpt4'): ({'rogue-2': 14, 'wizard-2': 22, 'fighter-2': 4}, {'wizard-2': 20, 'fighter-2': 11, 'rogue-2': 15}), ('llm_gpt4', 'random'): ({'wizard-2': 20, 'fighter-2': 11, 'rogue-2': 15}, {'rogue-2': 14, 'wizard-2': 22, 'fighter-2': 4}), ('random', 'ai'): ({'fighter-2': 14, 'rogue-2': 22, 'wizard-2': 5}, {'rogue-2': 11, 'wizard-2': 32, 'fighter-2': 6}), ('ai', 'random'): ({'rogue-2': 11, 'wizard-2': 32, 'fighter-2': 6}, {'fighter-2': 14, 'rogue-2': 22, 'wizard-2': 5}), ('random', 'rl_rules_trained'): ({'rogue-2': 18, 'fighter-2': 5, 'wizard-2': 13}, {'wizard-2': 18, 'rogue-2': 15, 'fighter-2': 15}), ('rl_rules_trained', 'random'): ({'wizard-2': 18, 'rogue-2': 15, 'fighter-2': 15}, {'rogue-2': 18, 'fighter-2': 5, 'wizard-2': 13}), ('random', 'rl_llama3_trained'): ({'fighter-2': 8, 'rogue-2': 21, 'wizard-2': 7}, {'rogue-2': 10, 'wizard-2': 20, 'fighter-2': 21}), ('rl_llama3_trained', 'random'): ({'rogue-2': 10, 'wizard-2': 20, 'fighter-2': 21}, {'fighter-2': 8, 'rogue-2': 21, 'wizard-2': 7}), ('random', 'rl_gpt4_trained'): ({'fighter-2': 9, 'wizard-2': 6, 'rogue-2': 15}, {'fighter-2': 21, 'wizard-2': 20, 'rogue-2': 7}), ('rl_gpt4_trained', 'random'): ({'fighter-2': 21, 'wizard-2': 20, 'rogue-2': 7}, {'fighter-2': 9, 'wizard-2': 6, 'rogue-2': 15}), ('random', 'rl_mistral_trained'): ({'rogue-2': 15, 'wizard-2': 11, 'fighter-2': 6}, {'fighter-2': 18, 'rogue-2': 10, 'wizard-2': 27}), ('rl_mistral_trained', 'random'): ({'fighter-2': 18, 'rogue-2': 10, 'wizard-2': 27}, {'rogue-2': 15, 'wizard-2': 11, 'fighter-2': 6}), ('llm_mistral', 'llm_llama3'): ({'rogue-2': 19, 'wizard-2': 21, 'fighter-2': 13}, {'rogue-2': 17, 'fighter-2': 13, 'wizard-2': 7}), ('llm_llama3', 'llm_mistral'): ({'rogue-2': 17, 'fighter-2': 13, 'wizard-2': 7}, {'rogue-2': 19, 'wizard-2': 21, 'fighter-2': 13}), ('llm_mistral', 'llm_gpt4'): ({'rogue-2': 20, 'wizard-2': 22, 'fighter-2': 8}, {'rogue-2': 12, 'fighter-2': 21, 'wizard-2': 7}), ('llm_gpt4', 'llm_mistral'): ({'rogue-2': 12, 'fighter-2': 21, 'wizard-2': 7}, {'rogue-2': 20, 'wizard-2': 22, 'fighter-2': 8}), ('llm_mistral', 'ai'): ({'rogue-2': 21, 'wizard-2': 6, 'fighter-2': 15}, {'wizard-2': 28, 'rogue-2': 7, 'fighter-2': 12}), ('ai', 'llm_mistral'): ({'wizard-2': 28, 'rogue-2': 7, 'fighter-2': 12}, {'rogue-2': 21, 'wizard-2': 6, 'fighter-2': 15}), ('llm_mistral', 'rl_rules_trained'): ({'wizard-2': 14, 'fighter-2': 18, 'rogue-2': 13}, {'fighter-2': 17, 'rogue-2': 8, 'wizard-2': 8}), ('rl_rules_trained', 'llm_mistral'): ({'fighter-2': 17, 'rogue-2': 8, 'wizard-2': 8}, {'wizard-2': 14, 'fighter-2': 18, 'rogue-2': 13}), ('llm_mistral', 'rl_llama3_trained'): ({'rogue-2': 15, 'fighter-2': 14, 'wizard-2': 12}, {'rogue-2': 13, 'fighter-2': 21, 'wizard-2': 12}), ('rl_llama3_trained', 'llm_mistral'): ({'rogue-2': 13, 'fighter-2': 21, 'wizard-2': 12}, {'rogue-2': 15, 'fighter-2': 14, 'wizard-2': 12}), ('llm_mistral', 'rl_gpt4_trained'): ({'rogue-2': 12, 'fighter-2': 8, 'wizard-2': 18}, {'wizard-2': 16, 'fighter-2': 18, 'rogue-2': 12}), ('rl_gpt4_trained', 'llm_mistral'): ({'wizard-2': 16, 'fighter-2': 18, 'rogue-2': 12}, {'rogue-2': 12, 'fighter-2': 8, 'wizard-2': 18}), ('llm_mistral', 'rl_mistral_trained'): ({'wizard-2': 22, 'fighter-2': 8, 'rogue-2': 15}, {'rogue-2': 8, 'wizard-2': 14, 'fighter-2': 17}), ('rl_mistral_trained', 'llm_mistral'): ({'rogue-2': 8, 'wizard-2': 14, 'fighter-2': 17}, {'wizard-2': 22, 'fighter-2': 8, 'rogue-2': 15}), ('llm_llama3', 'llm_gpt4'): ({'wizard-2': 14, 'rogue-2': 21, 'fighter-2': 6}, {'wizard-2': 7, 'rogue-2': 13, 'fighter-2': 26}), ('llm_gpt4', 'llm_llama3'): ({'wizard-2': 7, 'rogue-2': 13, 'fighter-2': 26}, {'wizard-2': 14, 'rogue-2': 21, 'fighter-2': 6}), ('llm_llama3', 'ai'): ({'rogue-2': 17, 'fighter-2': 20, 'wizard-2': 5}, {'wizard-2': 25, 'rogue-2': 16, 'fighter-2': 6}), ('ai', 'llm_llama3'): ({'wizard-2': 25, 'rogue-2': 16, 'fighter-2': 6}, {'rogue-2': 17, 'fighter-2': 20, 'wizard-2': 5}), ('llm_llama3', 'rl_rules_trained'): ({'wizard-2': 13, 'fighter-2': 8, 'rogue-2': 20}, {'rogue-2': 8, 'fighter-2': 25, 'wizard-2': 16}), ('rl_rules_trained', 'llm_llama3'): ({'rogue-2': 8, 'fighter-2': 25, 'wizard-2': 16}, {'wizard-2': 13, 'fighter-2': 8, 'rogue-2': 20}), ('llm_llama3', 'rl_llama3_trained'): ({'rogue-2': 19, 'fighter-2': 10, 'wizard-2': 12}, {'fighter-2': 32, 'wizard-2': 15, 'rogue-2': 2}), ('rl_llama3_trained', 'llm_llama3'): ({'fighter-2': 32, 'wizard-2': 15, 'rogue-2': 2}, {'rogue-2': 19, 'fighter-2': 10, 'wizard-2': 12}), ('llm_llama3', 'rl_gpt4_trained'): ({'fighter-2': 11, 'rogue-2': 18, 'wizard-2': 13}, {'fighter-2': 23, 'rogue-2': 8, 'wizard-2': 17}), ('rl_gpt4_trained', 'llm_llama3'): ({'fighter-2': 23, 'rogue-2': 8, 'wizard-2': 17}, {'fighter-2': 11, 'rogue-2': 18, 'wizard-2': 13}), ('llm_llama3', 'rl_mistral_trained'): ({'fighter-2': 5, 'rogue-2': 21, 'wizard-2': 11}, {'rogue-2': 14, 'fighter-2': 30, 'wizard-2': 6}), ('rl_mistral_trained', 'llm_llama3'): ({'rogue-2': 14, 'fighter-2': 30, 'wizard-2': 6}, {'fighter-2': 5, 'rogue-2': 21, 'wizard-2': 11}), ('llm_gpt4', 'ai'): ({'fighter-2': 33, 'rogue-2': 6, 'wizard-2': 5}, {'wizard-2': 27, 'fighter-2': 8, 'rogue-2': 10}), ('ai', 'llm_gpt4'): ({'wizard-2': 27, 'fighter-2': 8, 'rogue-2': 10}, {'fighter-2': 33, 'rogue-2': 6, 'wizard-2': 5}), ('llm_gpt4', 'rl_rules_trained'): ({'rogue-2': 18, 'wizard-2': 24, 'fighter-2': 6}, {'rogue-2': 16, 'fighter-2': 17, 'wizard-2': 9}), ('rl_rules_trained', 'llm_gpt4'): ({'rogue-2': 16, 'fighter-2': 17, 'wizard-2': 9}, {'rogue-2': 18, 'wizard-2': 24, 'fighter-2': 6}), ('llm_gpt4', 'rl_llama3_trained'): ({'rogue-2': 21, 'fighter-2': 4, 'wizard-2': 13}, {'wizard-2': 11, 'fighter-2': 19, 'rogue-2': 19}), ('rl_llama3_trained', 'llm_gpt4'): ({'wizard-2': 11, 'fighter-2': 19, 'rogue-2': 19}, {'rogue-2': 21, 'fighter-2': 4, 'wizard-2': 13}), ('llm_gpt4', 'rl_gpt4_trained'): ({'fighter-2': 13, 'wizard-2': 10, 'rogue-2': 12}, {'fighter-2': 22, 'wizard-2': 13, 'rogue-2': 14}), ('rl_gpt4_trained', 'llm_gpt4'): ({'fighter-2': 22, 'wizard-2': 13, 'rogue-2': 14}, {'fighter-2': 13, 'wizard-2': 10, 'rogue-2': 12}), ('llm_gpt4', 'rl_mistral_trained'): ({'rogue-2': 15, 'wizard-2': 14, 'fighter-2': 6}, {'rogue-2': 17, 'wizard-2': 18, 'fighter-2': 17}), ('rl_mistral_trained', 'llm_gpt4'): ({'rogue-2': 17, 'wizard-2': 18, 'fighter-2': 17}, {'rogue-2': 15, 'wizard-2': 14, 'fighter-2': 6}), ('rl_rules_trained', 'ai'): ({'fighter-2': 27, 'rogue-2': 9, 'wizard-2': 13}, {'wizard-2': 21, 'rogue-2': 8, 'fighter-2': 9}), ('ai', 'rl_rules_trained'): ({'wizard-2': 21, 'rogue-2': 8, 'fighter-2': 9}, {'fighter-2': 27, 'rogue-2': 9, 'wizard-2': 13}), ('rl_rules_trained', 'rl_llama3_trained'): ({'wizard-2': 13, 'fighter-2': 8, 'rogue-2': 12}, {'rogue-2': 10, 'wizard-2': 14, 'fighter-2': 12}), ('rl_llama3_trained', 'rl_rules_trained'): ({'rogue-2': 10, 'wizard-2': 14, 'fighter-2': 12}, {'wizard-2': 13, 'fighter-2': 8, 'rogue-2': 12}), ('rl_rules_trained', 'rl_gpt4_trained'): ({'fighter-2': 6, 'wizard-2': 19, 'rogue-2': 8}, {'rogue-2': 11, 'wizard-2': 14, 'fighter-2': 20}), ('rl_gpt4_trained', 'rl_rules_trained'): ({'rogue-2': 11, 'wizard-2': 14, 'fighter-2': 20}, {'fighter-2': 6, 'wizard-2': 19, 'rogue-2': 8}), ('rl_rules_trained', 'rl_mistral_trained'): ({'rogue-2': 10, 'wizard-2': 14, 'fighter-2': 10}, {'wizard-2': 19, 'fighter-2': 13, 'rogue-2': 6}), ('rl_mistral_trained', 'rl_rules_trained'): ({'wizard-2': 19, 'fighter-2': 13, 'rogue-2': 6}, {'rogue-2': 10, 'wizard-2': 14, 'fighter-2': 10}), ('rl_llama3_trained', 'ai'): ({'rogue-2': 8, 'fighter-2': 24, 'wizard-2': 14}, {'wizard-2': 22, 'rogue-2': 14, 'fighter-2': 8}), ('ai', 'rl_llama3_trained'): ({'wizard-2': 22, 'rogue-2': 14, 'fighter-2': 8}, {'rogue-2': 8, 'fighter-2': 24, 'wizard-2': 14}), ('rl_llama3_trained', 'rl_gpt4_trained'): ({'rogue-2': 17, 'wizard-2': 14, 'fighter-2': 10}, {'wizard-2': 19, 'rogue-2': 10, 'fighter-2': 17}), ('rl_gpt4_trained', 'rl_llama3_trained'): ({'wizard-2': 19, 'rogue-2': 10, 'fighter-2': 17}, {'rogue-2': 17, 'wizard-2': 14, 'fighter-2': 10}), ('rl_llama3_trained', 'rl_mistral_trained'): ({'wizard-2': 24, 'rogue-2': 13, 'fighter-2': 6}, {'rogue-2': 9, 'fighter-2': 14, 'wizard-2': 24}), ('rl_mistral_trained', 'rl_llama3_trained'): ({'rogue-2': 9, 'fighter-2': 14, 'wizard-2': 24}, {'wizard-2': 24, 'rogue-2': 13, 'fighter-2': 6}), ('rl_gpt4_trained', 'ai'): ({'fighter-2': 30, 'rogue-2': 12, 'wizard-2': 7}, {'rogue-2': 14, 'wizard-2': 21, 'fighter-2': 6}), ('ai', 'rl_gpt4_trained'): ({'rogue-2': 14, 'wizard-2': 21, 'fighter-2': 6}, {'fighter-2': 30, 'rogue-2': 12, 'wizard-2': 7}), ('rl_gpt4_trained', 'rl_mistral_trained'): ({'rogue-2': 18, 'wizard-2': 14, 'fighter-2': 10}, {'rogue-2': 13, 'wizard-2': 13, 'fighter-2': 18}), ('rl_mistral_trained', 'rl_gpt4_trained'): ({'rogue-2': 13, 'wizard-2': 13, 'fighter-2': 18}, {'rogue-2': 18, 'wizard-2': 14, 'fighter-2': 10}), ('rl_mistral_trained', 'ai'): ({'fighter-2': 28, 'rogue-2': 15, 'wizard-2': 5}, {'fighter-2': 5, 'wizard-2': 18, 'rogue-2': 19}), ('ai', 'rl_mistral_trained'): ({'fighter-2': 5, 'wizard-2': 18, 'rogue-2': 19}, {'fighter-2': 28, 'rogue-2': 15, 'wizard-2': 5})}


Class Grid
{('random', 'llm_mistral'): ({'cleric-2': 6, 'rogue-2': 10, 'wizard-2': 19, 'fighter-2': 2}, {'cleric-2': 15, 'wizard-2': 10, 'fighter-2': 22, 'rogue-2': 6}), ('llm_mistral', 'random'): ({'cleric-2': 15, 'wizard-2': 10, 'fighter-2': 22, 'rogue-2': 6}, {'cleric-2': 6, 'rogue-2': 10, 'wizard-2': 19, 'fighter-2': 2}), ('random', 'llm_llama3'): ({'wizard-2': 15, 'rogue-2': 13, 'cleric-2': 8, 'fighter-2': 2}, {'cleric-2': 7, 'fighter-2': 17, 'rogue-2': 11, 'wizard-2': 5}), ('llm_llama3', 'random'): ({'cleric-2': 7, 'fighter-2': 17, 'rogue-2': 11, 'wizard-2': 5}, {'wizard-2': 15, 'rogue-2': 13, 'cleric-2': 8, 'fighter-2': 2}), ('random', 'llm_gpt4'): ({'wizard-2': 12, 'rogue-2': 18, 'fighter-2': 3, 'cleric-2': 7}, {'fighter-2': 20, 'wizard-2': 13, 'cleric-2': 12, 'rogue-2': 4}), ('llm_gpt4', 'random'): ({'fighter-2': 20, 'wizard-2': 13, 'cleric-2': 12, 'rogue-2': 4}, {'wizard-2': 12, 'rogue-2': 18, 'fighter-2': 3, 'cleric-2': 7}), ('random', 'ai'): ({'rogue-2': 10, 'cleric-2': 7, 

Convert to latex for publishing

In [63]:
adversaries.sort()

with open("match_grid_all.latex", "w") as f:
    header =  r"""
\begin{table}[h]
\centering
\resizebox{\textwidth}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|}
\hline"""
    f.write(header + "\n")
    f.write("\\textbf{Agent}")
    for player in adversaries:
        player_str = player.replace('_', '\_')
        f.write(" & \\textbf{" + f"{player_str}" + "}")
    f.write(" \\\\\n")
    f.write("\\hline\n")
    for player in adversaries:
        player_str = player.replace('_', '\_')
        f.write(f"{player_str}")
        for adversary in adversaries:
            if (player, adversary) in match_grid:
                wins, losses, ties, avg_rounds = match_grid[(player, adversary)]
                f.write(f" & {wins}/{losses}/{ties} ")
            else:
                f.write(" & - ")
        f.write(" \\\\\n")
    footer = r"""
\hline
\end{tabular}%
}
\caption{D\&D Four Classes Tournament: Win/Loss/Tie Matrix}
\label{tab:dnd-four-classes-matrix}
\end{table}"""
    f.write(footer)