In [None]:
from natural20.session import Session
from natural20.gym.dndenv import dndenv, action_type_to_int
from gymnasium import register, envs, make
from natural20.gym.llm_helpers.prompting_utils import action_to_prompt
from natural20.gym.dndenv import embedding_loader
from natural20.event_manager import EventManager
from natural20.gym.dqn.policy import ModelPolicy
from llm_interface import GPT4Interfacer, OllamaInterfacer
import torch
import random
import os
import numpy as np
from tqdm.autonotebook import tqdm

In [2]:
# Number of rounds (episodes) played for every player/adversary matchup.
ROUND_PER_MATCH = 30
# setup VLLM endpoints
LLAMA3_URL = "http://localhost:8001/v1"
MISTRAL_URL = "http://localhost:8000/v1"
# Read the OpenAI token from the environment so a real key never has to be
# committed with the notebook; the placeholder is kept as a fallback.
GPT4_TOKEN = os.environ.get("OPENAI_API_KEY", "OPENAI_GPT_TOKEN_HERE")
# Folder that holds the LLM-adversary-trained policy weights.
WEIGHTS_FOLDER = "model_weights_all"
# Maximum number of environment steps allowed in a single round.
HORIZON_LENGTH = 512
# Forwarded to the environment as `show_logs`.
ENABLE_LOGS = True
# Root folder for the per-matchup summaries and round logs.
OUTPUTFOLDER = "output"

In [3]:
# Build the event manager, apply its standard_cli() preset, and hand it to the
# game session rooted at the "map_with_obstacles" directory.
event_manager = EventManager()
event_manager.standard_cli()
session = Session(event_manager=event_manager, root_path="map_with_obstacles")

In [4]:
# Torch device handed to the model policies: CUDA when available, else CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
class Agent:
    """Baseline agent that plays a randomly chosen available move."""

    def action(self, observation, info):
        """Return a random entry of ``info['available_moves']``.

        ``observation`` is accepted to match the agent interface but is not
        consulted.
        """
        legal_moves = info['available_moves']
        return random.choice(legal_moves)

class CustomAgent(Agent):
    """Agent whose moves are selected by an LLM interface object."""

    def __init__(self, llm_interface):
        # The interface must provide select_action_for_state(observation, info).
        self.llm_interface = llm_interface

    def __str__(self) -> str:
        return "Custom LLM Agent"

    def action(self, observation, info):
        """Ask the wrapped LLM interface which action to take and return it."""
        chosen_action = self.llm_interface.select_action_for_state(observation, info)
        return chosen_action

class ModelAgent(Agent):
    """Agent whose moves are selected by a trained model policy."""

    def __init__(self, model_policy):
        # The policy must provide action(observation, info).
        self.model_policy = model_policy

    def action(self, observation, info):
        """Forward the observation and info to the policy and return its action."""
        chosen_action = self.model_policy.action(observation, info)
        return chosen_action



Set up the appropriate URLs for your vLLM or Ollama instances

In [6]:
# def prompt_for_variant(variant):
#     if variant == "llama3":
#         prompt = GPT4Interfacer(debug=False, tools=False, base_url=LLAMA3_URL, api_key="token1234", variant='NousResearch/Meta-Llama-3.1-8B-Instruct')
#     elif variant == "gpt4":
#         prompt = GPT4Interfacer(debug=False, tools=True, api_key=GPT4_TOKEN, variant='gpt-4o-mini')
#     elif variant == "mistral":
#         prompt = GPT4Interfacer(debug=False, tools=False, base_url=MISTRAL_URL, api_key="token1234", variant='mistralai/Mistral-7B-Instruct-v0.3')
#     else:
#         raise ValueError(f"Invalid variant: {variant}")

#     return prompt

def prompt_for_variant(variant):
    """Build the LLM interface object for a named model variant.

    Args:
        variant: One of "llama3", "gpt4", "mistral" or "deepseek".

    Returns:
        An ``OllamaInterfacer`` for the Ollama-served models, or a
        ``GPT4Interfacer`` for "gpt4".

    Raises:
        ValueError: If ``variant`` is not one of the supported names.
    """
    if variant == "llama3":
        return OllamaInterfacer(model="llama3.2:latest")
    if variant == "gpt4":
        # gpt-4o-mini with an OpenAI API key takes the GPT4Interfacer keyword
        # set (tools/api_key/variant), not the Ollama one (model/base_url).
        return GPT4Interfacer(debug=False, tools=True, api_key=GPT4_TOKEN, variant='gpt-4o-mini')
    if variant == "mistral":
        return OllamaInterfacer(base_url="http://ubuntu.local:11434", model="mistral:7b")
    if variant == "deepseek":
        return OllamaInterfacer(model="deepseek-r1:7b")
    raise ValueError(f"Invalid variant: {variant}")

In [7]:
def increment_result_by_class(type, info, wins_or_loss_by_class=None):
    """Add one to the per-class tally for every participant in ``info[type]``.

    Args:
        type: Key into ``info`` selecting the participant list
            (callers pass 'players' or 'enemies').
        info: Mapping that holds the participant lists; every participant
            must provide ``class_descriptor()``.
        wins_or_loss_by_class: Tally dict updated in place. When omitted a
            fresh dict is created instead of failing on ``None``.

    Returns:
        The updated tally dict (the same object that was passed in, if any).
    """
    if wins_or_loss_by_class is None:
        wins_or_loss_by_class = {}
    for participant in info[type]:
        descriptor = participant.class_descriptor()
        wins_or_loss_by_class[descriptor] = wins_or_loss_by_class.get(descriptor, 0) + 1
    return wins_or_loss_by_class


def _build_agent(kind, role):
    """Create the agent named by ``kind`` for ``role`` ("player" or "adversary").

    Returns a ``ModelAgent`` for the ``rl_*`` names, a ``CustomAgent`` for
    ``llm_<variant>`` names, a random ``Agent`` for "random", and ``None`` for
    "ai" (accepted for the adversary only; ``None`` is what gets passed to the
    environment as ``custom_agent``).

    Raises:
        ValueError: If ``kind`` is not valid for ``role``.
    """
    rl_weight_files = {
        "rl_rules_trained": "model_best_dnd_egreedy.pt",
        "rl_llama3_trained": f"{WEIGHTS_FOLDER}/model_best_llm_adversary.pt",
        "rl_mistral_trained": f"{WEIGHTS_FOLDER}/model_best_llm_adversary_mistral.pt",
        "rl_gpt4_trained": f"{WEIGHTS_FOLDER}/model_best_llm_adversary_gpt4.pt",
    }
    if kind in rl_weight_files:
        policy = ModelPolicy(session, weights_file=rl_weight_files[kind], device=device, debug=False)
        return ModelAgent(policy)
    if kind.startswith("llm"):
        # Names look like "llm_<variant>"; the variant selects the interface.
        return CustomAgent(prompt_for_variant(kind.split("_")[1]))
    if kind == "random":
        return Agent()
    if kind == "ai" and role == "adversary":
        return None
    raise ValueError(f"Invalid {role}: {kind}")


def start_game(player="rl_rules_trained", adversary="llm_llama3", output_file=None):
    """Play ``ROUND_PER_MATCH`` rounds of ``player`` versus ``adversary``.

    A summary (``000_summary.txt``) and one log file per round are written to
    ``OUTPUTFOLDER/<player>_<adversary>/``. A round ends when the environment
    reports ``terminal`` or ``truncated``, or after ``HORIZON_LENGTH`` steps.
    A final reward of 10 is a win, -10 a loss, and anything else (including
    running out of steps) a tie. A round that raises is logged and counted as
    an error.

    Args:
        player: Agent name for the hero side ("rl_*_trained", "llm_<variant>"
            or "random").
        adversary: Agent name for the enemy side; additionally accepts "ai".
        output_file: Currently unused; kept so existing callers keep working.

    Returns:
        Tuple ``(wins, losses, ties, avg_rounds, wins_by_class, loss_by_class,
        errors)`` where ``avg_rounds`` is the mean of ``info['round']`` over the
        completed rounds (0.0 if none completed).

    Raises:
        ValueError: If ``player`` or ``adversary`` is not a recognised name.
    """
    player_agent = _build_agent(player, "player")
    adversary_agent = _build_agent(adversary, "adversary")

    def reaction_callback(state, reward, done, truncated, info):
        """
        Callback function to be called when the environment is waiting for a reaction from the agent.
        Reactions in DnD are typically reactions to enemy actions, such as opportunity attacks.
        """
        print(f"{info['reactor']}: Reaction for {info['trigger']}:")
        return player_agent.action(state, info)

    character_pool = ['high_elf_fighter', 'halfling_rogue', 'high_elf_mage', 'dwarf_cleric']
    map_pool = ['maps/simple_map', 'maps/complex_map', 'maps/game_map', 'maps/walled_map']

    env = make("dndenv-v0", root_path="map_with_obstacles",
               render_mode="ansi",
               custom_agent=adversary_agent,
               show_logs=ENABLE_LOGS,
               hero_names=[f"player_{player}"],
               enemy_names=[f"player_{adversary}"],
               profiles=lambda: random.choice(character_pool),
               enemies=lambda: random.choice(character_pool),
               map_file=lambda: random.choice(map_pool),
               debug=False)

    wins = 0
    losses = 0
    ties = 0
    errors = 0

    total_rounds = []
    wins_by_class = {}
    loss_by_class = {}

    foldername = os.path.join(OUTPUTFOLDER, f"{player}_{adversary}")
    try:
        os.makedirs(foldername, exist_ok=True)
        with open(os.path.join(foldername, "000_summary.txt"), "w") as f:
            f.write(f"Player: {player}\n")
            f.write(f"Adversary: {adversary}\n")

            for round_index in tqdm(range(ROUND_PER_MATCH), leave=False):
                round_number = round_index + 1
                try:
                    print(f"Round {round_number}")
                    outputfile = os.path.join(foldername, f"round_{round_number}.txt")
                    observation, info = env.reset(reaction_callback=reaction_callback, output_file=outputfile)
                    action = player_agent.action(observation, info)
                    result = None
                    steps = 0
                    while steps < HORIZON_LENGTH:
                        steps += 1
                        observation, reward, terminal, truncated, info = env.step(action)

                        if terminal or truncated:
                            print(f"Reward: {reward}")
                            if reward == 10:
                                result = "1.0-0.0"
                                wins += 1
                                increment_result_by_class('players', info, wins_by_class)
                                increment_result_by_class('enemies', info, loss_by_class)
                            elif reward == -10:
                                result = "0.0-1.0"
                                losses += 1
                                increment_result_by_class('players', info, loss_by_class)
                                increment_result_by_class('enemies', info, wins_by_class)
                            else:
                                result = "0.5-0.5"
                                ties += 1
                            break

                        action = player_agent.action(observation, info)

                    if result is None:
                        # The step budget ran out before the environment ended
                        # the episode; record it as a draw so every round is
                        # accounted for in the totals.
                        result = "0.5-0.5"
                        ties += 1

                    total_rounds.append(info['round'])
                    f.write(f"Round {round_number}: {info['round']},{result},{result}\n")
                except Exception as e:
                    # Best effort: one broken round must not abort the match.
                    f.write(f"Round {round_number}: Error {e}\n")
                    print(f"Error: {e}")
                    errors += 1
                finally:
                    # Keep the summary readable while a long match is running.
                    f.flush()

            # np.mean([]) would yield NaN plus a RuntimeWarning.
            avg_rounds = float(np.mean(total_rounds)) if total_rounds else 0.0
            f.write("\n")
            f.write(f"Wins: {wins}, Losses: {losses}, Ties: {ties}\n")
            f.write(f"Errors: {errors}\n")
            f.write(f"Total Rounds: {avg_rounds}\n")
    finally:
        # Release the environment even when the match aborts.
        env.close()
    print(f"Wins: {wins}, Losses: {losses}, Ties: {ties}")

    return wins, losses, ties, avg_rounds, wins_by_class, loss_by_class, errors

Create a match grid

In [8]:
# Results keyed by (player, adversary) -> (wins, losses, ties, avg_rounds).
match_grid = dict()

Set up pairings for the tournament

In [9]:
# Agents that may take the player seat and the adversary seat. The tournament
# cell skips self-matches and pairings already recorded in match_grid.
players = [
    "random",
    "llm_mistral",
    "llm_llama3",
    "rl_rules_trained",
    "llm_deepseek",
]
adversaries = [
    "random",
    "llm_mistral",
    "llm_llama3",
    "ai",
    "rl_rules_trained",
    "llm_deepseek",
]

In [None]:
# Per-class tallies keyed by (player, adversary) -> (wins_by_class, loss_by_class).
class_grid = {}
match_paring = []
# Unordered pairs already scheduled in this run. match_grid is only filled
# while matches are played, so it cannot filter mirrored pairings
# (A vs B / B vs A) while the schedule is still being built.
scheduled_pairs = set()

# create a cartesian product of all players and adversaries
for player in players:
    for adversary in adversaries:
        if adversary == player:
            continue
        # Already played (in either seat order) in an earlier run of this cell.
        if (player, adversary) in match_grid or (adversary, player) in match_grid:
            continue
        pair_key = frozenset((player, adversary))
        if pair_key in scheduled_pairs:
            continue
        scheduled_pairs.add(pair_key)
        match_paring.append((player, adversary))

for player, adversary in tqdm(match_paring):
    try:
        wins, losses, ties, avg_rounds, wins_by_class, loss_by_class, errors = start_game(player=player, adversary=adversary)
        # Store the result from both points of view so either key can be looked up.
        match_grid[(player, adversary)] = (wins, losses, ties, avg_rounds)
        match_grid[(adversary, player)] = (losses, wins, ties, avg_rounds)

        class_grid[(player, adversary)] = (wins_by_class, loss_by_class)
        class_grid[(adversary, player)] = (loss_by_class, wins_by_class)
    except Exception as e:
        # A matchup that cannot be played at all is recorded as all zeros.
        print(f"Error: {e} on {player} vs {adversary}")
        match_grid[(player, adversary)] = (0, 0, 0, 0)
        match_grid[(adversary, player)] = (0, 0, 0, 0)



Generate a report about the matches

In [None]:
import pandas as pd

# One row per (player, adversary) key of match_grid, with the result tuple
# spread over four named columns and the rows ordered by wins.
match_table = pd.DataFrame(match_grid).transpose()
match_table.columns = ['Wins', 'Losses', 'Ties', 'AVG Rounds']
df = match_table.sort_values(by='Wins', ascending=False)
df

In [12]:
# Per-agent totals over all opponents. Counts are summed; the per-matchup
# average round count is averaged, because summing averages would scale the
# "AVG Rounds" column with the number of opponents.
leaderboard = (
    df.groupby(level=0)
    .agg({'Wins': 'sum', 'Losses': 'sum', 'Ties': 'sum', 'AVG Rounds': 'mean'})
    .sort_values('Wins', ascending=False)
)

with open("leaderboard_all.latex", "w") as f:
    header =  r"""
\begin{table}[h]
\centering
\begin{tabular}{|l|c|c|c|c|}
\hline
\textbf{Agent} & \textbf{Wins} & \textbf{Losses} & \textbf{Ties} & \textbf{AVG Rounds} \\
\hline
"""
    f.write(header + "\n")
    for index, row in leaderboard.iterrows():
        # '\\_' is the explicit spelling of backslash-underscore; the bare
        # '\_' is an invalid escape sequence that newer Pythons warn about.
        player_str = index.replace('_', '\\_')
        # Counts are whole numbers but are stored as floats in the frame.
        f.write(f"{player_str} & {row['Wins']:.0f} & {row['Losses']:.0f} & {row['Ties']:.0f} & {row['AVG Rounds']:.2f} \\\\\n")
    footer = r"""
\hline
\end{tabular}
\caption{Leaderboard for D\&D Four Classes Tournament: LLMs vs RL Agents}
\label{tab:dnd-four-classes-leaderboard}
\end{table}
"""
    f.write(footer)

In [None]:
# create a leaderboard on the most wins
# Counts are summed per agent; "AVG Rounds" is averaged over that agent's
# matchups, because summing per-matchup averages would not be an average.
leaderboard = (
    df.groupby(level=0)
    .agg({'Wins': 'sum', 'Losses': 'sum', 'Ties': 'sum', 'AVG Rounds': 'mean'})
    .sort_values('Wins', ascending=False)
)
leaderboard.to_csv("leaderboard_all.csv")

leaderboard

Dump results to CSV

In [14]:
# Label the two index levels so the file can be re-loaded and indexed by
# ['player', 'adversary']; unlabeled levels come back as "Unnamed: N" columns.
df.to_csv("match_results_all.csv", index_label=['player', 'adversary'])

In [None]:
import pandas as pd

# read the match_result_all.csv
# The first two columns hold the (player, adversary) key. They are selected by
# position and named explicitly so the load works whether or not the file was
# written with index labels.
df = pd.read_csv("match_results_all.csv", index_col=[0, 1])
df.index.names = ['player', 'adversary']

# append a column if a player has beaten its adversary

df['player_won'] = df['Wins'] > df['Losses']
df['adversary_won'] = df['Wins'] < df['Losses']
df['tie'] = df['Wins'] == df['Losses']

# create a pivot table to show the wins, losses, and ties for each player
pivot = df.reset_index().pivot_table(index='player', values=['player_won', 'adversary_won'], aggfunc='sum')
# sort by wins and then by the least losses
pivot = pivot.sort_values(['player_won', 'adversary_won'], ascending=[False, True])
pivot







In [None]:


# create a table of how many times an agent has "won" against another agent
win_table = {}      # agent -> number of matchups with more wins than losses
lost_against = {}   # agent -> opponents that beat it over the matchup
for (player, adversary), (wins, losses, _, _) in match_grid.items():
    win_table.setdefault(player, 0)
    if wins > losses:
        win_table[player] += 1
    elif losses > wins:
        # A drawn matchup (or one recorded as all zeros after a failure) is
        # not a loss, so only strictly lost matchups are listed here.
        lost_against.setdefault(player, []).append(adversary)

# create a table ranking the agents by how many times they have won
df = pd.DataFrame(list(win_table.items()), columns=["Agent", "Wins"])
df = df.sort_values('Wins', ascending=False)
df.to_csv("agent_ranking_all.csv")
df


Dump CSV with class matchups

In [None]:
# Print a title line followed by the raw per-class tally dict built during
# the tournament.
print("Class Grid", class_grid, sep="\n")
# Class Grid: {('random', 'llm_mistral'): ({'wizard-2': 19, 'fighter-2': 9, 'rogue-2': 10}, {'fighter-2': 32, 'rogue-2': 6, 'wizard-2': 8}), ('llm_mistral', 'random'): ({'fighter-2': 32, 'rogue-2': 6, 'wizard-2': 8}, {'wizard-2': 19, 'fighter-2': 9, 'rogue-2': 10}), ('random', 'llm_llama3'): ({'wizard-2': 17, 'fighter-2': 3, 'rogue-2': 26}, {'rogue-2': 10, 'fighter-2': 19, 'wizard-2': 15}), ('llm_llama3', 'random'): ({'rogue-2': 10, 'fighter-2': 19, 'wizard-2': 15}, {'wizard-2': 17, 'fighter-2': 3, 'rogue-2': 26}), ('random', 'llm_gpt4'): ({'rogue-2': 14, 'wizard-2': 22, 'fighter-2': 4}, {'wizard-2': 20, 'fighter-2': 11, 'rogue-2': 15}), ('llm_gpt4', 'random'): ({'wizard-2': 20, 'fighter-2': 11, 'rogue-2': 15}, {'rogue-2': 14, 'wizard-2': 22, 'fighter-2': 4}), ('random', 'ai'): ({'fighter-2': 14, 'rogue-2': 22, 'wizard-2': 5}, {'rogue-2': 11, 'wizard-2': 32, 'fighter-2': 6}), ('ai', 'random'): ({'rogue-2': 11, 'wizard-2': 32, 'fighter-2': 6}, {'fighter-2': 14, 'rogue-2': 22, 'wizard-2': 5}), ('random', 'rl_rules_trained'): ({'rogue-2': 18, 'fighter-2': 5, 'wizard-2': 13}, {'wizard-2': 18, 'rogue-2': 15, 'fighter-2': 15}), ('rl_rules_trained', 'random'): ({'wizard-2': 18, 'rogue-2': 15, 'fighter-2': 15}, {'rogue-2': 18, 'fighter-2': 5, 'wizard-2': 13}), ('random', 'rl_llama3_trained'): ({'fighter-2': 8, 'rogue-2': 21, 'wizard-2': 7}, {'rogue-2': 10, 'wizard-2': 20, 'fighter-2': 21}), ('rl_llama3_trained', 'random'): ({'rogue-2': 10, 'wizard-2': 20, 'fighter-2': 21}, {'fighter-2': 8, 'rogue-2': 21, 'wizard-2': 7}), ('random', 'rl_gpt4_trained'): ({'fighter-2': 9, 'wizard-2': 6, 'rogue-2': 15}, {'fighter-2': 21, 'wizard-2': 20, 'rogue-2': 7}), ('rl_gpt4_trained', 'random'): ({'fighter-2': 21, 'wizard-2': 20, 'rogue-2': 7}, {'fighter-2': 9, 'wizard-2': 6, 'rogue-2': 15}), ('random', 'rl_mistral_trained'): ({'rogue-2': 15, 'wizard-2': 11, 'fighter-2': 6}, {'fighter-2': 18, 'rogue-2': 10, 'wizard-2': 27}), ('rl_mistral_trained', 'random'): ({'fighter-2': 18, 'rogue-2': 
10, 'wizard-2': 27}, {'rogue-2': 15, 'wizard-2': 11, 'fighter-2': 6}), ('llm_mistral', 'llm_llama3'): ({'rogue-2': 19, 'wizard-2': 21, 'fighter-2': 13}, {'rogue-2': 17, 'fighter-2': 13, 'wizard-2': 7}), ('llm_llama3', 'llm_mistral'): ({'rogue-2': 17, 'fighter-2': 13, 'wizard-2': 7}, {'rogue-2': 19, 'wizard-2': 21, 'fighter-2': 13}), ('llm_mistral', 'llm_gpt4'): ({'rogue-2': 20, 'wizard-2': 22, 'fighter-2': 8}, {'rogue-2': 12, 'fighter-2': 21, 'wizard-2': 7}), ('llm_gpt4', 'llm_mistral'): ({'rogue-2': 12, 'fighter-2': 21, 'wizard-2': 7}, {'rogue-2': 20, 'wizard-2': 22, 'fighter-2': 8}), ('llm_mistral', 'ai'): ({'rogue-2': 21, 'wizard-2': 6, 'fighter-2': 15}, {'wizard-2': 28, 'rogue-2': 7, 'fighter-2': 12}), ('ai', 'llm_mistral'): ({'wizard-2': 28, 'rogue-2': 7, 'fighter-2': 12}, {'rogue-2': 21, 'wizard-2': 6, 'fighter-2': 15}), ('llm_mistral', 'rl_rules_trained'): ({'wizard-2': 14, 'fighter-2': 18, 'rogue-2': 13}, {'fighter-2': 17, 'rogue-2': 8, 'wizard-2': 8}), ('rl_rules_trained', 'llm_mistral'): ({'fighter-2': 17, 'rogue-2': 8, 'wizard-2': 8}, {'wizard-2': 14, 'fighter-2': 18, 'rogue-2': 13}), ('llm_mistral', 'rl_llama3_trained'): ({'rogue-2': 15, 'fighter-2': 14, 'wizard-2': 12}, {'rogue-2': 13, 'fighter-2': 21, 'wizard-2': 12}), ('rl_llama3_trained', 'llm_mistral'): ({'rogue-2': 13, 'fighter-2': 21, 'wizard-2': 12}, {'rogue-2': 15, 'fighter-2': 14, 'wizard-2': 12}), ('llm_mistral', 'rl_gpt4_trained'): ({'rogue-2': 12, 'fighter-2': 8, 'wizard-2': 18}, {'wizard-2': 16, 'fighter-2': 18, 'rogue-2': 12}), ('rl_gpt4_trained', 'llm_mistral'): ({'wizard-2': 16, 'fighter-2': 18, 'rogue-2': 12}, {'rogue-2': 12, 'fighter-2': 8, 'wizard-2': 18}), ('llm_mistral', 'rl_mistral_trained'): ({'wizard-2': 22, 'fighter-2': 8, 'rogue-2': 15}, {'rogue-2': 8, 'wizard-2': 14, 'fighter-2': 17}), ('rl_mistral_trained', 'llm_mistral'): ({'rogue-2': 8, 'wizard-2': 14, 'fighter-2': 17}, {'wizard-2': 22, 'fighter-2': 8, 'rogue-2': 15}), ('llm_llama3', 'llm_gpt4'): ({'wizard-2': 14, 
'rogue-2': 21, 'fighter-2': 6}, {'wizard-2': 7, 'rogue-2': 13, 'fighter-2': 26}), ('llm_gpt4', 'llm_llama3'): ({'wizard-2': 7, 'rogue-2': 13, 'fighter-2': 26}, {'wizard-2': 14, 'rogue-2': 21, 'fighter-2': 6}), ('llm_llama3', 'ai'): ({'rogue-2': 17, 'fighter-2': 20, 'wizard-2': 5}, {'wizard-2': 25, 'rogue-2': 16, 'fighter-2': 6}), ('ai', 'llm_llama3'): ({'wizard-2': 25, 'rogue-2': 16, 'fighter-2': 6}, {'rogue-2': 17, 'fighter-2': 20, 'wizard-2': 5}), ('llm_llama3', 'rl_rules_trained'): ({'wizard-2': 13, 'fighter-2': 8, 'rogue-2': 20}, {'rogue-2': 8, 'fighter-2': 25, 'wizard-2': 16}), ('rl_rules_trained', 'llm_llama3'): ({'rogue-2': 8, 'fighter-2': 25, 'wizard-2': 16}, {'wizard-2': 13, 'fighter-2': 8, 'rogue-2': 20}), ('llm_llama3', 'rl_llama3_trained'): ({'rogue-2': 19, 'fighter-2': 10, 'wizard-2': 12}, {'fighter-2': 32, 'wizard-2': 15, 'rogue-2': 2}), ('rl_llama3_trained', 'llm_llama3'): ({'fighter-2': 32, 'wizard-2': 15, 'rogue-2': 2}, {'rogue-2': 19, 'fighter-2': 10, 'wizard-2': 12}), ('llm_llama3', 'rl_gpt4_trained'): ({'fighter-2': 11, 'rogue-2': 18, 'wizard-2': 13}, {'fighter-2': 23, 'rogue-2': 8, 'wizard-2': 17}), ('rl_gpt4_trained', 'llm_llama3'): ({'fighter-2': 23, 'rogue-2': 8, 'wizard-2': 17}, {'fighter-2': 11, 'rogue-2': 18, 'wizard-2': 13}), ('llm_llama3', 'rl_mistral_trained'): ({'fighter-2': 5, 'rogue-2': 21, 'wizard-2': 11}, {'rogue-2': 14, 'fighter-2': 30, 'wizard-2': 6}), ('rl_mistral_trained', 'llm_llama3'): ({'rogue-2': 14, 'fighter-2': 30, 'wizard-2': 6}, {'fighter-2': 5, 'rogue-2': 21, 'wizard-2': 11}), ('llm_gpt4', 'ai'): ({'fighter-2': 33, 'rogue-2': 6, 'wizard-2': 5}, {'wizard-2': 27, 'fighter-2': 8, 'rogue-2': 10}), ('ai', 'llm_gpt4'): ({'wizard-2': 27, 'fighter-2': 8, 'rogue-2': 10}, {'fighter-2': 33, 'rogue-2': 6, 'wizard-2': 5}), ('llm_gpt4', 'rl_rules_trained'): ({'rogue-2': 18, 'wizard-2': 24, 'fighter-2': 6}, {'rogue-2': 16, 'fighter-2': 17, 'wizard-2': 9}), ('rl_rules_trained', 'llm_gpt4'): ({'rogue-2': 16, 'fighter-2': 17, 
'wizard-2': 9}, {'rogue-2': 18, 'wizard-2': 24, 'fighter-2': 6}), ('llm_gpt4', 'rl_llama3_trained'): ({'rogue-2': 21, 'fighter-2': 4, 'wizard-2': 13}, {'wizard-2': 11, 'fighter-2': 19, 'rogue-2': 19}), ('rl_llama3_trained', 'llm_gpt4'): ({'wizard-2': 11, 'fighter-2': 19, 'rogue-2': 19}, {'rogue-2': 21, 'fighter-2': 4, 'wizard-2': 13}), ('llm_gpt4', 'rl_gpt4_trained'): ({'fighter-2': 13, 'wizard-2': 10, 'rogue-2': 12}, {'fighter-2': 22, 'wizard-2': 13, 'rogue-2': 14}), ('rl_gpt4_trained', 'llm_gpt4'): ({'fighter-2': 22, 'wizard-2': 13, 'rogue-2': 14}, {'fighter-2': 13, 'wizard-2': 10, 'rogue-2': 12}), ('llm_gpt4', 'rl_mistral_trained'): ({'rogue-2': 15, 'wizard-2': 14, 'fighter-2': 6}, {'rogue-2': 17, 'wizard-2': 18, 'fighter-2': 17}), ('rl_mistral_trained', 'llm_gpt4'): ({'rogue-2': 17, 'wizard-2': 18, 'fighter-2': 17}, {'rogue-2': 15, 'wizard-2': 14, 'fighter-2': 6}), ('rl_rules_trained', 'ai'): ({'fighter-2': 27, 'rogue-2': 9, 'wizard-2': 13}, {'wizard-2': 21, 'rogue-2': 8, 'fighter-2': 9}), ('ai', 'rl_rules_trained'): ({'wizard-2': 21, 'rogue-2': 8, 'fighter-2': 9}, {'fighter-2': 27, 'rogue-2': 9, 'wizard-2': 13}), ('rl_rules_trained', 'rl_llama3_trained'): ({'wizard-2': 13, 'fighter-2': 8, 'rogue-2': 12}, {'rogue-2': 10, 'wizard-2': 14, 'fighter-2': 12}), ('rl_llama3_trained', 'rl_rules_trained'): ({'rogue-2': 10, 'wizard-2': 14, 'fighter-2': 12}, {'wizard-2': 13, 'fighter-2': 8, 'rogue-2': 12}), ('rl_rules_trained', 'rl_gpt4_trained'): ({'fighter-2': 6, 'wizard-2': 19, 'rogue-2': 8}, {'rogue-2': 11, 'wizard-2': 14, 'fighter-2': 20}), ('rl_gpt4_trained', 'rl_rules_trained'): ({'rogue-2': 11, 'wizard-2': 14, 'fighter-2': 20}, {'fighter-2': 6, 'wizard-2': 19, 'rogue-2': 8}), ('rl_rules_trained', 'rl_mistral_trained'): ({'rogue-2': 10, 'wizard-2': 14, 'fighter-2': 10}, {'wizard-2': 19, 'fighter-2': 13, 'rogue-2': 6}), ('rl_mistral_trained', 'rl_rules_trained'): ({'wizard-2': 19, 'fighter-2': 13, 'rogue-2': 6}, {'rogue-2': 10, 'wizard-2': 14, 'fighter-2': 10}), 
('rl_llama3_trained', 'ai'): ({'rogue-2': 8, 'fighter-2': 24, 'wizard-2': 14}, {'wizard-2': 22, 'rogue-2': 14, 'fighter-2': 8}), ('ai', 'rl_llama3_trained'): ({'wizard-2': 22, 'rogue-2': 14, 'fighter-2': 8}, {'rogue-2': 8, 'fighter-2': 24, 'wizard-2': 14}), ('rl_llama3_trained', 'rl_gpt4_trained'): ({'rogue-2': 17, 'wizard-2': 14, 'fighter-2': 10}, {'wizard-2': 19, 'rogue-2': 10, 'fighter-2': 17}), ('rl_gpt4_trained', 'rl_llama3_trained'): ({'wizard-2': 19, 'rogue-2': 10, 'fighter-2': 17}, {'rogue-2': 17, 'wizard-2': 14, 'fighter-2': 10}), ('rl_llama3_trained', 'rl_mistral_trained'): ({'wizard-2': 24, 'rogue-2': 13, 'fighter-2': 6}, {'rogue-2': 9, 'fighter-2': 14, 'wizard-2': 24}), ('rl_mistral_trained', 'rl_llama3_trained'): ({'rogue-2': 9, 'fighter-2': 14, 'wizard-2': 24}, {'wizard-2': 24, 'rogue-2': 13, 'fighter-2': 6}), ('rl_gpt4_trained', 'ai'): ({'fighter-2': 30, 'rogue-2': 12, 'wizard-2': 7}, {'rogue-2': 14, 'wizard-2': 21, 'fighter-2': 6}), ('ai', 'rl_gpt4_trained'): ({'rogue-2': 14, 'wizard-2': 21, 'fighter-2': 6}, {'fighter-2': 30, 'rogue-2': 12, 'wizard-2': 7}), ('rl_gpt4_trained', 'rl_mistral_trained'): ({'rogue-2': 18, 'wizard-2': 14, 'fighter-2': 10}, {'rogue-2': 13, 'wizard-2': 13, 'fighter-2': 18}), ('rl_mistral_trained', 'rl_gpt4_trained'): ({'rogue-2': 13, 'wizard-2': 13, 'fighter-2': 18}, {'rogue-2': 18, 'wizard-2': 14, 'fighter-2': 10}), ('rl_mistral_trained', 'ai'): ({'fighter-2': 28, 'rogue-2': 15, 'wizard-2': 5}, {'fighter-2': 5, 'wizard-2': 18, 'rogue-2': 19}), ('ai', 'rl_mistral_trained'): ({'fighter-2': 5, 'wizard-2': 18, 'rogue-2': 19}, {'fighter-2': 28, 'rogue-2': 15, 'wizard-2': 5})}


Convert to LaTeX for publishing

In [63]:
# Rows and columns of the matrix are the adversary names in alphabetical order.
adversaries.sort()

# One left-aligned label column plus one centred column per agent, so the
# table always matches the number of agents instead of a fixed width.
column_spec = "|l|" + "c|" * len(adversaries)

with open("match_grid_all.latex", "w") as f:
    header =  r"""
\begin{table}[h]
\centering
\resizebox{\textwidth}{!}{%
\begin{tabular}{COLUMN_SPEC}
\hline""".replace("COLUMN_SPEC", column_spec)
    f.write(header + "\n")
    f.write("\\textbf{Agent}")
    for player in adversaries:
        # '\\_' is the explicit spelling of backslash-underscore; the bare
        # '\_' is an invalid escape sequence that newer Pythons warn about.
        player_str = player.replace('_', '\\_')
        f.write(" & \\textbf{" + f"{player_str}" + "}")
    f.write(" \\\\\n")
    f.write("\\hline\n")
    for player in adversaries:
        player_str = player.replace('_', '\\_')
        f.write(f"{player_str}")
        for adversary in adversaries:
            if (player, adversary) in match_grid:
                wins, losses, ties, avg_rounds = match_grid[(player, adversary)]
                f.write(f" & {wins}/{losses}/{ties} ")
            else:
                # No result recorded for this pairing (e.g. the diagonal).
                f.write(" & - ")
        f.write(" \\\\\n")
    footer = r"""
\hline
\end{tabular}%
}
\caption{D\&D Four Classes Tournament: Win/Loss/Tie Matrix}
\label{tab:dnd-four-classes-matrix}
\end{table}"""
    f.write(footer)