In [19]:
from natural20.map import Map, Terrain
from natural20.battle import Battle
from natural20.player_character import PlayerCharacter
from natural20.map_renderer import MapRenderer
from natural20.die_roll import DieRoll
from natural20.generic_controller import GenericController
from natural20.session import Session
from natural20.actions.move_action import MoveAction
from natural20.action import Action
from natural20.gym.dndenv import dndenv, action_type_to_int
from gymnasium import register, envs, make
from llm_interface import GPT4Interfacer, LLama3Interface
from natural20.gym.dndenv_controller import DndenvController
from model import QNetwork
from natural20.gym.llm_helpers.prompting_utils import action_to_prompt
from natural20.gym.dndenv import embedding_loader
from natural20.event_manager import EventManager
from natural20.gym.dqn.policy import ModelPolicy
from llm_interface import GPT4Interfacer
import os
import time
import torch
import random
import numpy as np
from tqdm.autonotebook import tqdm

In [20]:
ROUND_PER_MATCH = 30
# setup vLLM endpoints
LLAMA3_URL = "http://localhost:8001/v1"
MISTRAL_URL = "http://localhost:8000/v1"
GPT4_TOKEN = "OPENAI_GPT_TOKEN_HERE"
WEIGHTS_FOLDER = "model_weights_all"

In [21]:
event_manager = EventManager()
event_manager.standard_cli()
session = Session(root_path="map_with_obstacles", event_manager=event_manager)

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [23]:
class Agent:
    def action(self, observation, info):
        return random.choice(info['available_moves'])

class CustomAgent(Agent):
    def __init__(self, llm_interface):
        self.llm_interface = llm_interface

    def action(self, observation, info):
        return self.llm_interface.select_action_for_state(observation, info)
    def __str__(self) -> str:
        return "Custom LLM Agent"

class ModelAgent(Agent):
    def __init__(self, model_policy):
        self.model_policy = model_policy

    def action(self, observation, info):
        return self.model_policy.action(observation, info)


Setup the appropriate URLs to your vLLM instances

In [24]:
def prompt_for_variant(variant):
    if variant == "llama3":
        prompt = GPT4Interfacer(debug=False, tools=False, base_url=LLAMA3_URL, api_key="token1234", variant='NousResearch/Meta-Llama-3.1-8B-Instruct')
    elif variant == "gpt4":
        prompt = GPT4Interfacer(debug=False, tools=True, api_key=GPT4_TOKEN, variant='gpt-4o-mini')
    elif variant == "mistral":
        prompt = GPT4Interfacer(debug=False, tools=False, base_url=MISTRAL_URL, api_key="token1234", variant='mistralai/Mistral-7B-Instruct-v0.3')
    else:
        raise ValueError(f"Invalid variant: {variant}")

    return prompt

In [25]:
def start_game(player="rl_rules_trained", adversary="llm_llama3"):
    player_agent = None
    if player == "rl_rules_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_dnd_egreedy.pt", device=device, debug=False)
        player_agent = ModelAgent(model)
    elif player == "rl_llama3_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary.pt", device=device, debug=False)
        player_agent = ModelAgent(model)
    elif player == "rl_mistral_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary_mistral.pt", device=device, debug=False)
        player_agent = ModelAgent(model)
    elif player == "rl_gpt4_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary_gpt4.pt", device=device, debug=False)
        player_agent = ModelAgent(model)        
    elif player.startswith("llm"):
        prompt = prompt_for_variant(player.split("_")[1])
        player_agent = CustomAgent(prompt)
    elif player == "random":
        player_agent = Agent()
    else:
        raise ValueError(f"Invalid player: {player}")

    # Setup Adversary
    if adversary == "rl_rules_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_dnd_egreedy.pt", device=device, debug=False)
        adversary_agent = ModelAgent(model)
    elif adversary == "rl_llama3_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary.pt", device=device, debug=False)
        adversary_agent = ModelAgent(model)
    elif adversary == "rl_mistral_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary_mistral.pt", device=device, debug=False)
        adversary_agent = ModelAgent(model)
    elif adversary == "rl_gpt4_trained":
        model = ModelPolicy(session, weights_file=f"{WEIGHTS_FOLDER}/model_best_llm_adversary_gpt4.pt", device=device, debug=False)
        adversary_agent = ModelAgent(model)
    elif adversary.startswith("llm"):
        prompt = prompt_for_variant(adversary.split("_")[1])
        adversary_agent = CustomAgent(prompt)
    elif adversary == "ai":
        adversary_agent = None
    elif adversary == "random":
        adversary_agent = Agent()
    else:
        raise ValueError(f"Invalid adversary: {adversary}")

    def reaction_callback(state, reward, done, truncated, info):
        """
        Callback function to be called when the environment is waiting for a reaction from the agent.
        Reactions in DnD are typically reactions to enemy actions, such as opportunity attacks.
        """
        print(f"{info['reactor']}: Reaction for {info['trigger']}:")
        action = player_agent.action(state, info)
        return action

    env = make("dndenv-v0", root_path="map_with_obstacles",
           render_mode="ansi",
           custom_agent=adversary_agent,
           profiles=lambda: random.choice(['high_elf_fighter']),
                enemies=lambda: random.choice(['high_elf_fighter']),
                map_file=lambda: random.choice(['maps/simple_map',\
                                                'maps/complex_map', \
                                                'maps/game_map', \
                                                'maps/walled_map']),
           debug=False,
           show_logs=False)
    wins = 0
    losses = 0
    ties = 0
    total_rounds = []
    for round in tqdm(range(ROUND_PER_MATCH), leave=False):
        print(f"Round {round + 1}")

        observation, info = env.reset(reaction_callback=reaction_callback)
        action = player_agent.action(observation, info)
        terminal = False
        steps = 0

        while not terminal and steps < 512:
            steps += 1
            observation, reward, terminal, truncated, info = env.step(action)

            if not terminal and not truncated:
                action = player_agent.action(observation, info)

            if terminal or truncated:
                print(f"Reward: {reward}")
                if reward == 10:
                    wins += 1
                elif reward == -10:
                    losses += 1
                else:
                    ties += 1
                break
        total_rounds.append(info['round'])

    print(f"Wins: {wins}, Losses: {losses}, Ties: {ties}")
    return wins, losses, ties, np.mean(total_rounds)

Create a match grid

In [26]:
match_grid = {}

Setup pairings for the tournament

In [53]:
# setup available players and adversaries, note that duplicate pairings are automatically removed

players = ["random", "llm_mistral", "llm_llama3", "llm_gpt4", "rl_rules_trained", "rl_llama3_trained", "rl_gpt4_trained", "rl_mistral_trained"]
adversaries = ["random", "llm_mistral", "llm_llama3", "llm_gpt4", "ai", "rl_rules_trained", "rl_llama3_trained", "rl_gpt4_trained", "rl_mistral_trained"]

# remove from matchgrid first

# for player in players:
#     for adversary in adversaries:
#         if (player,adversary) in match_grid:
#             del match_grid[(player, adversary)]
#         if (adversary, player) in match_grid:
#             del match_grid[(adversary, player)]

In [46]:


match_paring = []

# create a cartesian product of all players and adversaries
for player in players:
    for adversary in adversaries:
        if (player, adversary) in match_grid:
            continue
        if (adversary, player) in match_grid:
            continue
        if adversary == player:
            continue
        match_paring.append((player, adversary))

for player, adversary in tqdm(match_paring):
    try:
        wins, losses, ties, avg_rounds = start_game(player=player, adversary=adversary)
        match_grid[(player, adversary)] = (wins, losses, ties, avg_rounds)
        match_grid[(adversary, player)] = (losses, wins, ties, avg_rounds)
    except Exception as e:
        print(f"Error: {e} on {player} vs {adversary}")
        match_grid[(player, adversary)] = (0, 0, 0, 0)
        match_grid[(adversary, player)] = (0, 0, 0, 0)


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Round 1


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


Reward: 10
Round 2
Reward: 10
Round 3
Reward: -10
Round 4
gomerin: Reaction for opportunity_attack:
Reward: -10
Round 5
Reward: 10
Round 6
Reward: -10
Round 7
Reward: -10
Round 8
Reward: 10
Round 9
Reward: 10
Round 10
Reward: -10
Round 11
Reward: -10
Round 12
Reward: 10
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: 10
Round 16
Reward: 0
Round 17
Reward: -10
Round 18
Reward: 10
Round 19
Reward: 10
Round 20
Reward: -10
Round 21
Reward: 10
Round 22
Reward: 10
Round 23
Reward: 10
Round 24
Reward: -10
Round 25
Reward: 0
Round 26
Reward: -10
Round 27
Reward: 0
Round 28
Reward: -10
Round 29
Reward: 10
Round 30
gomerin: Reaction for opportunity_attack:
Reward: 10
Wins: 16, Losses: 11, Ties: 3


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: 10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
gomerin: Reaction for opportunity_attack:
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 5
Reward: -10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
gomerin: Reaction for opportunity_attack:
Reward: 10
Round 9
Reward: -10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
Reward: 10
Round 13
Reward: 10
Round 14
Reward: 10
Round 15
Reward: 10
Round 16
Reward: -10
Round 17
Reward: 10
Round 18
Reward: -10
Round 19
Reward: 10
Round 20
Reward: 10
Round 21
Reward: -10
Round 22
Reward: 10
Round 23
Reward: 10
Round 24
Reward: -10
Round 25
Reward: 10
Round 26
Reward: -10
Round 27
Reward: -10
Round 28
Reward: -10
Round 29
Reward: -10
Round 30
gomerin: Reaction for opportunity_attack:
Reward: 10
Wins: 19, Losses: 11, Ties: 0


  0%|          | 0/30 [00:00<?, ?it/s]

Round 1
Reward: -10
Round 2
Reward: 10
Round 3
Reward: 10
Round 4
Reward: 10
Round 5
Reward: 10
Round 6
Reward: 10
Round 7
Reward: 10
Round 8
Reward: -10
Round 9
Reward: 10
Round 10
Reward: 10
Round 11
Reward: -10
Round 12
Reward: -10
Round 13
Reward: 10
Round 14
Reward: -10
Round 15
Reward: -10
Round 16
Reward: -10
Round 17
Reward: -10
Round 18
Reward: 10
Round 19
Reward: -10
Round 20
Reward: 10
Round 21
Reward: 10
Round 22
Reward: -10
Round 23
Reward: -10
Round 24
Reward: -10
Round 25
Reward: -10
Round 26
Reward: -10
Round 27
Reward: 10
Round 28
Reward: 10
Round 29
Reward: -10
Round 30
Reward: -10
Wins: 14, Losses: 16, Ties: 0


Generate Report about the matches

In [57]:
import pandas as pd

# setup a pandas table to plot the wins, losses, and ties for each matchup
df = pd.DataFrame(match_grid).T
df.columns = ['Wins', 'Losses', 'Ties', 'AVG Rounds']
df = df.sort_values('Wins', ascending=False)
df

Unnamed: 0,Unnamed: 1,Wins,Losses,Ties,AVG Rounds
llm_mistral,random,29.0,1.0,0.0,11.000000
ai,random,29.0,1.0,0.0,3.800000
rl_gpt4_trained,random,29.0,1.0,0.0,13.433333
rl_llama3_trained,random,29.0,1.0,0.0,12.233333
llm_gpt4,random,28.0,2.0,0.0,10.500000
...,...,...,...,...,...
random,llm_gpt4,2.0,28.0,0.0,10.500000
random,ai,1.0,29.0,0.0,3.800000
random,rl_llama3_trained,1.0,29.0,0.0,12.233333
random,rl_gpt4_trained,1.0,29.0,0.0,13.433333


In [59]:
leaderboard = df.groupby(level=0).sum().sort_values('Wins', ascending=False)

with open("leaderboard_fighters.latex", "w") as f:
    header =  r"""
\begin{table}[h]
\centering
\begin{tabular}{|l|c|c|c|c|}
\hline
\textbf{Agent} & \textbf{Wins} & \textbf{Losses} & \textbf{Ties} & \textbf{AVG Rounds} \\
\hline
"""
    f.write(header + "\n")
    for index, row in leaderboard.iterrows():
        player_str = index.replace('_', '\_')
        f.write(f"{player_str} & {row['Wins']} & {row['Losses']} & {row['Ties']} & {row['AVG Rounds']:.2f} \\\\\n")
    footer = r"""
\hline
\end{tabular}
\caption{Leaderboard for the Fighter Class Tournament: LLMs vs RL Agents}
\label{tab:fighter-class-leaderboard}
\end{table}
"""
    f.write(footer)

In [49]:
# create a table of how many times an agent has "won" against another agent
win_table = {}
lost_against = {}
for (player, adversary) in match_grid.keys():
    wins, losses, _, _ = match_grid[(player, adversary)]
    win_table[player] = win_table.get(player, 0)
    if wins > losses:
        win_table[player] += 1
    else:
        losses = lost_against.get(player, [])
        losses.append(adversary)
        lost_against[player] =  losses

lost_against
win_table

# create a table ranking the agents by how many times they have won
df = pd.DataFrame(win_table.items(), columns=["Agent", "Wins"])
df = df.sort_values('Wins', ascending=False)
df.to_csv("agent_ranking_fighters.csv")
df

Unnamed: 0,Agent,Wins
1,llm_mistral,7
3,llm_gpt4,6
7,rl_gpt4_trained,6
8,rl_mistral_trained,5
4,ai,4
2,llm_llama3,3
5,rl_rules_trained,3
6,rl_llama3_trained,2
0,random,0


Dump results to CSV

In [50]:
df.to_csv("match_results_fighter.csv")

In [51]:
# create a leaderboard on the most wins

leaderboard = df.groupby(level=0).sum().sort_values('Wins', ascending=False)
leaderboard

  leaderboard = df.groupby(level=0).sum().sort_values('Wins', ascending=False)


Unnamed: 0,Wins
1,7
3,6
7,6
8,5
4,4
2,3
5,3
6,2
0,0


In [33]:
leaderboard.to_csv("leaderboard_fighter.csv")

In [55]:
adversaries.sort()

with open("match_grid_fighters.latex", "w") as f:
    header =  r"""
\begin{table}[h]
\centering
\resizebox{\textwidth}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|}
\hline"""
    f.write(header + "\n")
    f.write("\\textbf{Agent}")
    for player in adversaries:
        player_str = player.replace('_', '\_')
        f.write(" & \\textbf{" + f"{player_str}" + "}")
    f.write(" \\\\\n")
    f.write("\\hline\n")
    for player in adversaries:
        player_str = player.replace('_', '\_')
        f.write(f"{player_str}")
        for adversary in adversaries:
            if (player, adversary) in match_grid:
                wins, losses, ties, avg_rounds = match_grid[(player, adversary)]
                f.write(f" & {wins}/{losses}/{ties}")
            else:
                f.write(" & - ")
        f.write(" \\\\\n")
    footer = r"""
\hline
\end{tabular}%
}
\caption{D\&D Fighter-Class Tournament: Win/Loss/Tie Matrix}
\label{tab:fighter-class-matrix}
\end{table}"""
    f.write(footer)