In [79]:
# !pip install pyyaml
# !pip install dndice
# !pip install python-i18n
# !pip install gymnasium
# !pip install inflect
# !pip install collections-extended
# !pip install openai
# !pip install -e ..


In [80]:
import unittest
from natural20.map import Map, Terrain
from natural20.battle import Battle
from natural20.player_character import PlayerCharacter
from natural20.map_renderer import MapRenderer
from natural20.die_roll import DieRoll
from natural20.generic_controller import GenericController
from natural20.utils.utils import Session
from natural20.actions.move_action import MoveAction
from natural20.action import Action
from natural20.gym.dndenv import dndenv
from gymnasium import register, envs, make
from model import QNetwork
import torch
import tqdm as tqdm
import random
import torch.optim as optim
import torch.nn as nn
import gc
import numpy as np
import sys
import collections

In [81]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [82]:
env = make("dndenv-v0", root_path="../templates")
print(env.observation_space)
print(env.action_space.sample)

Dict('health_enemy': Box(0.0, 1.0, (1,), float64), 'health_pct': Box(0.0, 1.0, (1,), float64), 'map': Box(-1, 255, (12, 12, 3), int64), 'movement': Discrete(255), 'turn_info': Box(0, 1, (3,), int64))
<bound method Tuple.sample of Tuple(Box(-1, 8, (1,), int64), Box(-1, 1, (2,), int64), Box(-6, 6, (2,), int64), Discrete(2))>


In [83]:
model = QNetwork(device=device)
model.to(device)
state, info = env.reset()
moves = info["available_moves"]

model.eval()
print(model(state, moves[0]))




loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (17) + 5 value: 22.2
rumblebelly -> initiative roll: (14) + 5 value: 19.2
gomerin starts their turn.
tensor([[0.0006]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [84]:
def generate_trajectory(env, model, policy='e-greedy', temperature=5.0, epsilon=0.1, quick_exit=False):
    state, info = env.reset()
    done = False
    truncated = False
    states = []
    actions = []
    rewards = []
    dones = []
    truncateds = []
    infos = []

    while not done and not truncated:
        # instead of sampling  (e.g. env.action_space.sample()) we can ask help from the enivronment to obtain valid moves
        # as there are sparse valid moves in the environment
        available_moves = info["available_moves"]
        with torch.no_grad():
            if policy == 'e-greedy':
                if random.random() < epsilon:
                    chosen_index = random.choice(range(len(available_moves)))
                else:
                    values = torch.stack([model(state, move) for move in available_moves])
                    chosen_index = torch.argmax(values).item()
            elif policy == 'greedy':
                    values = torch.stack([model(state, move) for move in available_moves])
                    chosen_index = torch.argmax(values).item()
            else:
                raise ValueError(f"Unknown policy: {policy}")
        
        action = available_moves[chosen_index]
        state, reward, done, truncated, info = env.step(action)       
        
        states.append(state)
        actions.append(action)
        rewards.append(reward)
        dones.append(done)
        truncateds.append(truncated)
        infos.append(info)

        if done or truncated:
            break    
        
    return states, actions, rewards, dones, truncateds, infos

In [85]:
trajectory = generate_trajectory(env, model)
print(trajectory)

loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (9) + 5 value: 14.2
rumblebelly -> initiative roll: (19) + 5 value: 24.2
rumblebelly starts their turn.
rumblebelly uses Second Wind with (6) + 1 healing
rumblebelly dashes
rumblebelly moved to [0, 3] 5 feet
rumblebelly moved to [0, 2] 5 feet
rumblebelly moved to [0, 1] 5 feet
rumblebelly moved to [0, 2] 5 feet
rumblebelly moved to [0, 3] 5 feet
rumblebelly moved to [0, 4] 5 feet
rumblebelly moved to [0, 5] 5 feet
rumblebelly moved to [0, 4] 5 feet
rumblebelly moved to [0, 3] 5 feet
rumblebelly moved to [0, 4] 5 feet
rumblebelly moved to [0, 3] 5 feet
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 24/24===
Result: False
gomerin rol

In [86]:
EPISODES = 10
avg_reward = 0
for i in tqdm.tqdm(range(EPISODES)):
    states, actions, rewards, dones, truncateds, infos = generate_trajectory(env, model)
    avg_reward += sum(rewards)

avg_reward /= EPISODES
print(f"Average reward: {avg_reward}")

  0%|          | 0/10 [00:00<?, ?it/s]

loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (3) + 5 value: 8.2
rumblebelly -> initiative roll: (14) + 5 value: 19.2
rumblebelly starts their turn.
rumblebelly uses Second Wind with (8) + 1 healing
rumblebelly dashes
rumblebelly moved to [0, 0] 5 feet
rumblebelly moved to [0, 1] 5 feet
rumblebelly moved to [0, 0] 5 feet
rumblebelly moved to [1, 1] 5 feet
rumblebelly moved to [1, 2] 5 feet
rumblebelly moved to [0, 3] 5 feet
rumblebelly moved to [0, 4] 5 feet
rumblebelly moved to [0, 3] 5 feet
rumblebelly moved to [0, 2] 5 feet
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 24/24===
Result: False
gomerin rolls (1) + 7=8 using Longbow and misses rumblebelly
gomerin uses Second W

 10%|█         | 1/10 [00:01<00:10,  1.20s/it]

rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [0, 2] 5 feet
gomerin moved to [0, 3] 5 feet
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [0, 2] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 4/24===
rumblebelly: rumblebelly attacks gomerin with rapier
rumblebelly rolls (1) + 7=8 using Rapier and misses gomerin
rumblebelly: move to [0, 3]
rumblebelly moved to [0, 3] 5 feet
rumblebelly: move to [1, 2]
rumblebelly moved to [1, 2] 5 feet
rumblebelly: move to [0, 1]
rumblebelly moved to [0, 1] 5 feet
rumblebelly: move to [1, 1]
rumblebelly moved to [1, 1] 5 feet
rumblebelly: move to [0, 1]
rumblebelly moved to [0, 1] 5 feet
no move for rumblebelly
gomerin starts their turn.
Result: False
gomerin rolls (7 | 2) + 7=9 with disadvantage['ranged_with_enemy_in_melee'] using Longbow and misses rumblebelly
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin

 20%|██        | 2/10 [00:01<00:06,  1.14it/s]

gomerin moved to [0, 5] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 16/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly attacks gomerin using Longbow for ((5 + 6) + 5) 16 damage!
gomerin takes 16 damage!
gomerin died. :(
tpk
Result: tpk
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (12) + 5 value: 17.2
rumblebelly -> initiative roll: (12) + 5 value: 17.2
gomerin starts their turn.
gomerin uses Second Wind with (7) + 1 healing
gomerin rolls (2 | 3) + 7=9 with disadvantage['ranged_with_enemy_in_melee'] using Longbow and misses rumblebelly
gomerin moved to [1, 5] 5 feet
gomerin moved to [2, 4] 5 feet
gomerin moved to [1, 5] 5 feet
gomerin 

 30%|███       | 3/10 [00:02<00:04,  1.42it/s]

gomerin moved to [0, 4] 5 feet
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [1, 5] 5 feet
gomerin moved to [0, 4] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks gomerin with rapier
rumblebelly attacks gomerin using Rapier for ((5) + 5) 10 damage!
gomerin takes 10 damage!
gomerin died. :(
tpk
Result: tpk
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (13) + 5 value: 18.2
rumblebelly -> initiative roll: (19) + 5 value: 24.2
rumblebelly starts their turn.
rumblebelly rolls (2) + 7=9 using Rapier and misses gomerin
rumblebelly uses Second Wind with (2) + 1 healing
rumblebelly moved to 

 40%|████      | 4/10 [00:02<00:04,  1.48it/s]

gomerin attacks rumblebelly with disadvantage['ranged_with_enemy_in_melee'] using Longbow for ((8) + 5) 13 damage!
rumblebelly takes 13 damage!
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [0, 4] 5 feet
gomerin moved to [0, 3] 5 feet
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [1, 4] 5 feet
gomerin moved to [2, 5] 5 feet
gomerin moved to [1, 4] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 11/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly rolls (1) + 7=8 using Longbow and misses gomerin
rumblebelly: move to [0, 3]
rumblebelly moved to [0, 3] 5 feet
rumblebelly: move to [0, 4]
rumblebelly moved to [0, 4] 5 feet
rumblebelly: move to [0, 3]
rumblebelly moved to [0, 3] 5 feet
rumblebelly: move to [0, 4]
rumblebelly moved to [0, 4] 5 feet
rumblebelly: move to [0, 3]
rumblebelly moved to [0, 3] 5 feet
no move for rumblebelly
gomerin starts their 

 50%|█████     | 5/10 [00:03<00:03,  1.56it/s]

rumblebelly attacks gomerin using Rapier for ((3) + 5) 8 damage!
gomerin takes 8 damage!
gomerin died. :(
tpk
Result: tpk
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (11) + 5 value: 16.2
rumblebelly -> initiative roll: (12) + 5 value: 17.2
rumblebelly starts their turn.
rumblebelly uses Second Wind with (1) + 1 healing
rumblebelly dashes
rumblebelly moved to [3, 0] 5 feet
rumblebelly moved to [2, 1] 5 feet
rumblebelly moved to [1, 0] 5 feet
rumblebelly moved to [0, 1] 5 feet
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 24/24===
Result: False
gomerin attacks rumblebelly using Longbow for ((5) + 5) 10 damage!
rumblebelly takes 10 damage!
gomerin uses Second Wind with (2) + 1

 60%|██████    | 6/10 [00:04<00:02,  1.44it/s]

rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [0, 3] 5 feet
gomerin moved to [0, 4] 5 feet
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [0, 5] 5 feet
gomerin moved to [1, 5] 5 feet
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [0, 5] 5 feet
gomerin moved to [0, 4] 5 feet
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [0, 3] 5 feet
gomerin moved to [0, 4] 5 feet
gomerin moved to [1, 5] 5 feet
gomerin moved to [0, 4] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 14/24===
rumblebelly: rumblebelly attacks gomerin with rapier
rumblebelly attacks gomerin using Rapier for ((5) + 5) 10 damage!
gomerin takes 10 damage!
rumblebelly: move to [0, 3]
rumblebelly moved to [0, 3] 5 feet
rumblebelly: move to [1, 3]
rumblebelly moved to [1, 3] 5 feet
rumblebelly: move to [0, 3]
rumblebelly moved to [0, 3] 

 70%|███████   | 7/10 [00:05<00:02,  1.36it/s]

rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [0, 2] 5 feet
gomerin moved to [1, 3] 5 feet
gomerin moved to [0, 2] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 10/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly attacks gomerin using Longbow for ((1) + 5) 6 damage!
gomerin takes 6 damage!
gomerin died. :(
tpk
Result: tpk
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (20) + 5 value: 25.2
rumblebelly -> initiative roll: (15) + 5 value: 20.2
gomerin starts their turn.
gomerin moved to [3, 3] 5 feet
gomerin uses Second Wind with (5) + 1 healing
gomerin dashes
gomerin moved to [2, 4] 5 feet
gomerin move

 80%|████████  | 8/10 [00:06<00:01,  1.29it/s]

rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [0, 5] 5 feet
gomerin moved to [1, 5] 5 feet
gomerin moved to [0, 4] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 14/24===
rumblebelly: rumblebelly attacks gomerin with rapier
rumblebelly attacks gomerin using Rapier for ((2) + 5) 7 damage!
gomerin takes 7 damage!
gomerin died. :(
tpk
Result: tpk
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (15) + 5 value: 20.2
rumblebelly -> initiative roll: (15) + 5 value: 20.2
gomerin starts their turn.
gomerin uses Second Wind with (8) + 1 healing
gomerin dashes
gomerin moved to [3, 0] 5 feet
gomerin moved to [2, 1] 5 feet
gomerin moved 

 90%|█████████ | 9/10 [00:06<00:00,  1.33it/s]

rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [0, 4] 5 feet
gomerin moved to [0, 5] 5 feet
gomerin moved to [0, 4] 5 feet
gomerin moved to [0, 5] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 16/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly rolls (1) + 7=8 using Longbow and misses gomerin
rumblebelly: move to [0, 4]
rumblebelly moved to [0, 4] 5 feet
rumblebelly: move to [1, 4]
rumblebelly moved to [1, 4] 5 feet
rumblebelly: move to [0, 4]
rumblebelly moved to [0, 4] 5 feet
rumblebelly: move to [1, 4]
rumblebelly moved to [1, 4] 5 feet
rumblebelly: move to [0, 4]
rumblebelly moved to [0, 4] 5 feet
no move for rumblebelly
gomerin starts their turn.
Result: False
gomerin rolls (1 | 6) + 7=8 with disadvantage['ranged_with_enemy_in_melee'] using Longbow and misses rumblebelly
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [1, 5] 5 feet
gomerin mov

100%|██████████| 10/10 [00:07<00:00,  1.33it/s]

gomerin rolls (9 | 19) + 7=16 with disadvantage['ranged_with_enemy_in_melee'] using Longbow and misses rumblebelly
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [1, 5] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks gomerin with rapier
rumblebelly rolls (1) + 7=8 using Rapier and misses gomerin
rumblebelly: move to [0, 5]
rumblebelly moved to [0, 5] 5 feet
rumblebelly: move to [0, 4]
rumblebelly moved to [0, 4] 5 feet
rumblebelly: move to [0, 5]
rumblebelly moved to [0, 5] 5 feet
rumblebelly: move to [0, 4]
rumblebelly moved to [0, 4] 5 feet
rumblebelly: move to [0, 5]
rumblebelly moved to [0, 5] 5 feet
no move for rumblebelly
gomerin starts their turn.
Result: False
gomerin rolls (6 | 15) + 7=13 with disadvantage['ranged_with_enemy_in_melee'] using Longbow and misses rumblebelly
rumblebelly: rumblebelly uses rapier as a reaction to attack gomerin
gomerin moved to [1,




In [87]:
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = collections.deque(maxlen=capacity)

    def push(self, states, actions, rewards, infos, is_terminal):
        self.buffer.append((states, actions, rewards, infos, is_terminal))

    def sample(self, batch_size):
        buffer = list(self.buffer)
        indices = np.random.choice(len(buffer), batch_size)
        states, actions, rewards, infos, is_terminals = zip(*[buffer[idx] for idx in indices])
        return states, actions, rewards, infos, is_terminals

    def __len__(self):
        return len(self.buffer)
    
    # memory usage of the buffer in bytes
    def memory_usage(self):
        total_size = 0
        for item in self.buffer:
            states, actions, rewards, infos, is_terminals = item
            for s in states:
                total_size += sys.getsizeof(s)
            total_size += sys.getsizeof(actions)
            total_size += sys.getsizeof(rewards)
            total_size += sys.getsizeof(infos)
            total_size += sys.getsizeof(is_terminals)

        return total_size

In [88]:

# generate a batch of trajectories and store them in the replay buffer
def generate_batch_trajectories(env, model, n_rollout, replay_buffer: ReplayBuffer, temperature=5.0, epsilon=0.1, policy='e-greedy'):
    print(f"generating {n_rollout} rollouts")
    for _ in range(n_rollout):
        state, action, reward, done, truncated, info = generate_trajectory(env, model, temperature=temperature, epsilon=epsilon, policy=policy)
        replay_buffer.push(state, action, reward, info, done)

In [89]:
TRAJECTORY_POLICY = "e-greedy"
NUM_UPDATES = 2
TEMP_DECAY = 0.999
BUFFER_CAPACITY = 2000
FRAMES_TO_STORE = 2
MAX_STEPS = 100
BATCH_SIZE = 32
TARGET_UPDATE_FREQ = 1
T_HORIZON = 1024
EPSILON_START = 1.0
EPSILON_FINAL = 0.02
EPSILON_DECAY_FRAMES = 10**3

In [90]:


def train(env, gamma, learning_rate, max_steps=MAX_STEPS, n_rollout=8, seed=1337):
  print(f"training with gamma {gamma} and learning rate {learning_rate}")
  env.seed(seed)

  replay_buffer = ReplayBuffer(100)

  # load model checkpoint if available
  model = QNetwork(device).to(device)
  target_model = QNetwork(device).to(device)

  # intialize target network with the same weights as the model
  target_model.load_state_dict(model.state_dict())

  optimizer = optim.Adam(model.parameters(), lr=learning_rate)

  temperature = 5.0
  reward_per_episode = []
  epsilon = EPSILON_START

  for step in range(max_steps):
    if TRAJECTORY_POLICY == 'softmax':
      print(f"step {step} t={temperature}")
    elif TRAJECTORY_POLICY == 'e-greedy':
      print(f"step {step} epsilon={epsilon}")
    else:
      print(f"step {step}")
    generate_batch_trajectories(env, model, n_rollout, replay_buffer, temperature=temperature, epsilon=epsilon, policy=TRAJECTORY_POLICY)

    states, actions, rewards, infos, is_terminals = replay_buffer.sample(BATCH_SIZE)
    rewards_collected = 0
    for _ in range(NUM_UPDATES):
      rewards_collected = 0
      total_loss = 0.0
      
      for i in range(len(states)):
        s = states[i]
        a = actions[i]
        env_info = infos[i]
        r = torch.tensor(rewards[i]).to(device)
        is_terminal = torch.tensor(is_terminals[i]).to(device)
        
        q_targets = torch.zeros(len(env_info)).to(device)

        for index, (state, info) in enumerate(zip(s, env_info)):
          q_values = [target_model([state], [avail_action]).detach() for avail_action in info["available_moves"]]
          if len(q_values) == 0:
            q_targets[index] = 0
          else:
            q_targets[index] = torch.max(torch.stack(q_values)).item()
        
        targets = r + gamma * q_targets * is_terminal
        output = model(s, a)
        q_sa = output

        # print(f"q_sa {q_sa.shape}")
        # print(f"targets {targets.shape}")
        value_loss = nn.MSELoss()(q_sa, targets)
        optimizer.zero_grad()
        value_loss.backward()
        total_loss += value_loss.item()
        rewards_collected += r.sum().item()
        optimizer.step()
      print(f"total loss {total_loss/len(states)}")

    # save model checkpoint

    if step % 10 == 0:
      # torch.save(model.state_dict(), f"model_{step}.pt")
      torch.save(model.state_dict(), f"model_{gamma}_{lr}.pt")

    if step % 100 == 0:
      torch.save(model.state_dict(), f"model_{gamma}_{lr}_{step}.pt")

    # if step % 5 == 0 and step > 0:
    _, _, rewards, _, _, _ = generate_trajectory(env, model, policy='greedy')
    total_reward = sum(rewards)
    
    reward_per_episode.append(total_reward)

    print(f"total reward: {total_reward}")
    print(f"{step}: rewards {rewards_collected}")
    gc.collect()
    
    # decay temp
    temperature = np.max([0.1, temperature * TEMP_DECAY])

    # decay epsilon
    epsilon = EPSILON_FINAL + (EPSILON_START - EPSILON_FINAL) * np.exp(-1.0 * step / EPSILON_DECAY_FRAMES)

    if step % TARGET_UPDATE_FREQ == 0:
      # calculate the avg change weights of the model with the target model
      total_change = 0
      for p, p_target in zip(model.parameters(), target_model.parameters()):
        total_change += torch.abs(p - p_target).sum().item()
      print(f"total change: {total_change}")

      target_model.load_state_dict(model.state_dict())

  env.close()
  return reward_per_episode


In [91]:
env = make("dndenv-v0", root_path="../templates")

seed = 1337
# Create a grid of learning rates and gammas
learning_rates = [0.001]
gammas = [0.99]

results = {}
for lr in learning_rates:
  results[lr] = {}
  for gamma in gammas:
    seed = seed + 1
    reward_per_episode = train(env, gamma, lr, max_steps=MAX_STEPS, seed=seed )
    results[lr][gamma] = reward_per_episode


  logger.warn(


training with gamma 0.99 and learning rate 0.001
step 0 epsilon=1.0
generating 8 rollouts
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (1) + 5 value: 6.2
rumblebelly -> initiative roll: (17) + 5 value: 22.2
rumblebelly starts their turn.
rumblebelly moved to [4, 2] 5 feet
rumblebelly moved to [3, 1] 5 feet
rumblebelly attacks gomerin using Longbow for ((7) + 5) 12 damage!
gomerin takes 12 damage!
rumblebelly moved to [2, 1] 5 feet
rumblebelly moved to [1, 0] 5 feet
rumblebelly moved to [2, 1] 5 feet
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 12/24===
Result: False
gomerin dashes
gomerin moved to [3, 4] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current t

  return F.mse_loss(input, target, reduction=self.reduction)
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.5384666845202446
total loss 3.5178548246622086
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (3) + 5 value: 8.2
rumblebelly -> initiative roll: (15) + 5 value: 20.2
rumblebelly starts their turn.
rumblebelly attacks gomerin using Longbow for ((8) + 5) 13 damage!
gomerin takes 13 damage!
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 11/24===
Result: False
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly attacks gomerin using Longbow for ((6) + 5) 11 damage!
gomerin takes 11 damage!
gomerin is unconscious.
rumblebelly: move to [2, 0]
rumblebelly moved to [2, 0] 

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.821764200925827
total loss 3.798600848764181
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (12) + 5 value: 17.2
rumblebelly -> initiative roll: (1) + 5 value: 6.2
gomerin starts their turn.
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly attacks gomerin using Longbow for ((1) + 5) 6 damage!
gomerin takes 6 damage!
rumblebelly: move to [2, 1]
rumblebelly moved to [2, 1] 5 feet
rumblebelly: move to [3, 2]
rumblebelly moved to [3, 2] 5 feet
rumblebelly: move to [4, 3]
rumblebelly moved to [4, 3] 5 feet
rumblebelly: move to [4, 4]
rumblebelly moved to [4, 4] 5 feet
rumblebelly: move to [

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.485603716224432
total loss 3.4465822465717793
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (15) + 5 value: 20.2
rumblebelly -> initiative roll: (10) + 5 value: 15.2
gomerin starts their turn.
gomerin attacks rumblebelly using Longbow for ((4) + 5) 9 damage!
rumblebelly takes 9 damage!
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 15/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly attacks gomerin using Longbow for ((4) + 5) 9 damage!
gomerin takes 9 damage!
rumblebelly: move to [1, 3]
rumblebelly moved to [1, 3] 5 feet
rumblebelly: move to [2, 2]
rumblebelly moved to [2, 2] 5 feet
rumblebelly: move to [2, 1]
rumblebelly moved to [2

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.790414746850729
total loss 3.819756258279085
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (17) + 5 value: 22.2
rumblebelly -> initiative roll: (11) + 5 value: 16.2
gomerin starts their turn.
gomerin attacks rumblebelly using Longbow for ((3) + 5) 8 damage!
rumblebelly takes 8 damage!
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 16/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly rolls (2) + 7=9 using Longbow and misses gomerin
rumblebelly: move to [3, 1]
rumblebelly moved to [3, 1] 5 feet
rumblebelly: move to [3, 2]
rumblebelly moved to [3, 2] 5 feet
rumblebelly: move to [3, 1]
rumblebelly moved to [3, 1] 5 feet
rumblebelly: move 

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.3565373606979847
total loss 3.287665780633688
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (15) + 5 value: 20.2
rumblebelly -> initiative roll: (13) + 5 value: 18.2
gomerin starts their turn.
gomerin rolls (7) + 7=14 using Longbow and misses rumblebelly
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly attacks gomerin using Longbow for ((3) + 5) 8 damage!
gomerin takes 8 damage!
rumblebelly: move to [4, 4]
rumblebelly moved to [4, 4] 5 feet
rumblebelly: move to [3, 3]
rumblebelly moved to [3, 3] 5 feet
rumblebelly: move to [3, 2]
rumblebelly moved to [3, 2] 5 feet
rumblebelly: move to

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.4356712512671947
total loss 3.4345095977187157
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (8) + 5 value: 13.2
rumblebelly -> initiative roll: (11) + 5 value: 16.2
rumblebelly starts their turn.
rumblebelly attacks gomerin using Longbow for ((2) + 5) 7 damage!
gomerin takes 7 damage!
rumblebelly moved to [4, 3] 5 feet
rumblebelly moved to [3, 4] 5 feet
rumblebelly moved to [2, 5] 5 feet
rumblebelly moved to [1, 5] 5 feet
rumblebelly moved to [0, 5] 5 feet
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 17/24===
Result: False
gomerin attacks rumblebelly using Longbow for ((1) + 5) 6 damage!
rumblebelly takes 6 damage!
gomerin moved to [4, 5] 5 feet
gomerin moved t

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 4.450693748891354
total loss 4.462080229073763
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (4) + 5 value: 9.2
rumblebelly -> initiative roll: (8) + 5 value: 13.2
rumblebelly starts their turn.
rumblebelly rolls (8) + 7=15 using Longbow and misses gomerin
rumblebelly moved to [0, 2] 5 feet
rumblebelly moved to [0, 3] 5 feet
rumblebelly moved to [0, 4] 5 feet
rumblebelly moved to [0, 5] 5 feet
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 24/24===
Result: False
gomerin rolls (4 | 19) + 7=11 with disadvantage['ranged_with_enemy_in_melee'] using Longbow and misses rumblebelly
gomerin moved to [0, 4] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.3790519684553146
total loss 3.3728694282472134
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (20) + 5 value: 25.2
rumblebelly -> initiative roll: (6) + 5 value: 11.2
gomerin starts their turn.
gomerin rolls (10 | 2) + 7=9 with disadvantage['ranged_with_enemy_in_melee'] using Longbow and misses rumblebelly
gomerin moved to [3, 5] 5 feet
gomerin moved to [2, 5] 5 feet
gomerin moved to [1, 5] 5 feet
gomerin moved to [0, 5] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly rolls (1) + 7=8 using Longbow and misses gomerin
rumblebelly: move to [4, 4]
rumblebelly moved to [4, 4] 5 feet

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 4.282227657735348
total loss 4.297333754599094
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (1) + 5 value: 6.2
rumblebelly -> initiative roll: (15) + 5 value: 20.2
rumblebelly starts their turn.
rumblebelly moved to [1, 5] 5 feet
rumblebelly moved to [0, 5] 5 feet
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 24/24===
Result: False
gomerin rolls (5) + 7=12 using Longbow and misses rumblebelly
gomerin moved to [4, 1] 5 feet
gomerin moved to [3, 2] 5 feet
gomerin moved to [2, 3] 5 feet
gomerin moved to [1, 4] 5 feet
gomerin moved to [0, 4] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks 

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.4203556813299656
total loss 3.4147363044321537
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (7) + 5 value: 12.2
rumblebelly -> initiative roll: (14) + 5 value: 19.2
rumblebelly starts their turn.
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 24/24===
Result: False
gomerin rolls (10) + 7=17 using Longbow and misses rumblebelly
gomerin moved to [4, 3] 5 feet
gomerin moved to [3, 4] 5 feet
gomerin moved to [2, 5] 5 feet
gomerin moved to [1, 5] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks gomerin with rapier
rumblebelly rolls (3) + 7=10 using Rapier and misses gomerin
rumblebelly: mov

  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.310512125492096
total loss 3.3071521185338497
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (9) + 5 value: 14.2
rumblebelly -> initiative roll: (9) + 5 value: 14.2
gomerin starts their turn.
gomerin moved to [4, 5] 5 feet
gomerin moved to [3, 5] 5 feet
gomerin moved to [2, 5] 5 feet
gomerin moved to [1, 5] 5 feet
gomerin moved to [0, 5] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly attacks gomerin using Longbow for ((8) + 5) 13 damage!
gomerin takes 13 damage!
rumblebelly: move to [1, 1]
rumblebelly moved to [1, 1] 5 feet
rumblebelly: move to [0, 2]
rumblebelly moved to [0, 

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.637486055493355
total loss 3.652674227952957
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (1) + 5 value: 6.2
rumblebelly -> initiative roll: (3) + 5 value: 8.2
rumblebelly starts their turn.
rumblebelly attacks gomerin using Longbow for ((6) + 5) 11 damage!
gomerin takes 11 damage!
rumblebelly moved to [2, 1] 5 feet
rumblebelly moved to [1, 2] 5 feet
rumblebelly moved to [0, 3] 5 feet
rumblebelly moved to [0, 4] 5 feet
rumblebelly moved to [0, 5] 5 feet
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 13/24===
Result: False
gomerin rolls (3) + 7=10 using Longbow and misses rumblebelly
gomerin moved to [0, 1] 5 feet
gomerin moved to [0, 2] 5 feet
gomerin moved to [0

  return F.mse_loss(input, target, reduction=self.reduction)


total loss 4.197424024343491
total loss 4.205713659524918
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (8) + 5 value: 13.2
rumblebelly -> initiative roll: (19) + 5 value: 24.2
rumblebelly starts their turn.
rumblebelly rolls (7) + 7=14 using Longbow and misses gomerin
rumblebelly moved to [0, 2] 5 feet
rumblebelly moved to [0, 3] 5 feet
rumblebelly moved to [0, 4] 5 feet
rumblebelly moved to [0, 5] 5 feet
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 24/24===
Result: False
gomerin rolls (7) + 7=14 using Longbow and misses rumblebelly
gomerin moved to [3, 5] 5 feet
gomerin moved to [2, 5] 5 feet
gomerin moved to [1, 5] 5 feet
==== end turn ===
rumblebelly starts their turn.
=

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 4.145755738019943
total loss 4.167432710528374
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (4) + 5 value: 9.2
rumblebelly -> initiative roll: (13) + 5 value: 18.2
rumblebelly starts their turn.
rumblebelly moved to [3, 3] 5 feet
rumblebelly moved to [2, 4] 5 feet
rumblebelly moved to [1, 5] 5 feet
rumblebelly moved to [0, 5] 5 feet
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 24/24===
Result: False
gomerin rolls (5) + 7=12 using Longbow and misses rumblebelly
gomerin moved to [3, 1] 5 feet
gomerin moved to [2, 2] 5 feet
gomerin moved to [1, 3] 5 feet
gomerin moved to [0, 4] 5 feet
gomerin moved to [1, 5] 5 feet
==== end turn ===
rumblebelly starts their turn.
==

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.3504856303334236
total loss 3.347773678600788
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (20) + 5 value: 25.2
rumblebelly -> initiative roll: (7) + 5 value: 12.2
gomerin starts their turn.
gomerin rolls (9) + 7=16 using Longbow and misses rumblebelly
gomerin moved to [3, 5] 5 feet
gomerin moved to [2, 5] 5 feet
gomerin moved to [1, 5] 5 feet
gomerin moved to [0, 5] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly rolls (3) + 7=10 using Longbow and misses gomerin
rumblebelly: move to [1, 4]
rumblebelly moved to [1, 4] 5 feet
rumblebelly: move to [0, 4]
rumblebelly moved to [0

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.8354639895260334
total loss 3.8481843657791615
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (6) + 5 value: 11.2
rumblebelly -> initiative roll: (6) + 5 value: 11.2
gomerin starts their turn.
gomerin moved to [0, 2] 5 feet
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 24/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly rolls (5) + 7=12 using Longbow and misses gomerin
rumblebelly: move to [2, 0]
rumblebelly moved to [2, 0] 5 feet
rumblebelly: move to [1, 1]
rumblebelly moved to [1, 1] 5 feet
rumblebelly: move to [0, 1]
rumblebelly moved to [0, 1] 5 feet
rumblebelly: move to [1, 1]
rumblebelly moved to [1, 1] 5 feet
rumblebelly: move

  return F.mse_loss(input, target, reduction=self.reduction)


total loss 3.179824199527502
total loss 3.1783353723585606
loading map from ../templates/maps/game_map.yml
map size: [6, 6]
==== Player Character ====
name: gomerin
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



==== Player Character ====
name: rumblebelly
level: 1
character class: {'fighter': 1}
hp: 24
max hp: 24
ac: 18
speed: 30



gomerin -> initiative roll: (5) + 5 value: 10.2
rumblebelly -> initiative roll: (14) + 5 value: 19.2
rumblebelly starts their turn.
==== end turn ===
gomerin starts their turn.
==== current turn gomerin 24/24===
Result: False
gomerin attacks rumblebelly using Longbow for ((6 + 4) + 5) 15 damage!
rumblebelly takes 15 damage!
==== end turn ===
rumblebelly starts their turn.
==== current turn rumblebelly 9/24===
rumblebelly: rumblebelly attacks gomerin with longbow
rumblebelly attacks gomerin using Longbow for ((1) + 5) 6 damage!
gomerin takes 6 damage!
rumblebelly: move to [2, 4]
rumblebelly moved to [2, 4] 5 feet
rumblebelly:

In [None]:
# plot the results
import matplotlib.pyplot as plt

for lr in learning_rates:
    for gamma in gammas:
        plt.plot(results[lr][gamma], label=f"lr={lr}, gamma={gamma}")
plt.legend()
plt.show()
