In [4]:
import torch
import torch.nn.functional as F
import numpy as np
import gymnasium as gym
import matplotlib.pyplot as plt
import time
from torch.utils.tensorboard import SummaryWriter
import hockey.hockey_env as h_env
import pickle

import memory
import tools

In [None]:
keep_mode = True
def discrete_to_continous_action(discrete_action):
    ''' converts discrete actions into continuous ones (for one player)
        The actions allow only one operation each timestep, e.g. X or Y or angle change.
        This is surely limiting. Other discrete actions are possible
        Action 0: do nothing
        Action 1: -1 in x
        Action 2: 1 in x
        Action 3: -1 in y
        Action 4: 1 in y
        Action 5: -1 in angle
        Action 6: 1 in angle
        Action 7: shoot (if keep_mode is on)
        '''
    action_cont = [(discrete_action == 1) * -1.0 + (discrete_action == 2) * 1.0,  # player x
                   (discrete_action == 3) * -1.0 + (discrete_action == 4) * 1.0,  # player y
                   (discrete_action == 5) * -1.0 + (discrete_action == 6) * 1.0]  # player angle
    if keep_mode:
      action_cont.append((discrete_action == 7) * 1.0)

    return action_cont

In [None]:
from agent_copy import QFunction

In [51]:
class DDQNAgent():
    def __init__(self,h=None,idx=4) -> None:
        if h is None:
            h = [1024, 1024, 1024]
        self.Q= QFunction(state_dim=18, action_dim=7, hidden_sizes=h, learning_rate=0.001, enable_dueling_dqn=True)
        if idx == 4:
            model_path = "models/Hockey_DuelingDQN_finetune_run3_cp12000_eps.pt"
        elif idx == 3:
            model_path = "models/Hockey_DuelingDQN_finetune_run5_cp22000_eps.pt"
        elif idx == 2:
            model_path = "models/Hockey_DuelingDQN_finetune_run5_cp200000_eps.pt"
        elif idx == 1:
            model_path = "models/Hockey_DuelingDQN_train_both_run2_cp3.pt"
        self.Q.load_state_dict(torch.load(model_path))
        print("Started DDQN agent")

    def act(self, state):
        return self.Q.greedy_action(state)

    def get_step(self, observation: list[float]) -> list[float]:
        action = self.act(np.array(observation)).tolist()
        return discrete_to_continous_action(action)
    


In [46]:
eval_agent4_strong = 0
eval_agent4_weak = 0

In [52]:
env = h_env.HockeyEnv()
ac_space = env.action_space
o_space = env.observation_space

In [48]:
eval_eps = 100
agent = DDQNAgent()
opponent = h_env.BasicOpponent(weak=False)
wins = 0

for i in range(eval_eps):
    state, _info = env.reset()
    for t in range(500):
        done = False
        a_discrete = agent.act(state)
        a_opponent = opponent.act(env.obs_agent_two())
        a = discrete_to_continous_action(a_discrete)
        state, reward, done, trunc, info = env.step(np.hstack([a, a_opponent]))
        if done: break
    if info["winner"] == 1:
        wins += 1

wins /= eval_eps
eval_agent4_strong = wins

opponent = h_env.BasicOpponent(weak=True)
wins = 0

for i in range(eval_eps):
    state, _info = env.reset()
    for t in range(500):
        done = False
        a_discrete = agent.act(state)
        a_opponent = opponent.act(state)
        a = discrete_to_continous_action(a_discrete)
        state, reward, done, trunc, info = env.step(np.hstack([a, a_opponent]))
        if done: break
    if info["winner"] == 1:
        wins += 1

wins /= eval_eps
eval_agent4_weak = wins

print(f"Wins against strong opponent (1): {eval_agent1_strong}")
print(f"Wins against weak opponent (1): {eval_agent1_weak}")
print(f"Wins against strong opponent (2): {eval_agent2_strong}")
print(f"Wins against weak opponent (2): {eval_agent2_weak}")
print(f"Wins against strong opponent (3): {eval_agent3_strong}")
print(f"Wins against weak opponent (3): {eval_agent3_weak}")
print(f"Wins against strong opponent (4): {eval_agent3_strong}")
print(f"Wins against weak opponent (4): {eval_agent3_weak}")

Started DDQN agent
Wins against strong opponent (1): 0.67
Wins against weak opponent (1): 0.37
Wins against strong opponent (2): 0.66
Wins against weak opponent (2): 0.31
Wins against strong opponent (3): 0.78
Wins against weak opponent (3): 0.43
Wins against strong opponent (4): 0.78
Wins against weak opponent (4): 0.43


In [59]:
agent1 = DDQNAgent(h=[256,256,256],idx=1)
agent2 = DDQNAgent(idx=2)
agent3 = DDQNAgent(idx=3)
agent4 = DDQNAgent(idx=4)

pairs = [(agent1, agent2), (agent1, agent3), (agent1, agent4), (agent2, agent3), (agent2, agent4), (agent3, agent4)]
names = ["agent 1 vs 2", "agent 1 vs 3", "agent 1 vs 4", "agent 2 vs 3", "agent 2 vs 4", "agent 3 vs 4"]
stats = [0, 0, 0, 0, 0, 0]

for i in range(len(pairs)):
    agent = pairs[i][0]
    opponent = pairs[i][1]
    wins = 0
    for _ in range(eval_eps):
        state, _info = env.reset()
        for t in range(500):
            done = False
            a_discrete = agent.act(state)
            a_opponent = discrete_to_continous_action(opponent.act(env.obs_agent_two()))
            a = discrete_to_continous_action(a_discrete)
            state, reward, done, trunc, info = env.step(np.hstack([a, a_opponent]))
            if done: break
        if info["winner"] == 1:
            wins += 1

    for _ in range(eval_eps):
        state, _info = env.reset()
        for t in range(500):
            done = False
            a_discrete = agent.act(env.obs_agent_two())
            a_opponent = discrete_to_continous_action(opponent.act(state))
            a = discrete_to_continous_action(a_discrete)
            state, reward, done, trunc, info = env.step(np.hstack([ a_opponent, a]))
            if done: break
        if info["winner"] == 0:
            wins += 1

    stats[i] = wins / (2.0 * eval_eps)
    print(f"Match: {names[i]} ended with {stats[i]} : {1 - stats[i]}")


Started DDQN agent
Started DDQN agent
Started DDQN agent
Started DDQN agent
Match: agent 1 vs 2 ended with 0.285 : 0.7150000000000001
Match: agent 1 vs 3 ended with 0.28 : 0.72
Match: agent 1 vs 4 ended with 0.26 : 0.74
Match: agent 2 vs 3 ended with 0.3 : 0.7
Match: agent 2 vs 4 ended with 0.33 : 0.6699999999999999
Match: agent 3 vs 4 ended with 0.265 : 0.735


In [58]:
print(stats)
print(names)
print(len(stats))
print(len(names))
print(len(pairs))

[0, 0, 0, 0, 0, 0]
['agent 1 vs 2', 'agent 1 vs 3', 'agent 1 vs 4', 'agent 2 vs 3', 'agent 2 vs 4', 'agent 3 vs 4']
6
6
6
