In [2]:
# Imports
import laserhockey.hockey_env as h_env
import os
import numpy as np

from dqn_agent import DQNAgent
from evaluate_func import evaluate

In [4]:
# Configuration for the evaluation run.

# --- Forwarded to the DQNAgent constructor ---
gamma = 1                 # passed as `discount`
eps_max = 0.0             # passed as `eps`
eps_min = 0.0             # passed as `eps_end`
eps_decay = 0.95          # passed as `eps_decay`
use_target = True         # passed as `use_target_net`
target_update_freq = 20   # passed as `update_target_every`
use_dueling = True        # passed as `dueling`
use_clipping = True       # passed as `cliping` (the keyword spelling used at the call site)

# --- Evaluation loop ---
num_evaluation_episodes = 200   # number of episodes played during evaluation

# --- Not referenced by the code visible in this section ---
# NOTE(review): possibly leftovers from the training notebook; confirm before removing.
learning_rate = 1e-3
batch_size = 32
max_steps_per_episode = 500

In [6]:
# Scripted opponent the agent plays against (constructed with weak=True).
opponent = h_env.BasicOpponent(weak=True)

# Hockey environment.
# NOTE(review): mode=0 is kept verbatim; confirm which game mode it selects.
env = h_env.HockeyEnv(mode=0, verbose=False)

# Spaces handed to the agent constructor: the environment's observation
# space and its discrete action space.
o_space = env.observation_space
ac_space = env.discrete_action_space

In [14]:
# Build the DQN agent from the configuration values, one keyword per line.
# `cliping` is the keyword name the constructor is called with; keep the
# spelling as-is.
q_agent = DQNAgent(
    o_space,
    ac_space,
    discount=gamma,
    eps=eps_max,
    eps_end=eps_min,
    eps_decay=eps_decay,
    use_target_net=use_target,
    update_target_every=target_update_freq,
    dueling=use_dueling,
    cliping=use_clipping,
)

# Load the agent state stored under "saved_models/" for step 0.
# NOTE(review): presumably restores network weights; confirm in dqn_agent.
q_agent.load_agent("saved_models/", step=0)

In [21]:
# Play `num_evaluation_episodes` games between the loaded agent and the
# scripted opponent, collecting per-episode statistics, then print a summary.
won_games = 0
lost_games = 0
drawn_games = 0
ep_rewards = []
ep_length = []
ep_touched_puck = []

for episode_counter in range(num_evaluation_episodes):
    episode_reward = 0
    episode_length = 0
    obs, _ = env.reset()
    obs_opp = env.obs_agent_two()

    # The touch flag can only be raised in episodes where the puck starts at
    # x < 5 (treated here as "our half" -- TODO confirm against the env's
    # coordinate system).
    agent_touched_puck = False
    puck_starts_in_our_half = bool(env.puck.position[0] < 5)

    for step in range(env.max_timesteps + 1):
        if env.player1_has_puck and puck_starts_in_our_half:
            agent_touched_puck = True

        # The agent picks a discrete action, which is converted to the
        # continuous form and stacked with the opponent's action for the env.
        discr_agent_move = q_agent.act(obs)
        agent_move = env.discrete_to_continous_action(discr_agent_move)
        opponent_move = opponent.act(obs_opp)

        obs_next, reward, terminated, truncated, _ = env.step(
            np.hstack((agent_move, opponent_move))
        )
        # Treat truncation as the end of the episode too, instead of
        # discarding the flag.
        done = terminated or truncated

        obs = obs_next
        obs_opp = env.obs_agent_two()
        episode_reward += reward
        episode_length += 1

        if done:
            break

    # Record the episode after the step loop so it is counted however the
    # loop ended; previously an episode that ran out of steps without `done`
    # was dropped while the win rate still divided by the full episode count.
    ep_rewards.append(episode_reward)
    ep_length.append(episode_length)
    ep_touched_puck.append(agent_touched_puck)
    if env.winner == 1:
        won_games += 1
    elif env.winner == -1:
        lost_games += 1
    else:
        drawn_games += 1


print("Evaluation stats:")
print(f'Won: {won_games}, Lost: {lost_games}')
print(f'Winrate: {np.round(won_games / num_evaluation_episodes, 2)}')
print(f'Average reward: {np.round(np.mean(ep_rewards), 2)}')
print(f'Average episode length: {np.round(np.mean(ep_length), 2)}')
# Draws were counted but never shown, so Won + Lost did not add up to the
# number of episodes played.
print(f'Drawn: {drawn_games}')

Evaluation stats:
Won: 37, Lost: 86
Winrate: 0.18
Average reward: -9.96
Average episode length: 171.22
