In [1]:
import numpy as np
import laserhockey.hockey_env as h_env
import gym
from importlib import reload
import time

In [2]:
# Suppress scientific notation so small floats print in fixed-point form.
np.set_printoptions(suppress=True)

# Normal Game Play

In [3]:
# Create a hockey environment; no `mode` argument is given, so the class default applies.
env = h_env.HockeyEnv()

Have a look at the initial conditions: the starting player alternates, and the puck position is random.

In [4]:
# Start a fresh episode and keep the observation returned by reset().
obs = env.reset()
# Query the environment separately for the second agent's observation.
obs_agent2 = env.obs_agent_two()
# Draw the initial state; assigning to `_` keeps render()'s return value out of the cell output.
_ = env.render()

Run one episode with random agents.

In [5]:
# Start a new episode and fetch both agents' observations of the initial state.
obs = env.reset()
obs_agent2 = env.obs_agent_two()

# Play at most 600 rendered steps, stopping early once the env reports done.
for _ in range(600):
    env.render()
    # Each agent draws 4 action components uniformly from [-1, 1).
    a1 = np.random.uniform(low=-1, high=1, size=4)
    a2 = np.random.uniform(low=-1, high=1, size=4)
    # The env takes both agents' actions stacked into a single vector.
    joint_action = np.hstack((a1, a2))
    obs, r, d, info = env.step(joint_action)
    obs_agent2 = env.obs_agent_two()
    if d:
        break

Without rendering, it runs much faster

The "info" dict contains useful proxy rewards and winning information.

In [None]:
# Display the info dict returned by the most recent env.step() call.
info

Winner == 0: draw

Winner == 1: you win (left player)

Winner == -1: opponent wins (right player)

In [6]:
# Shut this environment down before the next section creates a new one.
env.close()

# Train Shooting

In [7]:
# Create the environment in the TRAIN_SHOOTING mode defined on HockeyEnv.
env = h_env.HockeyEnv(mode=h_env.HockeyEnv.TRAIN_SHOOTING)

In [8]:
# Reset the scenario and draw its initial state.
o = env.reset()
_ = env.render()

# Both halves of the joint action stay constant for the whole run.
a1 = [1, 0, 0, 1]   # fixed action; swap in np.random.uniform(-1, 1, 4) for random play
a2 = [0, 0., 0, 0]  # all-zero second half of the joint action

# Run up to 50 rendered steps, leaving the loop as soon as the env reports done.
for _ in range(50):
    env.render()
    joint_action = np.hstack((a1, a2))
    obs, r, d, info = env.step(joint_action)
    obs_agent2 = env.obs_agent_two()
    if d:
        break

In [9]:
# Close the shooting-mode environment before switching modes.
env.close()

# Train Defending

In [16]:
# Create the environment in the TRAIN_DEFENSE mode defined on HockeyEnv.
env = h_env.HockeyEnv(mode=h_env.HockeyEnv.TRAIN_DEFENSE)

In [17]:
# Reset the scenario and draw its initial state.
o = env.reset()
_ = env.render()

# Both halves of the joint action stay constant for the whole run.
a1 = [0.1, 0, 0, 1]  # fixed action; swap in np.random.uniform(-1, 1, 4) for random play
a2 = [0, 0., 0, 0]   # all-zero second half of the joint action

# Run up to 60 rendered steps, leaving the loop as soon as the env reports done.
for _ in range(60):
    env.render()
    joint_action = np.hstack((a1, a2))
    obs, r, d, info = env.step(joint_action)
    obs_agent2 = env.obs_agent_two()
    if d:
        break

In [18]:
# Close the defense-mode environment before the next section.
env.close()

# Using discrete actions

In [None]:
import random

In [None]:
# The discrete-action demo runs in the TRAIN_SHOOTING mode.
env = h_env.HockeyEnv(mode=h_env.HockeyEnv.TRAIN_SHOOTING)

In [None]:
# Begin a new episode; the initial observation is not needed here.
env.reset()

# The second half of the joint action is all zeros on every step.
a2 = [0, 0., 0, 0]

# Run up to 200 rendered steps, leaving the loop as soon as the env reports done.
for _ in range(200):
    env.render()
    # Pick one of the discrete action indices 0..7 uniformly at random,
    # then let the env translate it into a continuous action vector.
    a1_discrete = random.randrange(8)
    a1 = env.discrete_to_continous_action(a1_discrete)
    joint_action = np.hstack((a1, a2))
    obs, r, d, info = env.step(joint_action)
    obs_agent2 = env.obs_agent_two()
    if d:
        break

In [None]:
# Close the environment used for the discrete-action demo.
env.close()

# Hand-crafted Opponent

In [None]:
# Default-mode environment for the BasicOpponent-vs-BasicOpponent match below.
env = h_env.HockeyEnv()

In [None]:
# Reset and draw the initial state; the returned observation `o` is not read by the cells shown here.
o = env.reset()
_ = env.render()
# Two BasicOpponent instances: player1 is built with weak=False, player2 with the class default.
player1 = h_env.BasicOpponent(weak=False)
player2 = h_env.BasicOpponent()

In [None]:
# Collect per-step data in plain lists first; they become arrays once the run is over.
obs_history = []
reward_history = []

# Fresh episode: each player acts on its own observation of the state.
obs = env.reset()
obs_agent2 = env.obs_agent_two()
for _ in range(250):
    env.render()
    a1 = player1.act(obs)
    a2 = player2.act(obs_agent2)
    obs, r, d, info = env.step(np.hstack((a1, a2)))
    # Record the post-step observation and the reward of this step.
    obs_history.append(obs)
    reward_history.append(r)
    obs_agent2 = env.obs_agent_two()
    if d:
        break

# One row per recorded step.
obs_buffer = np.asarray(obs_history)
reward_buffer = np.asarray(reward_history)

In [None]:
# Mean of each observation component, averaged over the recorded steps (axis 0).
np.mean(obs_buffer,axis=0)

In [None]:
# Standard deviation of each observation component over the recorded steps (axis 0).
np.std(obs_buffer,axis=0)

If you want to use a fixed observation scaling, this might be a reasonable choice:

In [None]:
# Fixed per-component scaling factors for the observation vector.
# The last two entries were previously written as `4,0 ,4,0` (comma instead of a
# decimal point), which produced four elements (4, 0, 4, 0) and a 20-entry list
# containing zeros; they are 4.0 and 4.0, giving 18 entries in three rows of six.
# NOTE(review): presumably one factor per observation component — confirm the
# length against env.observation_space.shape.
scaling = [ 1.0,  1.0,  0.5,  4.0,  4.0, 4.0,
            1.0,  1.0,  0.5,  4.0,  4.0, 4.0,
            2.0,  2.0, 10.0, 10.0,  4.0, 4.0]

In [None]:
import pylab as plt

In [None]:
# Overlay observation components 2 and 8 across the recorded steps.
# Labels and a legend make the two curves distinguishable; the trailing `;`
# keeps the legend object's repr out of the cell output.
plt.plot(obs_buffer[:,2], label="obs[:, 2]")
plt.plot(obs_buffer[:,8], label="obs[:, 8]")
plt.xlabel("step")
plt.ylabel("observation value")
plt.legend();

In [None]:
# Trace observation component 12 across the recorded steps.
plt.plot(obs_buffer[:,12])

In [None]:
# Plot the per-step reward recorded during the episode.
plt.plot(reward_buffer[:])

In [None]:
# Total reward accumulated over the recorded steps.
np.sum(reward_buffer)

In [None]:
# Close the environment used for the BasicOpponent match.
env.close()

# Human Opponent

In [None]:
# Default-mode environment; it is also handed to HumanOpponent in the setup cells below.
env = h_env.HockeyEnv()

In [None]:
# Option A: the human plays as player 1 (HumanOpponent gets the env and player=1),
# with a default BasicOpponent as player 2.
player1 = h_env.HumanOpponent(env=env, player=1)
player2 = h_env.BasicOpponent()


In [None]:
# Option B: a default BasicOpponent as player 1 and the human as player 2.
# This cell rebinds player1/player2, so running it after the previous setup
# cell replaces that configuration — execute only the variant you want.
player1 = h_env.BasicOpponent()
player2 = h_env.HumanOpponent(env=env, player=2)


In [None]:
# Start a new episode, show the initial state, and pause for a second before play begins.
obs = env.reset()

env.render()
time.sleep(1)
obs_agent2 = env.obs_agent_two()

# Play up to 20 steps, pausing 0.2 s before each one
# (presumably so a human player can follow the game and react).
for _ in range(20):
    time.sleep(0.2)
    env.render()
    a1 = player1.act(obs)
    a2 = player2.act(obs_agent2)
    joint_action = np.hstack((a1, a2))
    obs, r, d, info = env.step(joint_action)
    obs_agent2 = env.obs_agent_two()
    if d:
        break

In [None]:
# Close the environment at the end of the human-opponent session.
env.close()