In [2]:
import gymnasium as gym
import hockey.hockey_env as h_env
import numpy as np

In [3]:
env = h_env.HockeyEnv()

# Report the basic facts about the environment: its action and observation spaces.
for label, space in (
    ("Action Space:", env.action_space),
    ("Observation Space:", env.observation_space),
):
    print(label, space)

# Reset once so we can look at a sample observation and the accompanying info dict.
obs, info = env.reset()
for label, payload in (
    ("Beispielhafte Observation:", obs),
    ("Info Dict:", info),
):
    print(label, payload)

# Release the environment again.
env.close()

🎯 Action Space: Box(-1.0, 1.0, (8,), float32)
🎯 Observation Space: Box(-inf, inf, (18,), float32)
🔍 Beispielhafte Observation: [-3.          0.          0.          0.          0.          0.
  3.          0.          0.          0.          0.          0.
  1.92221165 -0.39473677  0.          0.          0.          0.        ]
🔍 Info Dict: {'winner': 0, 'reward_closeness_to_puck': 0.0, 'reward_touch_puck': 0.0, 'reward_puck_direction': 0.0}


In [4]:
# The environment created earlier has already been closed, so build a fresh
# one here instead of relying on leftover kernel state.
env = h_env.HockeyEnv()
try:
    # Print a sample observation
    obs, info = env.reset()
    print("🔍 Beispielhafte Observation:", obs)

    # What shape does the observation have?
    print("🔍 Observation Shape:", obs.shape)
finally:
    # Always release the environment, even if reset() or printing fails.
    env.close()


🔍 Beispielhafte Observation: [-3.          0.          0.          0.          0.          0.
  3.          0.          0.          0.          0.          0.
 -1.48558497 -0.64969659  0.          0.          0.          0.        ]
🔍 Observation Shape: (18,)


In [5]:
import time

# Start a fresh environment
env = h_env.HockeyEnv()

try:
    obs, info = env.reset()

    # Try random actions for at most 50 steps and watch how the environment reacts
    for _step in range(50):
        env.render(mode="human")  # show the GUI
        action = np.random.uniform(-1, 1, env.action_space.shape)  # random action
        obs, reward, terminated, truncated, info = env.step(action)

        # An episode is over when it is either terminated or truncated; keep
        # `done` defined because later cells may read it.
        done = terminated or truncated

        print(f"Step Reward: {reward:.2f} | New Observation: {obs}")

        if done:
            break
finally:
    # Close the environment (and its render window) even if the loop raises or
    # the kernel is interrupted mid-rollout.
    env.close()


Step Reward: 0.00 | New Observation: [-3.01764274e+00  4.83036041e-04  3.39480266e-02 -8.78606975e-01
 -1.83521882e-01  1.69740129e+00  3.00018120e+00  2.54731178e-02
 -1.56545602e-02  8.28835461e-03  1.17789149e+00 -7.82728016e-01
  1.20663404e+00  3.76695633e-01  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00]
Step Reward: 0.00 | New Observation: [-3.03376865e+00 -2.47771740e-02  6.72322363e-02 -7.96008825e-01
 -1.46640337e+00  1.66421068e+00  2.98239803e+00  5.24005890e-02
 -8.74449499e-04 -8.88377726e-01  1.43680346e+00  7.39005566e-01
  1.20663404e+00  3.76695633e-01  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00]
Step Reward: 0.00 | New Observation: [-3.07412887e+00 -7.91511536e-02  1.18683457e-01 -1.98878026e+00
 -3.03212643e+00  2.57256103e+00  2.97808123e+00  6.38542175e-02
  2.61930353e-03 -2.15882421e-01  5.94034433e-01  1.74687654e-01
  1.20663404e+00  3.76695633e-01  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00]
Step Re

In [6]:
# Summarise the most recent step: the reward first, then every entry of the info dict.
print(f"Step Reward: {reward:.2f}")
for key in info:
    value = info[key]
    print(f"{key}: {value}")
# Visual separator line
print(50 * "-")


Step Reward: 0.00
winner: 0
reward_closeness_to_puck: 0.0
reward_touch_puck: 0.0
reward_puck_direction: 0.0
--------------------------------------------------


In [7]:
# Work on a fresh, reset environment: the one used earlier in the notebook has
# been closed and its episode may already have ended.
env = h_env.HockeyEnv()
try:
    obs, info = env.reset()

    # The joint action handed to env.step() must match env.action_space, so each
    # player only gets half of its dimensions. Stacking two full-size vectors
    # would produce an action twice as long as the declared action space.
    # NOTE(review): assumes the first half controls the agent and the second
    # half the opponent — confirm against hockey_env.
    total_action_dim = env.action_space.shape[0]
    if total_action_dim % 2 != 0:
        raise ValueError(
            f"Cannot split an action space of size {total_action_dim} between two players"
        )
    per_player_dim = total_action_dim // 2

    action_agent = np.random.uniform(-1, 1, per_player_dim)  # random action
    action_opponent = np.random.uniform(-1, 1, per_player_dim)  # opponent acts randomly too

    joint_action = np.hstack([action_agent, action_opponent])
    obs, reward, terminated, truncated, info = env.step(joint_action)
    # Keep `done` defined for any later cell that reads it.
    done = terminated or truncated
    print(f"Reward: {reward}, Info: {info}")
finally:
    # Release the environment even if the step fails.
    env.close()


Reward: 0.0, Info: {'winner': 0, 'reward_closeness_to_puck': 0.0, 'reward_touch_puck': 0.0, 'reward_puck_direction': 0.0}
