In [1]:
import gym
import numpy as np

In [2]:
class Agent:
    def __init__(self, env: gym.Env) -> None:
        self.env = env

    def get_action(self) -> int:
        return self.env.action_space.sample()

    def play(self, episodes: int, render: bool = True) -> list:
        rewards = [0.0 for _ in range(episodes)]
        for episode in range(episodes):
            self.env.reset()
            total_reward = 0.0

            while True:
                if render:
                    self.env.render() # Rendern wird beim Trainieren einfach deaktivieren
                action = self.get_action()
                state, reward, done, _ = self.env.step(action)
                total_reward += reward
                if done:
                    break

            print(f"Episode: {episode} --- Total reward: {total_reward}")
            rewards.append(total_reward)
        self.env.close()
        return rewards

In [3]:
def main() -> None:
    games = [
        # "CartPole-v1",
        # "MountainCar-v0",
        # "PongNoFrameskip-v4",
        "Breakout-v0",
    ]

    for game in games:
        env = gym.make(game)
        agent = Agent(env)
        rewards = agent.play(episodes=100, render=True)

        rewards_mean = np.mean(rewards)
        rewards_min = np.min(rewards)
        rewards_max = np.max(rewards)

        print(f"Rewards_mean: {rewards_mean}")
        print(f"Rewards_min: {rewards_min}")
        print(f"Rewards_max: {rewards_max}")

In [4]:
if __name__ == "__main__":
    main()

Episode: 0 --- Total reward: 10.0
Rewards_mean: 5.0
Rewards_min: 0.0
Rewards_max: 10.0
Episode: 0 --- Total reward: -200.0
Rewards_mean: -100.0
Rewards_min: -200.0
Rewards_max: 0.0




Episode: 0 --- Total reward: -21.0
Rewards_mean: -10.5
Rewards_min: -21.0
Rewards_max: 0.0
Episode: 0 --- Total reward: 1.0
Rewards_mean: 0.5
Rewards_min: 0.0
Rewards_max: 1.0
