In [1]:
import gym
import numpy as np

In [2]:
class Agent:
    """Baseline agent that acts by sampling from the environment's action space."""

    def __init__(self, env: "gym.Env") -> None:
        """Store the environment this agent interacts with.

        Args:
            env: Environment exposing ``action_space``, ``reset``, ``step``,
                ``render`` and ``close`` (the methods used by this class).
        """
        self.env = env

    def get_action(self) -> int:
        """Return an action sampled from the environment's action space."""
        return self.env.action_space.sample()

    def play(self, episodes: int, render: bool = True) -> list:
        """Play ``episodes`` full episodes and collect the total reward of each.

        Args:
            episodes: Number of episodes to play.
            render: If True, ``env.render()`` is called before every step.
                Rendering can simply be turned off (e.g. while training).

        Returns:
            A list with exactly one entry per played episode, holding that
            episode's summed reward, in play order.

        The environment is closed before returning, including when an
        episode raises or is interrupted.
        """
        # Start empty: one entry is appended per finished episode. Pre-filling
        # with zeros AND appending would double the list length and skew any
        # statistics computed on the result.
        rewards = []
        try:
            for episode in range(episodes):
                self.env.reset()
                total_reward = 0.0

                while True:
                    if render:
                        self.env.render()
                    action = self.get_action()
                    # step() is unpacked as a 4-tuple; only reward and the
                    # done flag are needed here.
                    _, reward, done, _ = self.env.step(action)
                    total_reward += reward
                    if done:
                        break

                print(f"Episode: {episode} --- Total reward: {total_reward}")
                rewards.append(total_reward)
        finally:
            # Always release the environment, even on errors / Ctrl-C.
            self.env.close()
        return rewards

In [3]:
def main() -> None:
    """Run the agent on every enabled game and print reward statistics.

    For each game id an environment is created, 100 rendered episodes are
    played, and the mean, minimum and maximum of the returned rewards are
    printed.
    """
    # Game ids to evaluate; commented-out entries are currently disabled.
    games = [
        # "CartPole-v1",
        # "MountainCar-v0",
        # "PongNoFrameskip-v4",
        "Breakout-v0",
    ]

    for game in games:
        rewards = Agent(gym.make(game)).play(episodes=100, render=True)

        # Summary statistics over the rewards returned by play().
        rewards_mean, rewards_min, rewards_max = (
            np.mean(rewards),
            np.min(rewards),
            np.max(rewards),
        )

        print(f"Rewards_mean: {rewards_mean}")
        print(f"Rewards_min: {rewards_min}")
        print(f"Rewards_max: {rewards_max}")

In [4]:
# Entry point: call main() only when this module is executed directly.
if __name__ == "__main__":
    main()



Episode: 0 --- Total reward: 4.0
Episode: 1 --- Total reward: 0.0
Episode: 2 --- Total reward: 0.0
Episode: 3 --- Total reward: 1.0
Episode: 4 --- Total reward: 0.0
Episode: 5 --- Total reward: 2.0
Episode: 6 --- Total reward: 0.0
Episode: 7 --- Total reward: 1.0
Episode: 8 --- Total reward: 0.0
Episode: 9 --- Total reward: 2.0
Episode: 10 --- Total reward: 1.0
Episode: 11 --- Total reward: 3.0
Episode: 12 --- Total reward: 1.0
Episode: 13 --- Total reward: 1.0
Episode: 14 --- Total reward: 2.0
Episode: 15 --- Total reward: 3.0
Episode: 16 --- Total reward: 1.0
Episode: 17 --- Total reward: 0.0
Episode: 18 --- Total reward: 0.0
Episode: 19 --- Total reward: 2.0
Episode: 20 --- Total reward: 2.0
Episode: 21 --- Total reward: 2.0
Episode: 22 --- Total reward: 0.0
Episode: 23 --- Total reward: 2.0
Episode: 24 --- Total reward: 1.0
Episode: 25 --- Total reward: 2.0
Episode: 26 --- Total reward: 1.0
Episode: 27 --- Total reward: 1.0
Episode: 28 --- Total reward: 4.0
Episode: 29 --- Total re