In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import time
from gymnasium import Env
from gymnasium.spaces import Box, Discrete
from src.flappy import Flappy
from stable_baselines3 import PPO, DQN, A2C, SAC
from collections import deque
from math import log, e

In [5]:
class FlappyBirdEnv(Env):
    """Gymnasium environment wrapping a Flappy Bird game with frame skip and reward shaping.

    Each call to :meth:`step` applies the agent's action for one game frame and
    then advances up to ``frames_skip`` further frames with action ``0``.
    Observations are single-channel ``uint8`` images declared as ``(1, 512, 288)``.

    The shaped reward is built from:

    * the game's own reward (rewards from skipped frames are weighted x5),
    * a flat ``-200`` that replaces the accumulated reward when the episode ends,
    * a penalty ``1.2 ** (sum of the last 5 actions)``,
    * a penalty ``1.05 ** (sum of ``1 - action`` over the last 7 actions)``,
    * a bonus ``min(ln(episode_length), 10)``.
    """

    def __init__(self, GameClass, frames_skip=3, fps=1000000):
        """Create the wrapped game and declare the spaces.

        Args:
            GameClass: Callable invoked as ``GameClass(fps)``; the returned object
                must provide ``step(action)`` (4-tuple ``obs, reward, done, info``)
                and ``reset()`` (2-tuple ``obs, info``).
            frames_skip: Number of extra frames advanced with action ``0`` after
                the agent's action on every ``step`` call. ``0`` disables skipping.
            fps: Value forwarded unchanged to ``GameClass``.
        """
        super().__init__()

        self.observation_space = Box(low=0, high=255, shape=(1, 512, 288), dtype=np.uint8)
        self.action_space = Discrete(2)
        self.game = GameClass(fps)
        self.frames_skip = frames_skip
        # Sliding windows over recent actions, used only for reward shaping.
        self.flap_buffer = deque(maxlen=5)
        self.pass_buffer = deque(maxlen=7)
        self.ep_length = 0

    def step(self, action):
        """Apply ``action``, skip frames, and return the shaped transition.

        Args:
            action: ``0`` or ``1`` (member of ``Discrete(2)``).

        Returns:
            ``(observation, reward, terminated, truncated, info)``. ``truncated``
            is always ``False`` because this environment imposes no time limit;
            episode ends are reported through ``terminated`` only.
        """
        self.ep_length += 1
        self.flap_buffer.append(action)
        self.pass_buffer.append(1 - action)

        # Keep the first frame's observation/info so they are defined even when
        # no frame is skipped (frames_skip == 0) or the episode ends immediately.
        obs, reward, done, info = self.game.step(action)
        for _ in range(self.frames_skip):
            if done:
                # Do not keep stepping a game that has already ended.
                break
            obs, skipped_reward, skipped_done, info = self.game.step(0)
            reward += skipped_reward * 5
            done = done or skipped_done

        if done:
            # Episode end overrides whatever reward was accumulated this step.
            reward = -200

        # Action-history penalties and episode-length bonus (applied on every
        # step, including the terminal one).
        reward -= 1.2 ** sum(self.flap_buffer)
        reward -= 1.05 ** sum(self.pass_buffer)
        reward += min(log(self.ep_length, e), 10)

        return self.process_observation(obs), reward, bool(done), False, info

    def reset(self, **kwargs):
        """Start a new episode and return ``(observation, info)``.

        Accepts the standard Gymnasium keyword arguments. ``seed`` (if given) is
        forwarded to ``Env.reset`` so ``self.np_random`` is seeded; the wrapped
        game's ``reset()`` is called without arguments, so the seed is not
        passed on to the game itself. ``options`` is accepted and ignored.
        """
        super().reset(seed=kwargs.get("seed"))
        self.flap_buffer.clear()
        self.pass_buffer.clear()
        self.ep_length = 0
        obs, info = self.game.reset()
        return self.process_observation(obs), info

    def process_observation(self, obs):
        """Convert a colour frame to a grayscale image with a leading channel axis.

        The frame is converted with ``cv2.COLOR_BGR2GRAY``, its two axes are
        swapped, and a size-1 axis is prepended.
        """
        # NOTE(review): assumes the game returns frames whose first two axes,
        # once swapped, match the declared (512, 288) — confirm against the game.
        gray = cv2.cvtColor(obs, cv2.COLOR_BGR2GRAY)
        return np.expand_dims(gray.transpose(), 0)

In [6]:
# Build the environment (two skipped frames per agent action) and a PPO agent
# with an image policy that collects 512 steps per rollout.
env = FlappyBirdEnv(GameClass=Flappy, frames_skip=2)
model = PPO(policy="CnnPolicy", env=env, n_steps=512, verbose=1)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [7]:
# Train for 100k environment steps.
model.learn(total_timesteps=100_000)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24       |
|    ep_rew_mean     | -210     |
| time/              |          |
|    fps             | 115      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 512      |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 24          |
|    ep_rew_mean          | -212        |
| time/                   |             |
|    fps                  | 68          |
|    iterations           | 2           |
|    time_elapsed         | 14          |
|    total_timesteps      | 1024        |
| train/                  |             |
|    approx_kl            | 0.027030695 |
|    clip_fraction        | 0.593       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.628      |
|    explained_variance   | 0.000343    |
|    learning_rate        | 0.

KeyboardInterrupt: 

: 