# Flappy Bird

In [1]:
# @title Imports
from time import sleep

import numpy as np
from IPython.display import clear_output
from deep_rl.environments.flappy_bird import FlappyBird
from deep_rl.shapes import Point, Rectangle
from deep_rl.terminal_renderer import BashRenderer

from src import (
    infer_parameters,
    TreeBuilder,
    get_best_action,
    print_outcomes_stats,
    checkpoint,
)

In [2]:
# Physics constants handed to the environment; the inference cell later checks
# its estimates against these same values.
params = dict(gravity=0.05, force_push=0.1, vx=0.05)

# Create the Flappy Bird environment from the constants above plus the bar settings.
env = FlappyBird(
    **params,
    prob_new_bar=1,
    max_height_bar=0.5,
)

In [3]:
# @title Inferring the parameters
# `infer_parameters(env)` returns three values, unpacked here as gravity,
# force_push and vx; they are checked against the values stored in `params`.
gravity, force_push, vx = infer_parameters(env)

# np.testing.assert_allclose still raises AssertionError on mismatch, but its
# message shows which entries differ and by how much. `atol` and `equal_nan`
# are pinned to np.allclose's defaults so the pass/fail criterion is the same
# as a plain `np.allclose(..., rtol=1e-3)` check.
np.testing.assert_allclose(
    [gravity, force_push, vx],
    [params["gravity"], params["force_push"], params["vx"]],
    rtol=1e-3,
    atol=1e-8,
    equal_nan=False,
    err_msg="Parameters inference failed",
)

In [4]:
# @title Building the tree
# Reset the environment, take one step with action 0 and keep only the first
# element of what `step` returns (the observation).
env.reset()
initial_observation = env.step(0)[0]

# The observation is a pair: the bird's (x, y, vy) triple and the bars.
bird_state, bars = initial_observation
bird_x, bird_y, bird_vy = bird_state

# Build the outcome tree starting from the bird's current state.
tree_builder = TreeBuilder(bars, gravity, force_push, vx)
outcomes = tree_builder.build_tree(bird_x, bird_y, bird_vy)

# Report the builder's counters, then the outcome statistics.
tree_stats_message = (
    f"\nNumber of leaves computed: {tree_builder.n_steps_computed}\n"
    f"Number of leave computation steps saved: {tree_builder.n_steps_saved}\n"
)
print(tree_stats_message)
print_outcomes_stats(outcomes)


Number of leaves computed: 980
Number of leave computation steps saved: 3116


Number of favorable outcomes:    892 / 4096  (21.78%)
- Probability of winning when standing still: 7.28%
- Probability of winning when jumping:        14.50%



In [23]:
# Simulation loop: at every step pick an action from the current outcome tree,
# advance the environment, rebuild the tree from the new observation and draw
# the frame in the cell output.
#
# NOTE: this cell starts from the `outcomes` and `env` state left by the
# "Building the tree" cell, so that cell must be re-run before re-running this one.
ROWS, COLS = 15, 30
renderer = BashRenderer(ROWS, COLS, clear_fn=lambda: clear_output(wait=True))

max_steps = 1000
time_between_frame = 0.1  # passed to sleep() after each rendered frame

# Rectangle spanning (0, 0)-(1, 1). It does not depend on the simulation state,
# so it is built once here instead of on every frame.
# NOTE(review): nothing in this cell reads `background`; kept in case a later
# cell relies on it - confirm and delete if unused.
background = Rectangle(
    bot_left=Point(x=0, y=0),
    top_right=Point(x=1, y=1),
)

# `step` is pre-set so it stays defined even if the loop body never runs.
step, total_reward = 0, 0

for step in range(1, max_steps + 1):
    action = get_best_action(outcomes)
    observation, reward, done = env.step(action)
    print(f"action: {action}, reward: {reward}, observation: {str(observation)}")

    total_reward += reward

    print(f"Cumulated reward at step {step}: {total_reward:>3}.")
    if done:
        print(f"Simulation ended after {step} steps.")
        break

    # observation[0] is the bird's (x, y, vy) triple, observation[1] the bars:
    # rebuild the tree from the state the environment just returned.
    tree_builder = TreeBuilder(observation[1], gravity, force_push, vx)
    outcomes = tree_builder.build_tree(*observation[0])
    print_outcomes_stats(outcomes)

    # Draw the current frame, then pause before the next step.
    renderer.clear()
    renderer.draw_list(env.render())
    renderer.draw_title(f"Total reward : {total_reward}")
    renderer.render()
    sleep(time_between_frame)

action: 0, reward: 0, observation: ((0.5, 0.45, -0.05), [(1.0, 1.1, 0.23749089358906894, True)])
Cumulated reward at step 1:   0.

Number of favorable outcomes:    789 / 4096  (19.26%)
- Probability of winning when standing still: 6.64%
- Probability of winning when jumping:        12.62%

action: 1, reward: 0, observation: ((0.5, 0.45, 0.0), [(0.95, 1.05, 0.23749089358906894, True)])
Cumulated reward at step 2:   0.

Number of favorable outcomes:    500 / 2048  (24.41%)
- Probability of winning when standing still: 12.45%
- Probability of winning when jumping:        11.96%

action: 0, reward: 0, observation: ((0.5, 0.4, -0.05), [(0.8999999999999999, 0.9999999999999999, 0.23749089358906894, True)])
Cumulated reward at step 3:   0.

Number of favorable outcomes:    255 / 1024  (24.90%)
- Probability of winning when standing still: 7.81%
- Probability of winning when jumping:        17.09%

action: 1, reward: 0, observation: ((0.5, 0.4, 0.0), [(0.8499999999999999, 0.9499999999999998, 0.