# Flappy Bird

In [1]:
# @title Imports
from time import sleep
from typing import Tuple

import numpy as np
from IPython.display import clear_output
from deep_rl.environments.flappy_bird import FlappyBird
from deep_rl.shapes import Point, Rectangle
from deep_rl.terminal_renderer import BashRenderer

from src import (
    infer_parameters,
    TreeBuilder,
    get_best_action,
    print_outcomes_stats,
    repr_obs,
    print_successful_decisions,
    print_successful_trajectories,
)

In [2]:
# Physics constants handed to the environment; the same dict is compared
# against the inferred values in the next cell.
params = dict(gravity=0.05, force_push=0.1, vx=0.05)
# The remaining settings are passed explicitly alongside the physics constants.
env = FlappyBird(**params, prob_new_bar=1, max_height_bar=0.5)

In [3]:
# @title Inferring the parameters
# Recover the physics constants from the environment itself, then make sure
# they agree (to a relative tolerance of 1e-3) with the values used to build it.
gravity, force_push, vx = infer_parameters(env)
assert np.allclose(
    [gravity, force_push, vx],
    tuple(params[key] for key in ("gravity", "force_push", "vx")),
    rtol=1e-3,
), "Parameters inference failed"

In [4]:
# @title Building the tree
# Start a new episode, then take one step with action 0 and keep only the
# first element of the step result (the observation): the bird's
# (x, y, vertical speed) triple plus the bars.
env.reset()
(bird_x, bird_y, bird_vy), bars = env.step(0)[0]
# Build the outcome tree from that state, using the parameters inferred above.
tree_builder = TreeBuilder(bars, gravity, force_push, vx)
outcomes = tree_builder.build_tree(bird_x, bird_y, bird_vy)
# Report the builder's computed/saved counters, then a summary of the outcomes.
print(
    f"\nNumber of leaves computed: {tree_builder.n_steps_computed}\n"
    f"Number of leave computation steps saved: {tree_builder.n_steps_saved}\n"
)
print_outcomes_stats(outcomes)


Number of leaves computed: 684
Number of leave computation steps saved: 3412


Number of favorable outcomes:    594 / 4096  (14.50%)
- Probability of winning when standing still: 4.30%
- Probability of winning when jumping:        10.21%



## Experiments

In [12]:
def launch_experiment(
    predicted_outcomes: np.ndarray,
    rendering_size: Tuple[int, int] = (20, 40),
    time_between_frames: float = 0.1,
    clear_between_frames: bool = False,
    max_steps: int = 1000,
    print_decisions_only: bool = False,
    max_bars: int = 100,
) -> Tuple[int, int]:
    """Play the notebook-level ``env`` step by step, replanning after every step.

    At each step the best action for the current ``predicted_outcomes`` is
    played, the step result is printed, and (unless the episode ended) the
    tree is rebuilt from the new observation before the frame is rendered.

    Note: this function reads the notebook globals ``env``, ``gravity``,
    ``force_push`` and ``vx``. It does not reset ``env``; the caller is
    expected to pass outcomes that match the environment's current state.

    Args:
        predicted_outcomes: Outcomes used to choose the first action.
        rendering_size: Positional arguments forwarded to ``BashRenderer``.
        time_between_frames: Seconds to sleep after rendering each frame.
        clear_between_frames: If true, clear the renderer before each frame.
        max_steps: Maximum number of environment steps to play.
        print_decisions_only: If true, print the successful decisions instead
            of the full successful trajectories.
        max_bars: Forwarded to ``TreeBuilder`` as ``max_bars`` on every rebuild.

    Returns:
        ``(total_reward, step)``: the summed reward and the number of steps played.
    """
    step, total_reward = 0, 0
    renderer = BashRenderer(*rendering_size, clear_fn=lambda: clear_output(wait=True))

    for _ in range(max_steps):
        # Play the action recommended by the current tree.
        action = get_best_action(predicted_outcomes)
        observation, reward, done = env.step(action)
        print(
            f"action: {action}, reward: {reward}, observation: {repr_obs(observation)}"
        )

        step += 1
        total_reward += reward
        print(f"\nCumulated reward at step {step}: {total_reward:>3}.")
        if done:
            print(f"Simulation ended after {step} steps.")
            break

        # Rebuild the tree from the freshly observed state.
        (bird_x, bird_y, bird_vy), bars = observation
        tree_builder = TreeBuilder(bars, gravity, force_push, vx, max_bars=max_bars)
        predicted_outcomes = tree_builder.build_tree(bird_x, bird_y, bird_vy)

        # Print a summary of the new predictions.
        print_outcomes_stats(predicted_outcomes)
        if print_decisions_only:
            print_successful_decisions(predicted_outcomes)
        else:
            print_successful_trajectories(
                bird_x,
                bird_y,
                bird_vy,
                gravity,
                force_push,
                vx,
                predicted_outcomes,
            )

        # Render the current frame, then pause so it stays visible.
        if clear_between_frames:
            renderer.clear()
        renderer.draw_list(env.render())
        renderer.draw_title(f"Total reward : {total_reward}")
        renderer.render()
        sleep(time_between_frames)

    return total_reward, step

In [14]:
# Run an episode starting from the outcomes computed in the tree-building cell;
# the returned (total_reward, step) tuple is shown as the cell's output.
launch_experiment(predicted_outcomes=outcomes)

action: 1, reward: 0, observation: (0.50, 0.55, 0.05)
 - (1.00, 1.10, 0.38, BOTTOM)

Cumulated reward at step 1:   0.

Number of favorable outcomes:    590 / 4096  (14.40%)
- Probability of winning when standing still: 10.13%
- Probability of winning when jumping:        4.27%


A few successful trajectories:
 -- (0.50, 0.55, 0.05) -> falls to (0.55, 0.55, 0.00) -> jumps to (0.60, 0.60, 0.05) -> falls to (0.65, 0.60, 0.00) -> falls to (0.70, 0.55, -0.05) -> jumps to (0.75, 0.55, 0.00) -> falls to (0.80, 0.50, -0.05) -> falls to (0.85, 0.40, -0.10) -> falls to (0.90, 0.25, -0.15) -> falls to (0.95, 0.05, -0.20) -> falls to (1.00, -0.20, -0.25) -> falls to (1.05, -0.50, -0.30) -> falls to (1.10, -0.85, -0.35)
 -- (0.50, 0.55, 0.05) -> falls to (0.55, 0.55, 0.00) -> falls to (0.60, 0.50, -0.05) -> jumps to (0.65, 0.50, 0.00) -> jumps to (0.70, 0.55, 0.05) -> falls to (0.75, 0.55, 0.00) -> jumps to (0.80, 0.60, 0.05) -> falls to (0.85, 0.60, 0.00) -> falls to (0.90, 0.55, -0.05) -> falls t

(208, 609)

In [6]:
# Start a fresh episode before launching again: an earlier experiment leaves
# `env` in its final state and `outcomes` describing an episode that is over,
# so both must be rebuilt for this run to be reproducible on Run All.
env.reset()
(bird_x, bird_y, bird_vy), bars = env.step(0)[0]
outcomes = TreeBuilder(bars, gravity, force_push, vx).build_tree(
    bird_x, bird_y, bird_vy
)
launch_experiment(outcomes)


A few successful trajectories:
 -- 
JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP
 -- JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP
 -- JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP
 -- JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - FALL - FALL
 -- JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP
 -- JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - FALL
 -- JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP
 -- JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - FALL
 -- JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - FALL - FALL
 -- JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - JUMP - FALL
action: 1, reward: 0, observation: (0.50, 0.45, 0.00)
 - (0.95, 1.05, 0.01, TOP)
 - (1.00, 1.10, 0.18, BOTT

(45, 149)