In [1]:
!pip install pygame
!git clone https://github.com/elitcenk/flappy-bird-agent.git
  
import os
os.chdir('flappy-bird-agent')

Collecting pygame
[?25l  Downloading https://files.pythonhosted.org/packages/8e/24/ede6428359f913ed9cd1643dd5533aefeb5a2699cc95bea089de50ead586/pygame-1.9.6-cp36-cp36m-manylinux1_x86_64.whl (11.4MB)
[K     |████████████████████████████████| 11.4MB 2.4MB/s 
[?25hInstalling collected packages: pygame
Successfully installed pygame-1.9.6
Cloning into 'flappy-bird-agent'...
remote: Enumerating objects: 41, done.[K
remote: Counting objects: 100% (41/41), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 41 (delta 16), reused 37 (delta 15), pack-reused 0[K
Unpacking objects: 100% (41/41), done.


Colab don't support monitor. For this we use headless.

In [0]:
os.putenv('SDL_VIDEODRIVER', 'fbcon')
os.environ["SDL_VIDEODRIVER"] = "dummy"

print("Headless done") 

FlappyBird game path added.

In [0]:
import sys

sys.path.append("game/")

print("Game added.") 

Configuration of DQN of Flappy Bird.

In [0]:
ACTIONS = 2  # number of valid actions
GAMMA = 0.99  # decay rate of past observations
OBSERVATION = 100.  # timesteps to observe before training
N_EP = 10000   # number of episode
N_SAVE = 500  # every N_SAVE number save the model
EXPLORE = 10000.  # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001  # final value of epsilon
INITIAL_EPSILON = 0.1  # starting value of epsilon
REPLAY_MEMORY = 5000  # number of previous transitions to remember
BATCH = 32  # size of minibatch
FRAME_PER_ACTION = 1
LEARNING_RATE = 1e-4

IMG_ROWS, IMG_COLS = 80, 80
IMG_CHANNELS = 4  # Stacked 4 frames
print("imported successfully") 

Import some library

In [5]:
import skimage as skimage
from skimage import transform, color, exposure

import game.wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque

import json
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten
from keras.layers.convolutional import Conv2D
from keras.optimizers import Adam

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


Using TensorFlow backend.


Added DQN agent of Flappy Bird

In [0]:
class DQNAgent():

    def __init__(self):
        self.game_state = game.GameState()
        self.memory = deque(maxlen=REPLAY_MEMORY)
        self.scores = deque(maxlen=100)
        self.build_model()

    def build_model(self):
        print("Now we build the model")
        self.model = Sequential()
        self.model.add(Conv2D(32, (8, 8), input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS), strides=(4, 4), padding="same"))
        self.model.add(Activation('relu'))
        self.model.add(Conv2D(64, (4, 4), strides=(2, 2), padding="same"))
        self.model.add(Activation('relu'))
        self.model.add(Conv2D(64, (3, 3), strides=(1, 1), padding="same"))
        self.model.add(Activation('relu'))
        self.model.add(Flatten())
        self.model.add(Dense(512))
        self.model.add(Activation('relu'))
        self.model.add(Dense(ACTIONS))
        self.model.compile(loss='mse', optimizer=Adam(lr=LEARNING_RATE))
        print("We finish building the model")

    def train(self):
        # We go to training mode
        self.OBSERVE = OBSERVATION
        self.epsilon = INITIAL_EPSILON
        self.run_model()

    def evaluate(self):
        self.OBSERVE = 999999999  # We keep observe, never train
        self.epsilon = FINAL_EPSILON
        self.restore_model()
        self.run_model()

    def restore_model(self):
        weight_path = F"/content/gdrive/My Drive/model.h5"
        print("Now we load weight")
        self.model.load_weights(weight_path)
        self.model.compile(loss='mse', optimizer=Adam(lr=LEARNING_RATE))
        print("Weight load successfully")

    def run_model(self):
        for step in range(N_EP):
            score = 0
            current_stack = self.create_first_stack()
            while True:
                action, action_index = self.act(current_stack, step)
                # run the selected action and observed next state and reward
                frame, reward, terminal = self.game_state.frame_step(action)
                frame = self.preprocess(frame)
                frame = frame.reshape(1, frame.shape[0], frame.shape[1], 1)  # 1x[IMG_COLS]x[IMG_ROWS]x1
                self.remember(action_index, current_stack, reward, terminal, frame)
                current_stack = np.append(frame, current_stack[:, :, :, :3], axis=3)
                Q_sa, loss = self.replay(step)
                score = score + reward
                if terminal:
                    break
            self.scores.append(score)
            print("Episode {} score: {}".format(step + 1, score))
            self.mean_score = np.mean(self.scores)
            self.print(step, score)

    def print(self, step, score):
        if (step + 1) % 5 == 0:
            print("Episode {}, score: {}, exploration at {}%, mean of last 100 episodes was {}".format(step + 1, score, self.epsilon, self.mean_score))

        if (step + 1) % N_SAVE == 0 and step > 0:
            self.save_model()

    def save_model(self):
        print("Now we save model")
        json_path = F"/content/gdrive/My Drive/model.json"
        weight_path = F"/content/gdrive/My Drive/model.h5"
        self.model.save_weights(weight_path, overwrite=True)
        with open(json_path, "w") as outfile:
            json.dump(self.model.to_json(), outfile)

    def remember(self, action_index, current_stack, reward, terminal, frame):
        # store the transition in memory
        self.memory.append((current_stack, action_index, reward, frame, terminal))

    def replay(self, step):
        Q_sa = 0
        loss = 0
        # only train if done observing
        if step > self.OBSERVE:
            # sample a minibatch to train on
            minibatch = random.sample(self.memory, BATCH)
            # Now we do the experience replay
            current_stack, action_index, reward, frame, terminal = zip(*minibatch)
            current_stack = np.concatenate(current_stack)
            frame = np.concatenate(frame)
            next_stack = np.append(frame, current_stack[:, :, :, :3], axis=3)
            targets = self.model.predict(current_stack)
            Q_sa = self.model.predict(next_stack)
            targets[range(BATCH), action_index] = reward + GAMMA * np.max(Q_sa, axis=1) * np.invert(terminal)
            loss += self.model.train_on_batch(current_stack, targets)
        return Q_sa, loss

    def act(self, frame_stack, step):
        action_index = 0
        a_t = np.zeros([ACTIONS])
        # choose an action epsilon greedy
        if step % FRAME_PER_ACTION == 0:
            if random.random() <= self.epsilon:
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else:
                q = self.model.predict(frame_stack)  # input a stack of [IMG_CHANNELS] images, get the prediction
                max_Q = np.argmax(q)
                action_index = max_Q
                a_t[max_Q] = 1

        # We reduced the epsilon gradually
        if self.epsilon > FINAL_EPSILON and step > self.OBSERVE:
            self.epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
        return a_t, action_index

    def preprocess(self, frame):
        frame = skimage.color.rgb2gray(frame)
        frame = skimage.transform.resize(frame, (IMG_ROWS, IMG_COLS))
        frame = skimage.exposure.rescale_intensity(frame, out_range=(0, 255))
        return frame / 255.0

    def create_first_stack(self):
        # get the first state by doing nothing and preprocess the image
        do_nothing = np.zeros(ACTIONS)
        do_nothing[0] = 1
        frame, reward, terminal = self.game_state.frame_step(do_nothing)
        frame = self.preprocess(frame)
        frame_stack = np.stack((frame, frame, frame, frame), axis=2)
        # In Keras, need to reshape
        frame_stack = frame_stack.reshape(1, frame_stack.shape[0], frame_stack.shape[1], frame_stack.shape[2])  # 1*[IMG_COLS]x[IMG_ROWS]*[IMG_CHANNELS]
        return frame_stack


Mount the Google Drive for save and load model.

In [7]:
from google.colab import drive
drive.mount('/content/gdrive/')

print("mounded GDrive")  

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive/
mounded GDrive


Create agent and train. If yo want to evaluate you call evaluate method.

In [0]:
agent = DQNAgent()
agent.train()

Now we build the model




We finish building the model





  image = (image - imin) / float(imax - imin)





Episode 1 score: -0.6699999999999999
Episode 2 score: -0.6799999999999999
Episode 3 score: -0.4299999999999997
Episode 4 score: -0.83
Episode 5 score: -0.83
Episode 5, score: -0.83, exploration at 0.1%, mean of last 100 episodes was -0.688
Episode 6 score: -0.83
Episode 7 score: -0.6399999999999999
Episode 8 score: -0.83
Episode 9 score: 0.6300000000000003
Episode 10 score: -0.83
Episode 10, score: -0.83, exploration at 0.1%, mean of last 100 episodes was -0.594
Episode 11 score: -0.83
Episode 12 score: -0.5199999999999998
Episode 13 score: -0.6599999999999999
Episode 14 score: 0.9200000000000006
Episode 15 score: -0.6399999999999999
Episode 15, score: -0.6399999999999999, exploration at 0.1%, mean of last 100 episodes was -0.5113333333333332
Episode 16 score: -0.83
Episode 17 score: -0.5199999999999998
Episode 18 score: -0.83
Episode 19 score: -0.83
Episode 20 score: -0.83
Episode 20, score: -0.83, exploration at 0.1%, mean of last 100 episodes was -0.5754999999999998
Episode 21 sc