# Instructions To Run and Background 

This notebook represents the work done to execute **Q-Learning** on the Flappy Bird environment. Because the Flappy Bird game depends on its own sprite and audio files, the `assets/` folder is included; the notebook loads the game assets from it and will not be able to run without it.

The point of this notebook is to demonstrate the training process as well as how to get started with testing an existing model. To begin, install the following: 
1. A Python 3.8.5 environment with TensorFlow and Pygame installed. The code cells below also import NumPy and Matplotlib. 

# First, we need to set up the PyGame environment. 
The source code for this is attributed to: https://github.com/sourabhv/FlapPyBird

In [None]:
import pickle
import random
from matplotlib import pyplot as plt
import numpy as np 


# Select SDL's "dummy" video driver so that no game window is rendered
# (headless training/testing).
import os
os.environ['SDL_VIDEODRIVER']='dummy'


# Seed Python's `random` module with a fixed value so runs are repeatable.
random.seed(999)

# Seed NumPy's global random generator with the same fixed value.
np.random.seed(999)

class Bot:
    """Tabular Q-learning agent that decides when the bird flaps.

    ``self.Q`` maps each state (any hashable value) to a two-element list of
    action values ``[Q(state, 0), Q(state, 1)]``. The game loop flaps when
    the chosen action is truthy, so index 1 is "flap" and index 0 is
    "do nothing".

    Files used (all relative to the current working directory):

    * ``q-table.pkl`` -- pickled Q-table, rewritten after every training
      episode.
    * ``scores.pkl`` -- pickled list of per-episode scores.
    * ``cache_file_0.txt`` / ``cache_file_testing.txt`` -- one log line
      appended per training / testing episode.
    """

    def __init__(self):
        """Set the hyper-parameters and load a saved model if one exists."""
        self.alpha = 0.7  # Constant learning rate
        self.epsilonFactor = 0.8  # epsilon is multiplied by this after every episode
        self.discount = .9  # Discount rate
        # Only used to scale the episode count when epsilon is restored below.
        self.sampleRate = 1
        self.fileName = 'q-table.pkl'  # Q-Table file name
        self.scoreFile = 'scores.pkl'  # Scores file name

        self.reward_list = []  # Summed reward of every episode seen by this instance
        self.score_cache = []  # Score of every episode seen by this instance

        # Try to open a pretrained model.
        # If and only if both files exist, we use the pretrained model: a
        # missing score file discards an already-loaded Q-table again.
        # NOTE: pickle.load executes arbitrary code embedded in the file, so
        # only load Q-table/score files that come from a trusted source.
        try:
            print(self.scoreFile)
            print(self.fileName)
            with open(self.fileName, 'rb') as f:
                self.Q = pickle.load(f)

            with open(self.scoreFile, 'rb') as f:
                # The number of stored scores is the number of episodes played.
                self.counter = len(pickle.load(f))
                self.epsilon = 0.001 * pow(self.epsilonFactor, self.counter / self.sampleRate)
        # Else start fresh training
        except FileNotFoundError:
            print('New Learning')
            self.Q = {}
            self.counter = 0
            self.epsilon = 0.2  # 0.1    # 0.001, 0.2, 0.4

        self.pipeReward = -15  # Reward for crashing into an upper pipe
        self.sumScore = 0  # Sum of scores for a sample
        self.scoreList = []  # Scores not yet written to the score file
        plt.ion()

    def maxQ(self, state, testing=False):
        """Pick an action for ``state`` and return ``(action, q_value)``.

        Args:
            state: key that is already present in ``self.Q``.
            testing: when true, always return the greedy action. Otherwise
                the *other* action is returned with probability
                ``self.epsilon`` (epsilon-greedy exploration).

        Returns:
            Tuple of the chosen action index (0 or 1) and its Q-value.

        Raises:
            KeyError: if ``state`` has not been added with ``appendState``.
        """
        max_val = max(self.Q[state])
        max_act = self.Q[state].index(max_val)

        # When testing we always take the best known action.
        if testing:
            return max_act, max_val

        if random.random() < self.epsilon:
            # Explore: flip to the other of the two actions.
            max_act = int(not max_act)
            max_val = self.Q[state][max_act]
        return max_act, max_val

    def appendState(self, state):
        """Add ``state`` to the Q-table with both action values at 0 if it is new."""
        if state not in self.Q:
            self.Q[state] = [0, 0]

    def updateQ(self, exp, upCrash, score):
        """Record a finished episode and update the Q-table from its transitions.

        Args:
            exp: sequence of ``(old_state, action, reward, new_state)`` tuples
                in the order they occurred. It is not modified.
            upCrash: true when the episode ended by hitting an upper pipe; the
                most recent flap (action 1) then has its reward replaced by
                ``self.pipeReward``.
            score: final score of the episode, forwarded to ``sample``.
        """
        self.sample(score, exp)  # Update the sample statistics for each game played

        # Walk the episode backwards (latest transition first). A reversed
        # copy is used so the caller's list is left untouched.
        history = list(reversed(exp))

        # If crashed with upper pipe then tax the last jump that caused it
        if upCrash:
            for i, xp in enumerate(history):
                if xp[1] == 1:
                    temp = list(xp)
                    temp[2] = self.pipeReward
                    history[i] = tuple(temp)
                    break

        # For each entry calculate and update the Q value.
        for xp in history:
            s = xp[0]
            a = xp[1]
            r = xp[2]
            # The Q-learning target bootstraps from the best value of the
            # next state, so query it greedily; the exploring variant of maxQ
            # would return the worse action's value with probability epsilon.
            _, fut_r = self.maxQ(xp[3], testing=True)
            self.Q[s][a] *= 1 - self.alpha
            self.Q[s][a] += self.alpha * (r + self.discount * fut_r)

    def sample(self, score, exp, testing=False):
        """Update, print and log the statistics for one finished episode.

        Args:
            score: final score of the episode.
            exp: iterable of transition tuples; element 2 of each is its reward.
            testing: when true the line is logged to
                ``cache_file_testing.txt`` and nothing is pickled; otherwise
                it is logged to ``cache_file_0.txt`` and the Q-table and
                scores are saved.

        Epsilon is decayed by ``self.epsilonFactor`` in both modes.
        """
        self.counter += 1

        reward_sum = 0
        for element in exp:
            reward_sum += element[2]

        self.reward_list.append(reward_sum)
        self.scoreList.append(score)
        self.score_cache.append(score)

        # One log line per episode: score, running average score, summed
        # reward and running average reward.
        string = (
            f"Episode: {self.counter!s}, Score: {score!s}, "
            f"Average Score: {np.mean(self.score_cache)!s}, "
            f"Reward: {reward_sum!s}, "
            f"Average Reward: {np.mean(self.reward_list)!s}\n"
        )
        print(string)

        log_name = "cache_file_testing.txt" if testing else "cache_file_0.txt"
        with open(log_name, "a") as f:
            f.write(string)

        self.sumScore += score

        if not testing:
            self.saveQ()
            self.dumpScores()
        self.epsilon *= self.epsilonFactor
        self.sumScore = 0

    def dumpScores(self):
        """Append the pending scores to the score file, then clear them."""
        try:
            with open(self.scoreFile, 'rb') as f:
                temp = pickle.load(f)
            temp += self.scoreList
        except FileNotFoundError:
            temp = self.scoreList

        with open(self.scoreFile, 'wb') as f:
            pickle.dump(temp, f)

        self.scoreList = []  # Resetting the scores list for next batch of sample

    def saveQ(self):
        """Pickle the Q-table to ``self.fileName``."""
        with open(self.fileName, 'wb') as f:
            pickle.dump(self.Q, f)


# Below, you will find the `testing` variable. [[SKIP THIS SECTION IF YOU WOULD LIKE TO RUN THE TRAINING PROCESS]]

It's set to true initially. This is because, first, I will demonstrate the testing process to show reproducibility. Then, I will walk you through the training process.

The network weights for the 130,000,000 frame of the seed 1001 are provided in the saved_networks folder. I didn't want to make the .zip or GitHub too large, so I just provided the best performing weights. Of course, more weights are available, please email me at: aadarsh.jha@vanderbilt.edu

In [None]:
# Mode switch read by main() and mainGame(): True runs the short evaluation
# loop (statistics only), False runs training (Q-table updated and saved).
testing = True

In [None]:
from itertools import cycle
import random
import sys
import pickle

# Select SDL's "dummy" video driver so that no game window is rendered; this
# is set before pygame is imported below.
import os
os.environ['SDL_VIDEODRIVER']='dummy'

import pygame
from pygame.locals import *

# Seed Python's `random` module (used for pipe placement and exploration)
# with a fixed value so runs are repeatable.
random.seed(999)

# Frame-rate cap handed to FPSCLOCK.tick(); presumably set this high so the
# clock never slows the simulation down.
FPS = 10000000
# Window size in pixels.
SCREENWIDTH  = 288
SCREENHEIGHT = 512
PIPEGAPSIZE  = 100 # gap between upper and lower part of pipe
# y coordinate at which the base (ground) sprite is drawn.
BASEY        = SCREENHEIGHT * 0.79
# image, sound and hitmask dicts, filled in by main()
IMAGES, SOUNDS, HITMASKS = {}, {}, {}

# x position of the first pipe pair at the start of an episode
STARTINGPIPEX = SCREENWIDTH - 100

# Initialise bot (loads a saved Q-table if one exists)
bot = Bot()

# Initialise the rewards and discretisation.
# dieReward is added to the reward of the frame in which the bird crashes,
# scoreReward for every pipe passed. Pixel offsets in the state are divided by
# `discretisation` and rounded, i.e. the screen is treated as a coarser grid.
dieReward = -10
scoreReward = 1
discretisation = 4

# Sprite paths for every available bird colour; each entry is a tuple of the
# three wing positions (up-flap, mid-flap, down-flap).
# main() always loads index 0 of each of the lists below.
PLAYERS_LIST = (
    # red bird
    (
        'assets/sprites/redbird-upflap.png',
        'assets/sprites/redbird-midflap.png',
        'assets/sprites/redbird-downflap.png',
    ),
    # blue bird
    (
        'assets/sprites/bluebird-upflap.png',
        'assets/sprites/bluebird-midflap.png',
        'assets/sprites/bluebird-downflap.png',
    ),
    # yellow bird
    (
        'assets/sprites/yellowbird-upflap.png',
        'assets/sprites/yellowbird-midflap.png',
        'assets/sprites/yellowbird-downflap.png',
    ),
)

# list of background sprite paths
BACKGROUNDS_LIST = (
    'assets/sprites/background-day.png',
    'assets/sprites/background-night.png',
)

# list of pipe sprite paths
PIPES_LIST = (
    'assets/sprites/pipe-green.png',
    'assets/sprites/pipe-red.png',
)


# Compatibility shim: where the name `xrange` does not exist (Python 3),
# alias it to `range` so the pixel loops below can keep using `xrange`.
try:
    xrange
except NameError:
    xrange = range

def main():
    """Initialise pygame, load the game assets and play a fixed number of episodes.

    Ten episodes are played when the module-level ``testing`` flag is true
    and 10000 otherwise. Every episode is one call to ``mainGame``.

    The sprite choice is fixed (index 0 of every asset list), so the sprites
    and their hitmasks are loaded once before the episode loop rather than
    again for every episode.
    """
    global SCREEN, FPSCLOCK

    if testing:
        print("Testing Mode")
    else:
        print("Training Mode")

    pygame.init()
    FPSCLOCK = pygame.time.Clock()
    SCREEN = pygame.display.set_mode((SCREENWIDTH, SCREENHEIGHT))
    pygame.display.set_caption('FlapPy Bird Q-Bot')  # Set the window name

    # numbers sprites for score display (0.png ... 9.png)
    IMAGES['numbers'] = tuple(
        pygame.image.load('assets/sprites/{}.png'.format(digit)).convert_alpha()
        for digit in range(10)
    )

    # game over sprite
    IMAGES['gameover'] = pygame.image.load('assets/sprites/gameover.png').convert_alpha()
    # message sprite for welcome screen
    IMAGES['message'] = pygame.image.load('assets/sprites/message.png').convert_alpha()
    # base (ground) sprite
    IMAGES['base'] = pygame.image.load('assets/sprites/base.png').convert_alpha()

    # sounds
    # NOTE: this is a substring test, so it also matches 'darwin' (macOS).
    if 'win' in sys.platform:
        soundExt = '.wav'
    else:
        soundExt = '.ogg'

    for sound_name in ('die', 'hit', 'point', 'swoosh', 'wing'):
        SOUNDS[sound_name] = pygame.mixer.Sound('assets/audio/' + sound_name + soundExt)

    # Fixed sprite selection: day background, red bird, green pipe.
    IMAGES['background'] = pygame.image.load(BACKGROUNDS_LIST[0]).convert()
    IMAGES['player'] = tuple(
        pygame.image.load(sprite_path).convert_alpha()
        for sprite_path in PLAYERS_LIST[0]
    )
    pipe_path = PIPES_LIST[0]
    IMAGES['pipe'] = (
        # upper pipe: the pipe sprite rotated by 180 degrees
        pygame.transform.rotate(pygame.image.load(pipe_path).convert_alpha(), 180),
        # lower pipe
        pygame.image.load(pipe_path).convert_alpha(),
    )

    # Per-pixel hitmasks for the pipes and for each player sprite.
    HITMASKS['pipe'] = tuple(getHitmask(image) for image in IMAGES['pipe'])
    HITMASKS['player'] = tuple(getHitmask(image) for image in IMAGES['player'])

    episode_limit = 10 if testing else 10000
    for _ in range(episode_limit):
        movementInfo = showWelcomeAnimation()
        crashInfo = mainGame(movementInfo)
        showGameOverScreen(crashInfo)

    if testing:
        print("Testing Finished")
    else:
        print("Training Finished")


def showWelcomeAnimation():
    """Return the initial movement state for a new episode.

    Despite the name (kept so existing callers keep working) nothing is
    drawn and no input is awaited: the function only computes where the
    player starts.

    Returns:
        dict with
        ``'playery'``: the y position that vertically centres the first
        player sprite on the screen,
        ``'basex'``: the initial x offset of the base, always 0,
        ``'playerIndexGen'``: an endless iterator over the sprite indices
        0, 1, 2, 1.
    """
    playery = int((SCREENHEIGHT - IMAGES['player'][0].get_height()) / 2)

    return {
        'playery': playery,
        'basex': 0,
        'playerIndexGen': cycle([0, 1, 2, 1]),
    }


def mainGame(movementInfo):
    """Play one episode under the Q-learning bot's control.

    Every frame the bot picks an action for the current discretised state,
    the player and pipes are advanced, and the transition
    ``(state, action, reward, next_state)`` is recorded. The episode ends on
    the first crash. In training mode the Q-table is then updated from the
    recorded transitions (``bot.updateQ``); in testing mode only the episode
    statistics are logged (``bot.sample``).

    Args:
        movementInfo: dict with keys ``'playery'`` (initial player y),
            ``'basex'`` (initial base x offset) and ``'playerIndexGen'``
            (iterator yielding player sprite indices).

    Returns:
        dict describing the crash, with keys ``'y'``, ``'groundCrash'``,
        ``'basex'``, ``'upperPipes'``, ``'lowerPipes'``, ``'score'``,
        ``'playerVelY'`` and ``'playerRot'``. ``'groundCrash'`` is the second
        element of ``checkCrash``'s result. (Earlier revisions wrapped these
        entries in a string literal and therefore returned a one-element
        set; the only caller ignores the value.)
    """
    score = playerIndex = loopIter = 0
    playerIndexGen = movementInfo['playerIndexGen']
    playerx, playery = int(SCREENWIDTH * 0.2), movementInfo['playery']

    basex = movementInfo['basex']
    baseShift = IMAGES['base'].get_width() - IMAGES['background'].get_width()

    # get 2 new pipes to add to upperPipes lowerPipes list
    newPipe1 = getRandomPipe()
    newPipe2 = getRandomPipe()

    # list of upper pipes
    upperPipes = [
        {'x': STARTINGPIPEX, 'y': newPipe1[0]['y']},
        {'x': STARTINGPIPEX + (SCREENWIDTH / 2), 'y': newPipe2[0]['y']},
    ]

    # list of lower pipes
    lowerPipes = [
        {'x': STARTINGPIPEX, 'y': newPipe1[1]['y']},
        {'x': STARTINGPIPEX + (SCREENWIDTH / 2), 'y': newPipe2[1]['y']},
    ]

    pipeVelX = -4

    # player velocity, max velocity, downward acceleration, acceleration on flap
    playerVelY    =  -9   # player's velocity along Y, default same as playerFlapped
    playerMaxVelY =  10   # max vel along Y, max descend speed
    playerAccY    =   1   # players downward acceleration
    playerRot     =  45   # player's rotation
    playerVelRot  =   3   # angular speed
    playerRotThr  =  20   # rotation threshold
    playerFlapAcc =  -9   # players speed on flapping
    playerFlapped = False # True when player flaps

    # Experience tuple list
    exp = []

    # Index (into lowerPipes) of the pipe the state is measured against
    activePipe = 0

    def observeState():
        """Return the discretised state for the current frame.

        The state is (vertical offset of the player from the active lower
        pipe's y, horizontal distance to that pipe, vertical velocity); both
        offsets are divided by ``discretisation`` and rounded.
        """
        pipe = lowerPipes[activePipe]
        return (
            round((playery - pipe['y']) / discretisation),
            round((pipe['x'] - playerx) / discretisation),
            playerVelY,
        )

    while True:
        r = 0
        s = observeState()
        bot.appendState(s)
        # Forward the mode flag so that evaluation always acts greedily;
        # exploration is only wanted while training.
        max_act, _ = bot.maxQ(s, testing)

        for event in pygame.event.get():
            if event.type == QUIT or (event.type == KEYDOWN and event.key == K_ESCAPE):
                bot.saveQ()
                pygame.quit()
                sys.exit()

        # Action 1 means "flap"; it is ignored while the player is far above
        # the top of the screen.
        if max_act and playery > -2 * IMAGES['player'][0].get_height():
            playerVelY = playerFlapAcc
            playerFlapped = True

        # Increment score for a new pipe passed
        playerMidPos = playerx + IMAGES['player'][0].get_width() / 2
        for pipe in upperPipes:
            pipeMidPos = pipe['x'] + IMAGES['pipe'][0].get_width() / 2
            # Increment score only if the pipe has 'just' passed
            if pipeMidPos <= playerMidPos < pipeMidPos + 4:
                score += 1
                activePipe += 1
                r += scoreReward

        # playerIndex basex change
        if (loopIter + 1) % 3 == 0:
            playerIndex = next(playerIndexGen)
        loopIter = (loopIter + 1) % 30
        basex = -((-basex + 100) % baseShift)

        # rotate the player
        if playerRot > -90:
            playerRot -= playerVelRot

        # player's movement
        if playerVelY < playerMaxVelY and not playerFlapped:
            playerVelY += playerAccY
        if playerFlapped:
            playerFlapped = False

            # more rotation to cover the threshold (calculated in visible rotation)
            playerRot = 45

        # Drop the player due to gravity, but never further than the space
        # left between the player and the base.
        playerHeight = IMAGES['player'][playerIndex].get_height()
        playery += min(playerVelY, BASEY - playery - playerHeight)

        # move pipes to left
        for uPipe, lPipe in zip(upperPipes, lowerPipes):
            uPipe['x'] += pipeVelX
            lPipe['x'] += pipeVelX

        # add new pipe when first pipe is about to touch left of screen
        if 0 < upperPipes[0]['x'] < 5:
            newPipe = getRandomPipe()
            upperPipes.append(newPipe[0])
            lowerPipes.append(newPipe[1])

        # remove first pipe if it is out of the screen; the remaining pipes
        # shift down one index, so the active index follows them
        if upperPipes[0]['x'] < -IMAGES['pipe'][0].get_width():
            upperPipes.pop(0)
            lowerPipes.pop(0)
            activePipe -= 1

        # check for crash here
        crashTest = checkCrash({'x': playerx, 'y': playery, 'index': playerIndex},
                               upperPipes, lowerPipes)

        # If crashed then assign die reward
        if crashTest[0]:
            r += dieReward

        # Create experience tuple
        s2 = observeState()
        bot.appendState(s2)
        exp.append((s, max_act, r, s2))

        # If the player has crashed, learn from (or just log) the episode and
        # hand the crash details back to the caller.
        if crashTest[0]:
            if testing:
                # if we're testing, no need to update Q: simply sample
                bot.sample(score, exp, True)
            else:
                # checkCrash reports False in its second slot only for an
                # upper-pipe collision, hence the negation.
                bot.updateQ(exp, not crashTest[1], score)
            return {
                'y': playery,
                'groundCrash': crashTest[1],
                'basex': basex,
                'upperPipes': upperPipes,
                'lowerPipes': lowerPipes,
                'score': score,
                'playerVelY': playerVelY,
                'playerRot': playerRot,
            }

        # draw sprites
        SCREEN.blit(IMAGES['background'], (0,0))

        for uPipe, lPipe in zip(upperPipes, lowerPipes):
            SCREEN.blit(IMAGES['pipe'][0], (uPipe['x'], uPipe['y']))
            SCREEN.blit(IMAGES['pipe'][1], (lPipe['x'], lPipe['y']))

        SCREEN.blit(IMAGES['base'], (basex, BASEY))
        # print score so player overlaps the score
        showScore(score)

        # Player rotation has a threshold
        visibleRot = playerRotThr
        if playerRot <= playerRotThr:
            visibleRot = playerRot

        playerSurface = pygame.transform.rotate(IMAGES['player'][playerIndex], visibleRot)
        SCREEN.blit(playerSurface, (playerx, playery))

        pygame.display.update()
        FPSCLOCK.tick(FPS)


def showGameOverScreen(crashInfo):
    """Placeholder for the game-over screen: ignore ``crashInfo`` and return ``None``."""
    return None

def playerShm(playerShm):
    """Advance the oscillator in ``playerShm`` by one step, in place.

    ``playerShm['dir']`` is negated whenever ``playerShm['val']`` sits at +8
    or -8; ``'val'`` then moves up by one if ``'dir'`` equals 1 and down by
    one otherwise, so it swings between 8 and -8.
    """
    at_turning_point = abs(playerShm['val']) == 8
    if at_turning_point:
        playerShm['dir'] *= -1

    step = 1 if playerShm['dir'] == 1 else -1
    playerShm['val'] += step

def getRandomPipe():
    """Generate a pipe pair with a randomly placed gap.

    Returns:
        ``[upper, lower]``: two dicts holding the ``'x'``/``'y'`` position of
        the upper and lower pipe. Both start at ``SCREENWIDTH + 10``; the gap
        between them is ``PIPEGAPSIZE`` pixels high.
    """
    # Random top edge of the gap, offset by 20% of the base height.
    gap_range = int(BASEY * 0.6 - PIPEGAPSIZE)
    gap_top = random.randrange(0, gap_range) + int(BASEY * 0.2)

    start_x = SCREENWIDTH + 10
    upper_pipe = {'x': start_x, 'y': gap_top - IMAGES['pipe'][0].get_height()}
    lower_pipe = {'x': start_x, 'y': gap_top + PIPEGAPSIZE}
    return [upper_pipe, lower_pipe]


def showScore(score):
    """Blit the digits of ``score``, horizontally centred, near the top of the screen."""
    digit_sprites = [IMAGES['numbers'][int(char)] for char in str(score)]

    # Total width of all digits, used to centre the number.
    total_width = sum(sprite.get_width() for sprite in digit_sprites)

    x = (SCREENWIDTH - total_width) / 2
    y = SCREENHEIGHT * 0.1
    for sprite in digit_sprites:
        SCREEN.blit(sprite, (x, y))
        x += sprite.get_width()

def checkCrash(player, upperPipes, lowerPipes):
    """Test whether the player has hit the base or any pipe.

    Args:
        player: dict with ``'x'``, ``'y'`` and ``'index'`` (which player
            hitmask to use). The keys ``'w'`` and ``'h'`` are written to it.
        upperPipes: list of upper pipe position dicts.
        lowerPipes: list of lower pipe position dicts, paired with
            ``upperPipes`` by position.

    Returns:
        ``[crashed, flag]``. ``crashed`` is True on any collision. ``flag``
        is False for an upper-pipe collision and True for a base or
        lower-pipe collision. ``[False, False]`` means no collision.
    """
    sprite_index = player['index']
    player['w'] = IMAGES['player'][0].get_width()
    player['h'] = IMAGES['player'][0].get_height()

    # Touching the base counts as a crash.
    if player['y'] + player['h'] >= BASEY - 1:
        return [True, True]

    bird_rect = pygame.Rect(player['x'], player['y'], player['w'], player['h'])
    pipe_width = IMAGES['pipe'][0].get_width()
    pipe_height = IMAGES['pipe'][0].get_height()

    for upper, lower in zip(upperPipes, lowerPipes):
        upper_rect = pygame.Rect(upper['x'], upper['y'], pipe_width, pipe_height)
        lower_rect = pygame.Rect(lower['x'], lower['y'], pipe_width, pipe_height)

        # Pixel-accurate test of the bird against both pipes of the pair.
        bird_mask = HITMASKS['player'][sprite_index]
        upper_mask = HITMASKS['pipe'][0]
        lower_mask = HITMASKS['pipe'][1]
        hit_upper = pixelCollision(bird_rect, upper_rect, bird_mask, upper_mask)
        hit_lower = pixelCollision(bird_rect, lower_rect, bird_mask, lower_mask)

        if hit_upper:
            return [True, False]
        if hit_lower:
            return [True, True]

    return [False, False]

def pixelCollision(rect1, rect2, hitmask1, hitmask2):
    """Return True when two objects overlap on a pixel that is set in both hitmasks.

    The rects are first clipped against each other. Only the overlapping
    region is scanned, with each hitmask indexed as ``mask[x][y]`` relative
    to its own rect's top-left corner.
    """
    overlap = rect1.clip(rect2)
    if overlap.width == 0 or overlap.height == 0:
        return False

    # Offsets of the overlap region inside each rect.
    x1, y1 = overlap.x - rect1.x, overlap.y - rect1.y
    x2, y2 = overlap.x - rect2.x, overlap.y - rect2.y

    return any(
        hitmask1[x1 + dx][y1 + dy] and hitmask2[x2 + dx][y2 + dy]
        for dx in xrange(overlap.width)
        for dy in xrange(overlap.height)
    )

def getHitmask(image):
    """Build a hitmask from an image's alpha channel.

    Returns a list of columns, indexed ``mask[x][y]``, in which an entry is
    True where the pixel's fourth component (alpha) is non-zero.
    """
    return [
        [bool(image.get_at((col, row))[3]) for row in xrange(image.get_height())]
        for col in xrange(image.get_width())
    ]


# Start the episode loop when this code is executed as the main module; the
# `testing` flag selects evaluation or training.
if __name__ == '__main__':
    main()

# Above, the testing is executed. 

To interpret this a bit better, here's a summary of the process:
* A log is produced every time the game is reset (e.g., the bird dies). It can be found in `cache_file_testing.txt` in the same directory as this notebook. 
* Reward, and average reward are computed for reference. 
* What is most important to note is the Flaps and the Average Flaps, as they represent the number of times the bird flaps, which is the primary, and only, metric of how good the user is at playing the game. 

Referencing the paper, one can see how the maximal flaps are: 109.9, and the average flaps are: 296. 

# Now, we can move onto training. 

* We switch the `testing` variable to `False` to enable the training process.
* It is recommended to use a GPU for training. CPU is too slow. 
* The block from above is copied again, since it's the same code used for training, except we set the `testing` variable to `False`.

In [None]:
# Mode switch read by main() and mainGame(): False runs training (Q-table
# updated and saved), True runs the short evaluation loop.
testing = False

In [None]:
from itertools import cycle
import random
import sys
import pickle

# Select SDL's "dummy" video driver so that no game window is rendered; this
# is set before pygame is imported below.
import os
os.environ['SDL_VIDEODRIVER']='dummy'

import pygame
from pygame.locals import *

# Seed Python's `random` module (used for pipe placement and exploration)
# with a fixed value so runs are repeatable.
random.seed(999)

# Frame-rate cap handed to FPSCLOCK.tick(); presumably set this high so the
# clock never slows the simulation down.
FPS = 10000000
# Window size in pixels.
SCREENWIDTH  = 288
SCREENHEIGHT = 512
PIPEGAPSIZE  = 100 # gap between upper and lower part of pipe
# y coordinate at which the base (ground) sprite is drawn.
BASEY        = SCREENHEIGHT * 0.79
# image, sound and hitmask dicts, filled in by main()
IMAGES, SOUNDS, HITMASKS = {}, {}, {}

# x position of the first pipe pair at the start of an episode
STARTINGPIPEX = SCREENWIDTH - 100

# Initialise bot (loads a saved Q-table if one exists)
bot = Bot()

# Initialise the rewards and discretisation.
# dieReward is added to the reward of the frame in which the bird crashes,
# scoreReward for every pipe passed. Pixel offsets in the state are divided by
# `discretisation` and rounded, i.e. the screen is treated as a coarser grid.
dieReward = -10
scoreReward = 1
discretisation = 4

# Sprite paths for every available bird colour; each entry is a tuple of the
# three wing positions (up-flap, mid-flap, down-flap).
# main() always loads index 0 of each of the lists below.
PLAYERS_LIST = (
    # red bird
    (
        'assets/sprites/redbird-upflap.png',
        'assets/sprites/redbird-midflap.png',
        'assets/sprites/redbird-downflap.png',
    ),
    # blue bird
    (
        'assets/sprites/bluebird-upflap.png',
        'assets/sprites/bluebird-midflap.png',
        'assets/sprites/bluebird-downflap.png',
    ),
    # yellow bird
    (
        'assets/sprites/yellowbird-upflap.png',
        'assets/sprites/yellowbird-midflap.png',
        'assets/sprites/yellowbird-downflap.png',
    ),
)

# list of background sprite paths
BACKGROUNDS_LIST = (
    'assets/sprites/background-day.png',
    'assets/sprites/background-night.png',
)

# list of pipe sprite paths
PIPES_LIST = (
    'assets/sprites/pipe-green.png',
    'assets/sprites/pipe-red.png',
)


# Compatibility shim: where the name `xrange` does not exist (Python 3),
# alias it to `range` so the pixel loops can keep using `xrange`.
try:
    xrange
except NameError:
    xrange = range

def main():
    """Initialise pygame, load the game assets and play a fixed number of episodes.

    Ten episodes are played when the module-level ``testing`` flag is true
    and 10000 otherwise. Every episode is one call to ``mainGame``.

    The sprite choice is fixed (index 0 of every asset list), so the sprites
    and their hitmasks are loaded once before the episode loop rather than
    again for every episode.
    """
    global SCREEN, FPSCLOCK

    if testing:
        print("Testing Mode")
    else:
        print("Training Mode")

    pygame.init()
    FPSCLOCK = pygame.time.Clock()
    SCREEN = pygame.display.set_mode((SCREENWIDTH, SCREENHEIGHT))
    pygame.display.set_caption('FlapPy Bird Q-Bot')  # Set the window name

    # numbers sprites for score display (0.png ... 9.png)
    IMAGES['numbers'] = tuple(
        pygame.image.load('assets/sprites/{}.png'.format(digit)).convert_alpha()
        for digit in range(10)
    )

    # game over sprite
    IMAGES['gameover'] = pygame.image.load('assets/sprites/gameover.png').convert_alpha()
    # message sprite for welcome screen
    IMAGES['message'] = pygame.image.load('assets/sprites/message.png').convert_alpha()
    # base (ground) sprite
    IMAGES['base'] = pygame.image.load('assets/sprites/base.png').convert_alpha()

    # sounds
    # NOTE: this is a substring test, so it also matches 'darwin' (macOS).
    if 'win' in sys.platform:
        soundExt = '.wav'
    else:
        soundExt = '.ogg'

    for sound_name in ('die', 'hit', 'point', 'swoosh', 'wing'):
        SOUNDS[sound_name] = pygame.mixer.Sound('assets/audio/' + sound_name + soundExt)

    # Fixed sprite selection: day background, red bird, green pipe.
    IMAGES['background'] = pygame.image.load(BACKGROUNDS_LIST[0]).convert()
    IMAGES['player'] = tuple(
        pygame.image.load(sprite_path).convert_alpha()
        for sprite_path in PLAYERS_LIST[0]
    )
    pipe_path = PIPES_LIST[0]
    IMAGES['pipe'] = (
        # upper pipe: the pipe sprite rotated by 180 degrees
        pygame.transform.rotate(pygame.image.load(pipe_path).convert_alpha(), 180),
        # lower pipe
        pygame.image.load(pipe_path).convert_alpha(),
    )

    # Per-pixel hitmasks for the pipes and for each player sprite.
    HITMASKS['pipe'] = tuple(getHitmask(image) for image in IMAGES['pipe'])
    HITMASKS['player'] = tuple(getHitmask(image) for image in IMAGES['player'])

    episode_limit = 10 if testing else 10000
    for _ in range(episode_limit):
        movementInfo = showWelcomeAnimation()
        crashInfo = mainGame(movementInfo)
        showGameOverScreen(crashInfo)

    if testing:
        print("Testing Finished")
    else:
        print("Training Finished")


def showWelcomeAnimation():
    """Return the initial movement state for a new episode.

    Despite the name (kept so existing callers keep working) nothing is
    drawn and no input is awaited: the function only computes where the
    player starts.

    Returns:
        dict with
        ``'playery'``: the y position that vertically centres the first
        player sprite on the screen,
        ``'basex'``: the initial x offset of the base, always 0,
        ``'playerIndexGen'``: an endless iterator over the sprite indices
        0, 1, 2, 1.
    """
    playery = int((SCREENHEIGHT - IMAGES['player'][0].get_height()) / 2)

    return {
        'playery': playery,
        'basex': 0,
        'playerIndexGen': cycle([0, 1, 2, 1]),
    }


def mainGame(movementInfo):
    """Play one episode with the bot choosing every action; return on crash.

    Each frame the discretised state is handed to the module-level ``bot``,
    the action it returns is applied, physics and pipes are advanced, and an
    experience tuple ``(s, action, reward, s2)`` is appended to ``exp``.

    Besides the pygame globals (SCREEN, IMAGES, SCREENWIDTH, BASEY, FPSCLOCK,
    FPS) this function reads names that are not defined inside it: ``bot``,
    ``testing``, ``discretisation``, ``scoreReward``, ``dieReward`` and
    ``STARTINGPIPEX``.

    Args:
        movementInfo: dict providing 'playerIndexGen', 'playery' and 'basex'
            (the value returned by showWelcomeAnimation()).

    Returns:
        A set containing a single string. The crash-info dictionary body is
        wrapped in a triple-quoted string, so no real dict is built.
        The function only returns after a crash; a QUIT/ESC event exits the
        process instead.
    """

    score = playerIndex = loopIter = 0
    playerIndexGen = movementInfo['playerIndexGen']
    playerx, playery = int(SCREENWIDTH * 0.2), movementInfo['playery']

    basex = movementInfo['basex']
    # amount by which the base can shift left before it wraps
    baseShift = IMAGES['base'].get_width() - IMAGES['background'].get_width()

    # get 2 new pipes to add to upperPipes lowerPipes list
    # (only their 'y' values are used; 'x' is overridden below)
    newPipe1 = getRandomPipe()
    newPipe2 = getRandomPipe()

    # list of upper pipes, the second one half a screen behind the first
    upperPipes = [
        {'x': STARTINGPIPEX, 'y': newPipe1[0]['y']},
        {'x': STARTINGPIPEX + (SCREENWIDTH / 2), 'y': newPipe2[0]['y']},
    ]

    # list of lower pipes, index-aligned with upperPipes
    lowerPipes = [
        {'x': STARTINGPIPEX, 'y': newPipe1[1]['y']},
        {'x': STARTINGPIPEX + (SCREENWIDTH / 2), 'y': newPipe2[1]['y']},
    ]

    # horizontal pipe speed in pixels per frame (negative = moving left)
    pipeVelX = -4

    # player velocity, max velocity, downward acceleration, acceleration on flap
    playerVelY    =  -9   # player's velocity along Y, starts equal to playerFlapAcc
    playerMaxVelY =  10   # max vel along Y, max descend speed
    playerMinVelY =  -8   # min vel along Y, max ascend speed (never read in this function)
    playerAccY    =   1   # player's downward acceleration
    playerRot     =  45   # player's rotation
    playerVelRot  =   3   # angular speed
    playerRotThr  =  20   # rotation threshold
    playerFlapAcc =  -9   # player's speed on flapping
    playerFlapped = False # True when player flaps

    # Experience tuple list: one (s, action, reward, s2) entry per frame
    exp = []

    # Index into lowerPipes of the pipe used to build the state
    activePipe = 0

    while True:
        # reward collected during this frame
        r = 0
        # State before acting:
        #   (vertical offset of the player from the active lower pipe's y,
        #    horizontal distance from the player to the active lower pipe's x,
        #    current vertical velocity)
        # The two distances are divided by `discretisation` and rounded.
        s = (round((playery-lowerPipes[activePipe]['y'])/discretisation), round((lowerPipes[activePipe]['x']-playerx)/discretisation), playerVelY)
        # NOTE(review): presumably registers the state in the Q-table if unseen; confirm in Bot.appendState
        bot.appendState(s)
        # max_act is the action the bot picks for s (truthy means flap);
        # max_val is not used in this function
        max_act, max_val = bot.maxQ(s)

        # The only input handled is a quit request: store the Q-table via
        # bot.saveQ() and terminate the process.
        for event in pygame.event.get():
            if event.type == QUIT or (event.type == KEYDOWN and event.key == K_ESCAPE):
                bot.saveQ()
                pygame.quit()
                sys.exit()

        # Apply the bot's action: flap, unless the player is already more
        # than two sprite heights above the top of the screen.
        if max_act:
                if playery > -2 * IMAGES['player'][0].get_height():
                    playerVelY = playerFlapAcc
                    playerFlapped = True
                    #SOUNDS['wing'].play()

        # Increment score for a new pipe passed
        playerMidPos = playerx + IMAGES['player'][0].get_width() / 2
        #Iterate through each of the pipes
        for pipe in upperPipes:
            pipeMidPos = pipe['x'] + IMAGES['pipe'][0].get_width() / 2
            if pipeMidPos <= playerMidPos < pipeMidPos + 4: #Increment score only if the pipe has 'just' passed
                score += 1
                # the next pipe in the list becomes the one the state refers to
                activePipe+=1
                r+=scoreReward
                #SOUNDS['point'].play()

        # playerIndex basex change
        # (advance the wing frame every third iteration, scroll the base)
        if (loopIter + 1) % 3 == 0:
            playerIndex = next(playerIndexGen)
        loopIter = (loopIter + 1) % 30
        basex = -((-basex + 100) % baseShift)

        # rotate the player
        if playerRot > -90:
            playerRot -= playerVelRot

        # player's movement: gravity applies only on frames without a flap
        if playerVelY < playerMaxVelY and not playerFlapped:
            playerVelY += playerAccY
        if playerFlapped:
            playerFlapped = False

            # more rotation to cover the threshold (calculated in visible rotation)
            playerRot = 45

        # Drop the player due to gravity
        playerHeight = IMAGES['player'][playerIndex].get_height()
        playery += min(playerVelY, BASEY - playery - playerHeight) # Check if drop height is more than the space between player and base

        # move pipes to left
        for uPipe, lPipe in zip(upperPipes, lowerPipes):
            uPipe['x'] += pipeVelX
            lPipe['x'] += pipeVelX

        # add new pipe when first pipe is about to touch left of screen
        if 0 < upperPipes[0]['x'] < 5:
            newPipe = getRandomPipe()
            upperPipes.append(newPipe[0])
            lowerPipes.append(newPipe[1])

        # remove first pipe if it is out of the screen
        if upperPipes[0]['x'] < -IMAGES['pipe'][0].get_width():
            upperPipes.pop(0)
            lowerPipes.pop(0)
            # indices shifted down by one, keep activePipe on the same pipe
            activePipe-=1

        # check for crash here
        # crashTest is [crashed, flag]; flag is False only for an upper-pipe hit
        crashTest = checkCrash({'x': playerx, 'y': playery, 'index': playerIndex},
                               upperPipes, lowerPipes)
        
        # Check for crash, if crashed then assign die reward
        if crashTest[0]:
            r += dieReward

        # Create experience tuple
        # s2 is built exactly like s, from the post-move positions
        s2 = (round((playery-lowerPipes[activePipe]['y'])/discretisation), round((lowerPipes[activePipe]['x']-playerx)/discretisation), playerVelY)
        bot.appendState(s2)
        exp.append((s, max_act, r, s2))

        # If player has crashed, return to showGameOverScreen()
        # Update the Q table values using exp list and pipe crash
        if crashTest[0]:
            if testing == False: 
                # Training run: the second argument is True only when the
                # crash was against an upper pipe (crashTest[1] is False).
                bot.updateQ(exp, not crashTest[1], score)
                # The braces below hold one triple-quoted string, so this
                # returns a set with a single string, not a dict.
                return {
                    """
                    'y': playery,
                    'groundCrash': crashTest[1],
                    'basex': basex,
                    'upperPipes': upperPipes,
                    'lowerPipes': lowerPipes,
                    'score': score,
                    'playerVelY': playerVelY,
                    'playerRot': playerRot
                    """
                }
            else: 
                # if we're testing, no need to update Q
                # simply sample
                # NOTE(review): meaning of the trailing True is defined by Bot.sample; confirm there
                bot.sample(score, exp, True)
                # Same single-string set as in the training branch.
                return {
                    """
                    'y': playery,
                    'groundCrash': crashTest[1],
                    'basex': basex,
                    'upperPipes': upperPipes,
                    'lowerPipes': lowerPipes,
                    'score': score,
                    'playerVelY': playerVelY,
                    'playerRot': playerRot
                    """
                }
                

        # draw sprites (only reached on frames without a crash)
        SCREEN.blit(IMAGES['background'], (0,0))

        for uPipe, lPipe in zip(upperPipes, lowerPipes):
            SCREEN.blit(IMAGES['pipe'][0], (uPipe['x'], uPipe['y']))
            SCREEN.blit(IMAGES['pipe'][1], (lPipe['x'], lPipe['y']))

        SCREEN.blit(IMAGES['base'], (basex, BASEY))
        # print score so player overlaps the score
        showScore(score)

        # Player rotation has a threshold: the drawn angle never exceeds playerRotThr
        visibleRot = playerRotThr
        if playerRot <= playerRotThr:
            visibleRot = playerRot
        
        playerSurface = pygame.transform.rotate(IMAGES['player'][playerIndex], visibleRot)
        SCREEN.blit(playerSurface, (playerx, playery))

        pygame.display.update()
        FPSCLOCK.tick(FPS)


def showGameOverScreen(crashInfo):
    """Do nothing: the game-over screen is skipped and crashInfo is ignored."""
    return None

def playerShm(playerShm):
    """Advance the bobbing state in place by one step.

    ``playerShm['val']`` moves by 1 in the current direction; the direction
    ``playerShm['dir']`` is flipped first whenever ``val`` sits at 8 or -8,
    so the value oscillates between those two bounds.
    """
    state = playerShm

    # Turn around at either extreme before taking the step.
    at_extreme = abs(state['val']) == 8
    if at_extreme:
        state['dir'] *= -1

    step = 1 if state['dir'] == 1 else -1
    state['val'] += step

def getRandomPipe():
    """Create a randomly positioned pipe pair just beyond the right screen edge.

    Returns a two-element list of x/y dicts: the upper pipe first, then the
    lower pipe. Both share the same x; the vertical opening between them is
    PIPEGAPSIZE tall.
    """
    # Top of the opening: a random offset on top of 20% of the base height.
    offset_limit = int(BASEY * 0.6 - PIPEGAPSIZE)
    gap_top = random.randrange(0, offset_limit)
    gap_top = gap_top + int(BASEY * 0.2)

    pipe_height = IMAGES['pipe'][0].get_height()
    spawn_x = SCREENWIDTH + 10

    # The upper pipe is placed so that its bottom edge sits at gap_top.
    upper = {'x': spawn_x, 'y': gap_top - pipe_height}
    lower = {'x': spawn_x, 'y': gap_top + PIPEGAPSIZE}
    return [upper, lower]


def showScore(score):
    """Blit the score, horizontally centred, at 10% of the screen height."""
    digit_values = [int(ch) for ch in str(score)]

    # Total pixel width of all digit sprites, needed to centre the number.
    combined_width = sum(
        IMAGES['numbers'][value].get_width() for value in digit_values
    )

    x_pos = (SCREENWIDTH - combined_width) / 2
    y_pos = SCREENHEIGHT * 0.1

    # Draw the digits left to right, advancing by each sprite's own width.
    for value in digit_values:
        sprite = IMAGES['numbers'][value]
        SCREEN.blit(sprite, (x_pos, y_pos))
        x_pos += sprite.get_width()

def checkCrash(player, upperPipes, lowerPipes):
    """Report whether the player has hit the ground or a pipe.

    Args:
        player: dict with 'x', 'y' and 'index' (the animation frame whose
            hitmask is used). The keys 'w' and 'h' are written into it.
        upperPipes: list of upper-pipe dicts with 'x' and 'y'.
        lowerPipes: list of lower-pipe dicts, index-aligned with upperPipes.

    Returns:
        A two-element list ``[crashed, flag]``:
          [True, True]   - hit the ground or a lower pipe
          [True, False]  - hit an upper pipe
          [False, False] - no collision
    """
    pi = player['index']
    # The bounding box always uses frame 0's size, whatever frame `pi` is.
    player['w'] = IMAGES['player'][0].get_width()
    player['h'] = IMAGES['player'][0].get_height()

    # if player crashes into ground
    if player['y'] + player['h'] >= BASEY - 1:
        return [True, True]

    playerRect = pygame.Rect(player['x'], player['y'],
                             player['w'], player['h'])
    pipeW = IMAGES['pipe'][0].get_width()
    pipeH = IMAGES['pipe'][0].get_height()

    # The hitmasks do not change between pipes, so look them up once.
    pHitMask = HITMASKS['player'][pi]
    uHitmask = HITMASKS['pipe'][0]
    lHitmask = HITMASKS['pipe'][1]

    for uPipe, lPipe in zip(upperPipes, lowerPipes):
        # The upper pipe is tested first and takes precedence.
        uPipeRect = pygame.Rect(uPipe['x'], uPipe['y'], pipeW, pipeH)
        if pixelCollision(playerRect, uPipeRect, pHitMask, uHitmask):
            return [True, False]

        lPipeRect = pygame.Rect(lPipe['x'], lPipe['y'], pipeW, pipeH)
        if pixelCollision(playerRect, lPipeRect, pHitMask, lHitmask):
            return [True, True]

    return [False, False]

def pixelCollision(rect1, rect2, hitmask1, hitmask2):
    """Return True when two sprites overlap on at least one solid pixel.

    The bounding rectangles are intersected first; only the pixels inside
    that intersection are compared, so sprites whose boxes overlap only on
    transparent pixels do not count as colliding.

    Args:
        rect1: bounding rectangle of the first sprite; must provide
            ``clip()`` plus ``x`` and ``y``.
        rect2: bounding rectangle of the second sprite (``x`` and ``y``).
        hitmask1: mask of the first sprite, indexed ``[x][y]`` relative to
            rect1's top-left corner; truthy means solid.
        hitmask2: mask of the second sprite, indexed the same way for rect2.

    Returns:
        bool: True if any pixel in the overlap is solid in both masks.
    """
    rect = rect1.clip(rect2)

    # An empty intersection means the bounding boxes do not overlap at all.
    if rect.width == 0 or rect.height == 0:
        return False

    # Offsets of the overlap region inside each sprite's own mask.
    x1, y1 = rect.x - rect1.x, rect.y - rect1.y
    x2, y2 = rect.x - rect2.x, rect.y - rect2.y

    # `range` rather than `xrange`, which is not a builtin on Python 3.
    return any(
        hitmask1[x1 + x][y1 + y] and hitmask2[x2 + x][y2 + y]
        for x in range(rect.width)
        for y in range(rect.height)
    )

def getHitmask(image):
    """Build a per-pixel hitmask from an image's alpha channel.

    Args:
        image: object providing ``get_width()``, ``get_height()`` and
            ``get_at((x, y))``, where element 3 of the returned pixel is
            the alpha value.

    Returns:
        A list of columns indexed ``mask[x][y]``; an entry is True when the
        pixel's alpha is non-zero.
    """
    width = image.get_width()
    height = image.get_height()

    # `range` rather than `xrange`, which is not a builtin on Python 3.
    return [
        [bool(image.get_at((x, y))[3]) for y in range(height)]
        for x in range(width)
    ]


# Start the game only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()