# Single player AI

**Part 1: Q-Learning**

*Part 1.1: Single-Player Pong*

In [1]:
import numpy as np
import random

class discretePong():
    """Single-player Pong on the unit square with a discretised state view.

    Continuous state: ball position (bX, bY), ball velocity (vX, vY) and the
    top edge of the paddle (pY).  The paddle sits on the right wall at pX and
    is 0.2 tall.  ``discreteValues`` derives the coarse attributes (ballX,
    ballY, velocityX, velocityY, paddleY) that ``getStateIndex`` packs into a
    single integer.
    """

    def __init__(self):
        self.bX = 0.5
        self.bY = 0.5
        self.vX = 0.03
        self.vY = 0.01
        self.pX = 1
        self.pY = 0.5-0.2/2
        self.terminate = False
        self.score = 0
        self.discreteValues()

    def discreteValues(self):
        """Refresh the discrete attributes from the continuous state."""
        # NOTE(review): int() truncates toward zero, so bX == 0 exactly maps
        # to -1 here (a negative table index downstream) -- confirm intended.
        self.ballX = int(12*self.bX-1)
        self.ballY = int(12*self.bY-1)
        if self.vX >= 0: self.velocityX = 1
        else: self.velocityX = -1
        if self.vY == 0: self.velocityY = 0
        elif self.vY > 0: self.velocityY = 1
        else: self.velocityY = -1
        if self.pY >= 1-0.2: self.paddleY = 11
        else: self.paddleY = int(12*self.pY/(1-0.2))

    def printState(self):
        """Print the discrete ball position, velocity signs and paddle cell."""
        print("bX: "+str(self.ballX)+", bY: "+str(self.ballY))
        print("vX: "+str(self.velocityX)+", vY: "+str(self.velocityY))
        print("paddleY: "+str(self.paddleY))

    def getStateIndex(self):
        """Return the flat index of the current discrete state.

        Returns -1 once the game has terminated.  Otherwise the index is
        ``ballX*12*12*6 + ballY*12*6 + paddleY*6`` plus a 0..5 code for the
        (velocityX, velocityY) pair.
        """
        if self.terminate: return -1
        index = self.ballX*12*12*6+self.ballY*12*6+self.paddleY*6
        if self.velocityX == -1 and self.velocityY == 0: return index
        if self.velocityX == -1 and self.velocityY == 1: return index+1
        if self.velocityX == -1 and self.velocityY == -1: return index+2
        if self.velocityX == 1 and self.velocityY == 0: return index+3
        if self.velocityX == 1 and self.velocityY == 1: return index+4
        if self.velocityX == 1 and self.velocityY == -1: return index+5
        print("Index Error!")
        return 0

    def _bouncedVelocity(self, incoming, spread, minSpeed=None):
        """Return ``-incoming`` plus uniform noise in [-spread, spread].

        The draw is repeated until the speed is below 1 and, when
        ``minSpeed`` is given, strictly above ``minSpeed``.  Every retry
        starts again from ``incoming``, so a retry can never undo the
        reflection (the previous in-place loop re-negated the velocity on
        each retry).
        """
        for _ in range(1000):
            candidate = -incoming + np.random.uniform(-spread, spread)
            fastEnough = minSpeed is None or abs(candidate) > minSpeed
            if fastEnough and abs(candidate) < 1:
                return candidate
        # Only reachable when no draw can satisfy the bounds (incoming speed
        # far outside the legal range): clamp instead of looping forever.
        lowest = 0.0 if minSpeed is None else minSpeed + spread
        magnitude = min(max(abs(incoming), lowest), 1 - spread)
        return -magnitude if incoming > 0 else magnitude

    def transitionModel(self, pDy):
        """Advance the game one step with paddle displacement ``pDy``.

        Returns the step reward: 1 when the paddle returns the ball, -1 when
        the ball passes the paddle (which also sets ``terminate``), else 0.
        """
        reward = 0

        #update ball
        if self.bX >= 0 and self.bX <= 1:
            self.bX += self.vX
        if self.bY >= 0 and self.bY <= 1:
            self.bY += self.vY

        #update paddleY
        # NOTE(review): the upper bound is 1, not 1-0.2, so the 0.2-tall
        # paddle can extend past the bottom edge -- confirm intended.
        if pDy and self.pY+pDy >= 0 and self.pY+pDy <= 1:
            self.pY += pDy

        #check bounce
        ##edge bounce: mirror the position back inside and flip the velocity
        if self.bY < 0:
            self.bY = -self.bY
            self.vY = -self.vY
        if self.bY > 1:
            self.bY = 2-self.bY
            self.vY = -self.vY
        if self.bX < 0:
            self.bX = -self.bX
            self.vX = -self.vX

        ##paddle bounce
        if self.bX >= 1 and self.bY >= self.pY and self.bY <= self.pY+0.2:
            reward = 1
            self.score += 1
            self.bX = 2*self.pX - self.bX
            # Horizontal noise is uniform in [-0.015, 0.015]; the first draw
            # previously used uniform(-0.015, -0.015), i.e. a constant.
            self.vX = self._bouncedVelocity(self.vX, 0.015, 0.03)
            self.vY = self._bouncedVelocity(self.vY, 0.03)

        ##paddle missed
        elif self.bX >= 1:
            reward = -1
            self.score -= 1
            self.terminate = True

        self.discreteValues()

        return reward

In [2]:
# Paddle displacement for each action index (0: stay, 1: +0.04, 2: -0.04);
# the chosen entry is passed to discretePong.transitionModel as pDy.
actionStates = np.array([0, 0.04, -0.04])

def chooseAction(QArr, epsilon):
    """Pick an action index from one state's row of Q-values.

    ``epsilon`` is the probability of acting greedily (not the exploration
    rate): when a uniform draw in [0, 1) exceeds ``epsilon`` a uniformly
    random action is returned instead.

    In the greedy case, ties for the maximum are broken uniformly at random,
    so an all-zero (unvisited) row still yields a random action and equal
    estimates are not biased toward the lowest index.
    """
    if np.random.uniform() > epsilon:
        return np.random.randint(0, len(QArr))
    best = np.flatnonzero(QArr == np.max(QArr))
    if best.size == 0:
        # Only possible when the row contains NaN; defer to argmax.
        return int(np.argmax(QArr))
    return int(np.random.choice(best))

def QLearning(gamma, c, epsilon, maxEpoch):
    """Train a tabular Q-function on discretePong.

    Parameters:
        gamma: discount factor applied to the bootstrapped next-state value.
        c: learning-rate constant; the step size for a state visited N times
           so far is ``c / (c + N)``.
        epsilon: forwarded to ``chooseAction`` (there it is the probability
           of acting greedily).
        maxEpoch: number of games (episodes) to play.

    Returns:
        (QTable, scores): the learned table with one row per state index and
        one column per action, and the final score of every episode.
    """
    # 12*12*12*6 regular states plus one extra row; presumably the extra row
    # is what the terminal index -1 would address.
    numStates = 10369
    QTable = np.zeros((numStates, 3))
    freqTable = np.zeros(numStates)
    scores = np.zeros(maxEpoch)
    for epoch in range(maxEpoch):
        pongState = discretePong()
        while not pongState.terminate:
            stateIndex = int(pongState.getStateIndex())
            actionIndex = chooseAction(QTable[stateIndex], epsilon)
            #Previous estimate for the (state, action) pair
            QPredict = QTable[stateIndex][actionIndex]
            #Perform the action and observe the reward
            reward = pongState.transitionModel(actionStates[actionIndex])
            if pongState.terminate:
                # No successor state: the target is the reward alone.
                QTarget = reward
                scores[epoch] = pongState.score
            else:
                nextStateIndex = int(pongState.getStateIndex())
                # Q-learning bootstraps from the best next action.  The
                # earlier code sampled a (possibly random) next action that
                # was never actually executed.
                QTarget = reward + gamma*np.max(QTable[nextStateIndex])
            alpha = c/(c+freqTable[stateIndex])
            QTable[stateIndex][actionIndex] += alpha*(QTarget-QPredict)
            freqTable[stateIndex] += 1
    return QTable, scores

def agentTest(policyTable, numGames=200):
    """Play ``numGames`` games greedily with ``policyTable`` and return scores.

    Parameters:
        policyTable: table indexed by state index whose rows hold one value
            per action; the argmax of the row is the action played.
        numGames: number of games to play (defaults to the original 200).

    Returns:
        Array with the final score of each game.
    """
    scores = np.zeros(numGames)
    for i in range(numGames):
        pongState = discretePong()
        while not pongState.terminate:
            stateIndex = int(pongState.getStateIndex())
            actionIndex = np.argmax(policyTable[stateIndex])
            #Perform the greedy action; the step reward itself is not needed
            pongState.transitionModel(actionStates[actionIndex])
        # The loop only exits on termination, so the score is final here.
        scores[i] = pongState.score
    return scores

In [None]:
# opt_gamma = 0.1
# opt_c = 0.1
# maxScore = 0
# for i in range(1, 10):
#     gamma = 0.1*i
#     for j in range(1, 10):
#         c = 0.1*j
#         qt, scores = QLearning(gamma, c, 1000)
#         testScores = agentTest(qt)
#         if np.average(testScores) >= maxScore:
#             maxScore = np.average(testScores)
#             opt_gamma = gamma
#             opt_c = c
# print(opt_gamma)
# print(opt_c)

In [3]:
# Train with gamma=0.5, c=0.6, epsilon=0.9 for 100000 episodes; keeps the
# learned table and the per-episode scores.
qt, scores = QLearning(0.5, 0.6, 0.9, 100000)

KeyboardInterrupt: 

In [None]:
# Evaluate the learned table by playing greedily with it.
testScores = agentTest(qt)

In [None]:
import matplotlib.pyplot as plt 
# Per-episode training scores, followed by the mean and the raw test scores.
plt.plot(scores)
print(np.average(testScores))
print(testScores)

Single Player GUI

In [None]:
## Initialization

import pygame, sys
from pygame.locals import *

# Frame-rate cap passed to the pygame clock on every frame.
FPS = 100
# Base movement step, in pixels per frame.
VELOCITY = 1

# Window and sprite geometry, in pixels.
WIDTH = 500
HEIGHT = 500
THICKNESS = 10
RADIUS = 5
# Paddle height is 20% of the window height (previously hard-coded as 0.2*500).
PADDLESIZE = 0.2*HEIGHT
PADDLEOFFSET = 20

# RGB colours.
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
RED = (255, 0, 0)

In [None]:
# Utility Functions

def drawCanvas():
    """Clear the window to black and draw the white frame around it."""
    frame = ((0, 0), (WIDTH, HEIGHT))
    borderWidth = THICKNESS*2
    SURFACE.fill(BLACK)
    pygame.draw.rect(SURFACE, WHITE, frame, borderWidth)
    
def drawPaddle(paddle):
    """Keep the paddle between the top and bottom borders, then draw it.

    The paddle rect is adjusted in place: it is pulled up when its bottom
    passes the lower border, otherwise pushed down when its top passes the
    upper border.
    """
    lowestBottom = HEIGHT - THICKNESS
    if paddle.bottom > lowestBottom:
        paddle.bottom = lowestBottom
    elif paddle.top < THICKNESS:
        paddle.top = THICKNESS
    pygame.draw.rect(SURFACE, WHITE, paddle)

def drawBall(ball):
    """Draw the ball as a red circle of radius RADIUS centred on ``ball``."""
    centre = ball
    pygame.draw.circle(SURFACE, RED, centre, RADIUS)
    
def moveBall(ball, velocityX, velocityY):
    """Return the ball's next position.

    When the ball's right edge has reached the right border the ball is put
    back at the centre of the window; otherwise it advances by the given
    velocity.
    """
    x, y = ball[0], ball[1]
    reachedRightWall = x + RADIUS >= WIDTH-THICKNESS
    if reachedRightWall:
        return (int(WIDTH/2), int(HEIGHT/2))
    return (x + velocityX, y + velocityY)

def checkPaddleHit(ball, paddle1, paddle2, ballDx):
    """Return ``ballDx`` with its sign flipped when the ball meets a paddle.

    A left-moving ball is tested against paddle1 and a right-moving (or
    horizontally still) ball against paddle2; the ball must lie strictly
    within the paddle's vertical extent.
    """
    leftEdge, rightEdge = ball[0]-RADIUS, ball[0]+RADIUS
    topEdge, bottomEdge = ball[1]-RADIUS, ball[1]+RADIUS

    def covers(paddle):
        # True when the whole ball sits between the paddle's top and bottom.
        return paddle.top < topEdge and paddle.bottom > bottomEdge

    if ballDx < 0 and paddle1.right >= leftEdge and covers(paddle1):
        ballDx = -ballDx
    # Deliberately a second ``if``: it sees the value updated above.
    if ballDx >= 0 and paddle2.left <= rightEdge and covers(paddle2):
        ballDx = -ballDx
    return ballDx

def checkEdgeHit(ball, ballDy):
    """Return ``ballDy`` negated when the ball touches the top or bottom border."""
    y = ball[1]
    touchesTop = y - RADIUS <= THICKNESS
    touchesBottom = y + RADIUS >= HEIGHT-THICKNESS
    if touchesTop or touchesBottom:
        return -ballDy
    return ballDy

def updateScore(paddle2, ball, score, ballDx):
    """Return the updated score for the current ball position.

    One point is lost when the ball's right edge reaches the right border;
    otherwise one point is gained when the ball is in contact with paddle2.
    ``ballDx`` is accepted but not used.
    """
    rightEdge = ball[0] + RADIUS
    if rightEdge >= WIDTH-THICKNESS:
        return score - 1
    topEdge, bottomEdge = ball[1]-RADIUS, ball[1]+RADIUS
    touchingPaddle = (paddle2.left <= rightEdge
                      and paddle2.top < topEdge
                      and paddle2.bottom > bottomEdge)
    return score + 1 if touchingPaddle else score

def displayScore(score):
    """Render the score text in white and blit it near the top-left corner."""
    label = FONT.render("Score: %s" % score, True, WHITE)
    labelBox = label.get_rect()
    labelBox.topleft = (50, 25)
    SURFACE.blit(label, labelBox)

In [None]:
# Main

def main():
    """Run the single-player Pong window until the user closes it.

    The right paddle follows the mouse's vertical position; the left
    "paddle" is a full-height wall.  SPACE pauses and resumes, and closing
    the window or pressing ESC ends the game.
    """
    pygame.init()
    global SURFACE
    global FONT, FONTSIZE
    FONTSIZE = 20
    FONT = pygame.font.Font("freesansbold.ttf", FONTSIZE)

    FPSCLOCK = pygame.time.Clock()
    SURFACE = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("SinglePlayer BoringPong")

    ballX = int(WIDTH/2)
    ballY = int(HEIGHT/2)
    P1X = PADDLEOFFSET
    P2X = WIDTH - PADDLEOFFSET - THICKNESS
    P1Y = int((HEIGHT-PADDLESIZE)/2)
    P2Y = int((HEIGHT-PADDLESIZE)/2)
    score = 0
    ballDx = VELOCITY*3
    ballDy = VELOCITY

    #paddle1 is just a wall
    paddle1 = pygame.Rect(P1X, P1Y, THICKNESS, HEIGHT)
    #paddle2 is the player
    paddle2 = pygame.Rect(P2X, P2Y, THICKNESS, PADDLESIZE)
    ball = (ballX, ballY)

    drawCanvas()
    drawPaddle(paddle1)
    drawPaddle(paddle2)
    drawBall(ball)

    pygame.mouse.set_visible(0)

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT or (event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE):
                # Leave the loop after closing the display; drawing on a
                # closed display would raise on the next frame.
                pygame.display.quit()
                return
            elif event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
                # Paused: block until SPACE is pressed again, but still
                # honour a window-close request.
                while True:
                    event = pygame.event.wait()
                    if event.type == pygame.QUIT:
                        pygame.display.quit()
                        return
                    if event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
                        break
            elif event.type == pygame.MOUSEMOTION:
                # Only the vertical mouse coordinate drives the paddle.
                paddle2.y = event.pos[1]

        drawCanvas()
        drawPaddle(paddle1)
        drawPaddle(paddle2)
        drawBall(ball)

        ball = moveBall(ball, ballDx, ballDy)
        ballDy = checkEdgeHit(ball, ballDy)
        ballDx = checkPaddleHit(ball, paddle1, paddle2, ballDx)
        score = updateScore(paddle2, ball, score, ballDx)

        displayScore(score)

        pygame.display.update()
        FPSCLOCK.tick(FPS)
        
# Start the single-player game only when run as the main module.
if __name__=="__main__":
    main()

# Naive AI vs Human

A naive reflex agent (left) plays against a human (right).

In [None]:
## Utility Functions

def drawCanvas():
    """Clear the window, draw the white frame and the vertical centre line."""
    middleX = int(WIDTH/2)
    SURFACE.fill(BLACK)
    pygame.draw.rect(SURFACE, WHITE, ((0, 0), (WIDTH, HEIGHT)), THICKNESS*2)
    pygame.draw.line(SURFACE, WHITE, (middleX, 0), (middleX, HEIGHT), int(THICKNESS/2))
    
def drawPaddle(paddle):
    """Clamp the paddle rect inside the top/bottom borders and draw it white.

    The bottom limit is checked first; the top limit is only applied when the
    bottom one did not trigger.
    """
    floor = HEIGHT - THICKNESS
    ceiling = THICKNESS
    if paddle.bottom > floor:
        paddle.bottom = floor
    elif paddle.top < ceiling:
        paddle.top = ceiling
    pygame.draw.rect(SURFACE, WHITE, paddle)

def drawBall(ball):
    """Paint the ball: a red circle of radius RADIUS centred at ``ball``."""
    position = ball
    pygame.draw.circle(SURFACE, RED, position, RADIUS)
    
def moveBall(ball, ballDx, ballDy):
    """Return the ball position advanced by VELOCITY along each direction."""
    nextX = ball[0] + ballDx*VELOCITY
    nextY = ball[1] + ballDy*VELOCITY
    return (nextX, nextY)

def checkEdgeHit(ball, ballDx, ballDy):
    """Reflect the ball direction off the window borders.

    Returns ``(ballDx, ballDy)`` with the horizontal component negated when
    the ball touches the left or right border and the vertical component
    negated when it touches the top or bottom border.
    """
    x, y = ball[0], ball[1]
    hitsSide = (x - RADIUS <= THICKNESS) or (x + RADIUS >= WIDTH-THICKNESS)
    hitsTopOrBottom = (y - RADIUS <= THICKNESS) or (y + RADIUS >= HEIGHT-THICKNESS)
    newDx = -ballDx if hitsSide else ballDx
    newDy = -ballDy if hitsTopOrBottom else ballDy
    return newDx, newDy

def checkPaddleHit(ball, paddle1, paddle2, ballDx):
    """Return ``ballDx`` reversed when the ball meets the paddle it moves toward.

    A ball with direction -1 is tested against paddle1 and a ball with
    direction 1 against paddle2; the ball must lie strictly within the
    paddle's vertical extent.
    """
    leftEdge, rightEdge = ball[0]-RADIUS, ball[0]+RADIUS
    topEdge, bottomEdge = ball[1]-RADIUS, ball[1]+RADIUS

    def withinHeight(paddle):
        # The whole ball is between the paddle's top and bottom.
        return paddle.top < topEdge and paddle.bottom > bottomEdge

    if ballDx == -1 and paddle1.right >= leftEdge and withinHeight(paddle1):
        ballDx = -ballDx
    # Second independent check; it sees the value updated above.
    if ballDx == 1 and paddle2.left <= rightEdge and withinHeight(paddle2):
        ballDx = -ballDx
    return ballDx

def updateScore(paddle2, ball, score, ballDx):
    """Return the score after accounting for the current ball position.

    The score drops by one when the ball's right edge reaches the right
    border; otherwise it rises by one when a ball with direction 1 is in
    contact with paddle2.
    """
    rightEdge = ball[0] + RADIUS
    if rightEdge >= WIDTH-THICKNESS:
        return score - 1
    topEdge, bottomEdge = ball[1]-RADIUS, ball[1]+RADIUS
    returned = (ballDx == 1
                and paddle2.left <= rightEdge
                and paddle2.top < topEdge
                and paddle2.bottom > bottomEdge)
    if returned:
        return score + 1
    return score

def displayScore(score):
    """Draw the current score as white text at (50, 25) on the window."""
    text = "Score: %s" % score
    rendered = FONT.render(text, True, WHITE)
    where = rendered.get_rect()
    where.topleft = (50, 25)
    SURFACE.blit(rendered, where)

def naiveAI(ball, ballDx, paddle1):
    """Move the reflex agent's paddle one step and return it.

    With ball direction 1 the paddle drifts toward the vertical middle of
    the window; with direction -1 it steps toward the ball's y coordinate
    (stepping in the negative direction when already level).  Any other
    direction leaves the paddle where it is.  The rect is modified in place.
    """
    centre = paddle1.centery
    step = 0
    if ballDx == 1:
        middle = (HEIGHT)/2
        if centre < middle:
            step = VELOCITY
        elif centre > middle:
            step = -VELOCITY
    elif ballDx == -1:
        step = VELOCITY if centre < ball[1] else -VELOCITY
    if step:
        paddle1.y += step
    return paddle1

In [None]:
## Main

def main():
    """Run the naive-AI (left) versus human (right) Pong window.

    The right paddle follows the mouse's vertical position and the left
    paddle is driven by ``naiveAI``.  SPACE pauses and resumes; closing the
    window ends the game.
    """
    pygame.init()
    global SURFACE
    global FONT, FONTSIZE
    FONTSIZE = 20
    FONT = pygame.font.Font("freesansbold.ttf", FONTSIZE)

    FPSCLOCK = pygame.time.Clock()
    SURFACE = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("BoringPong")

    ballX = int(WIDTH/2)
    ballY = int(HEIGHT/2)
    P1X = PADDLEOFFSET
    P2X = WIDTH - PADDLEOFFSET - THICKNESS
    P1Y = int((HEIGHT-PADDLESIZE)/2)
    P2Y = int((HEIGHT-PADDLESIZE)/2)
    score = 0
    # Directions are unit signs here; moveBall scales them by VELOCITY.
    ballDx = -1
    ballDy = -1

    paddle1 = pygame.Rect(P1X, P1Y, THICKNESS, PADDLESIZE)
    paddle2 = pygame.Rect(P2X, P2Y, THICKNESS, PADDLESIZE)
    ball = (ballX, ballY)

    drawCanvas()
    drawPaddle(paddle1)
    drawPaddle(paddle2)
    drawBall(ball)

    pygame.mouse.set_visible(0)

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                # Leave the loop after closing the display; drawing on a
                # closed display would raise on the next frame.
                pygame.display.quit()
                return
            elif event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
                # Paused: block until SPACE is pressed again, but still
                # honour a window-close request.
                while True:
                    event = pygame.event.wait()
                    if event.type == pygame.QUIT:
                        pygame.display.quit()
                        return
                    if event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
                        break
            elif event.type == pygame.MOUSEMOTION:
                # Only the vertical mouse coordinate drives the paddle.
                paddle2.y = event.pos[1]

        drawCanvas()
        drawPaddle(paddle1)
        drawPaddle(paddle2)
        drawBall(ball)

        ball = moveBall(ball, ballDx, ballDy)
        ballDx, ballDy = checkEdgeHit(ball, ballDx, ballDy)
        # Score before the paddle check so the hit is judged with the
        # incoming direction.
        score = updateScore(paddle2, ball, score, ballDx)
        ballDx = checkPaddleHit(ball, paddle1, paddle2, ballDx)
        paddle1 = naiveAI(ball, ballDx, paddle1)

        displayScore(score)

        pygame.display.update()
        FPSCLOCK.tick(FPS)
        
# Start the AI-vs-human game only when run as the main module.
if __name__=="__main__":
    main()