# Single player AI

**Part 1: Q-Learning**

*Part 1.1: Single-Player Pong*

In [None]:
## Initialization
import numpy as np
import matplotlib.pyplot as plt
import pygame, sys
from pygame.locals import *

# Frame rate passed to the pygame clock, and the per-frame paddle step used
# by the naive AI.
FPS, VELOCITY = 30, 1

# Window geometry in pixels.
WIDTH = HEIGHT = 600
# Border/paddle thickness, ball radius, and the paddle's gap from the side wall.
THICKNESS = RADIUS = PADDLEOFFSET = 5
# Paddle height: 0.2 of the 600-pixel window.
PADDLESIZE = 0.2 * 600

# RGB colours.
BLACK, WHITE, RED = (0, 0, 0), (255, 255, 255), (255, 0, 0)

In [None]:
class discretePong():
    """Single-player Pong on the unit square with a discretized state index.

    Continuous state: ball position ``(bX, bY)``, ball velocity ``(vX, vY)``
    and the top edge ``pY`` of a paddle of height 0.2 guarding the wall at
    ``x = 1``.  The other three walls reflect the ball.

    ``index`` encodes the discretized state: a 12x12 ball grid, 12 paddle
    bins and 6 velocity classes give values in ``[0, 12*12*12*6)``; ``-1``
    marks the terminal state (the ball got past the paddle).
    """

    # Offset of each (velocityX, velocityY) class inside one
    # (ballX, ballY, paddleY) cell of the state index.
    _VELOCITY_OFFSET = {
        (-1, 0): 0, (-1, 1): 1, (-1, -1): 2,
        (1, 0): 3, (1, 1): 4, (1, -1): 5,
    }

    def __init__(self):
        """Start a game with the ball centred and the paddle centred."""
        self.bX = 0.5
        self.bY = 0.5
        self.vX = 0.03
        self.vY = 0.01
        self.pX = 1
        self.pY = 0.5-0.2/2
        self.terminate = False
        self.score = 0
        self.index = int(self.getStateIndex())

    def getStateIndex(self):
        """Refresh the discretized attributes and return the state index.

        Returns ``-1`` once the game has terminated.
        """
        self.discreteValues()
        if self.terminate:
            return -1
        index = self.ballX*12*12*6+self.ballY*12*6+self.paddleY*6
        offset = self._VELOCITY_OFFSET.get((self.velocityX, self.velocityY))
        if offset is None:
            # Not reachable with the values discreteValues() produces; kept
            # as a defensive fallback with the original diagnostic.
            print("Index Error!")
            return 0
        return index + offset

    def discreteValues(self):
        """Bin the continuous state into ballX/ballY/velocityX/velocityY/paddleY."""
        # Ball position: 12 bins per axis, clamped so that 1.0 lands in bin 11.
        self.ballX = min(int(12*self.bX), 11)
        self.ballY = min(int(12*self.bY), 11)
        # Horizontal velocity: sign only.
        if self.vX > 0:
            self.velocityX = 1
        else:
            self.velocityX = -1
        # Vertical velocity: near-zero speeds are treated as 0, otherwise sign.
        if abs(self.vY) < 0.015:
            self.velocityY = 0
        elif self.vY > 0:
            self.velocityY = 1
        else:
            self.velocityY = -1
        # Paddle top ranges over [0, 0.8]; map it onto 12 bins.
        self.paddleY = min(int(12*self.pY/(1-0.2)), 11)

    def printState(self):
        """Print the discretized ball position, velocity and paddle bin."""
        print("bX: "+str(self.ballX)+", bY: "+str(self.ballY))
        print("vX: "+str(self.velocityX)+", vY: "+str(self.velocityY))
        print("paddleY: "+str(self.paddleY))

    def transitionModel(self, pDy):
        """Advance the game by one time step.

        Args:
            pDy: paddle displacement for this step (added to ``pY`` and
                clamped to ``[0, 0.8]``).

        Returns:
            ``1`` if the ball bounced off the paddle, ``-1`` if it passed the
            paddle (the game terminates), ``0`` otherwise.
        """
        #update paddleY
        if self.pY+pDy <= 0:
            self.pY = 0
        elif self.pY+pDy >= 0.8:
            self.pY = 0.8
        else:
            self.pY += pDy

        #update ball
        self.bX += self.vX
        self.bY += self.vY

        #check bounce
        ##edge bounce
        if self.bY <= 0:
            self.bY = -self.bY
            self.vY = -self.vY
        if self.bY >= 1:
            self.bY = 2-self.bY
            self.vY = -self.vY
        if self.bX <= 0:
            self.bX = -self.bX
            self.vX = -self.vX

        ##paddle bounce
        if self.bX >= 1 and self.pY <= self.bY <= self.pY+0.2:
            self.score += 1
            self.bX = 2-self.bX

            # Reflect vX and add noise U ~ uniform(-0.015, 0.015).  Always
            # resample from the same reflected value: negating again inside
            # the retry loop would flip the ball back towards the paddle.
            reflectedVX = -self.vX
            while True:
                newVX = reflectedVX + np.random.uniform(-0.015, 0.015)
                if 0.03 < abs(newVX) <= 1:
                    break
            self.vX = newVX

            # NOTE(review): vY is negated on a paddle hit, as in the original
            # code - confirm this is intended rather than vY + V.
            reflectedVY = -self.vY
            while True:
                newVY = reflectedVY + np.random.uniform(-0.03, 0.03)
                if abs(newVY) <= 1:
                    break
            self.vY = newVY

            self.index = int(self.getStateIndex())
            return 1

        ##paddle missed
        elif self.bX >= 1:
            self.score -= 1
            self.terminate = True
            self.index = int(self.getStateIndex())
            return -1

        self.index = int(self.getStateIndex())

        return 0

In [None]:
# Paddle displacement applied to pY for each action index (0, 1, 2).
actionStates = np.array([0, 0.04, -0.04])

def getActionIndex(q, epsilon):
    """Pick an action index in {0, 1, 2} epsilon-greedily.

    With probability ``epsilon`` a uniformly random action is returned.
    Otherwise the index of the largest entry of ``q`` is returned; ties are
    broken uniformly at random among the maximal entries.
    """
    explore = np.random.rand() < epsilon
    if explore:
        return np.random.randint(0, 3)

    top = max(q)
    candidates = [a for a in range(3) if q[a] == top]
    return np.random.choice(candidates) if len(candidates) > 1 else candidates[0]

# def QLearn(stateIndex, actionIndex, reward, newState, ):
    
    
# def explorationFunction(q, freq, threshold):
#     actions = np.zeros(3)
#     for i in range(3):
#         if freq[i] < threshold: actions[i] = 1000
#         else: actions[i] = q[i]
# #     if (actions == 0).all():
# #         return np.random.randint(0, 3)
#     return np.argmax(actions)
    
def QLearning(gamma, c, epsilon, maxEpoch):
    """Train a tabular Q-learning agent on discretePong.

    Args:
        gamma: discount factor applied to the best next-state Q value.
        c: learning-rate constant; alpha = c / (c + N(s, a)) where N is the
            visit count of the state/action pair being updated.
        epsilon: exploration probability handed to getActionIndex.
        maxEpoch: number of games to play.

    Returns:
        ``(qTable, freqTable, scores)``: the Q table and visit counts, both of
        shape (10369, 3), and the final score of every game.
    """
    qTable = np.zeros((10369, 3))
    freqTable = np.zeros((10369, 3))
    scores = np.zeros(maxEpoch)

    for epoch in range(maxEpoch):
        if epoch == 100000:
            epsilon = 0

        game = discretePong()
        prevState = game.index
        prevAction = getActionIndex(qTable[prevState], epsilon)
        prevReward = 0
        reward = game.transitionModel(actionStates[prevAction])

        while not game.terminate:
            freqTable[prevState, prevAction] += 1
            state = game.index

            # The update for (prevState, prevAction) uses the reward of the
            # transition before it (prevReward) plus the discounted greedy
            # value of the state just reached.
            bestNext = qTable[state, np.argmax(qTable[state])]
            tdError = float(prevReward + gamma*bestNext - qTable[prevState, prevAction])
            alpha = float(c/(c + freqTable[prevState, prevAction]))
            qTable[prevState, prevAction] += float(alpha*tdError)

            # Choose the next action only after the table has been updated.
            action = getActionIndex(qTable[state], epsilon)
            prevState, prevAction, prevReward = state, action, reward
            reward = game.transitionModel(actionStates[action])

        # The game is over: count the last pair and overwrite its Q value
        # with the final reward.
        freqTable[prevState, prevAction] += 1
        qTable[prevState, prevAction] = reward
        scores[epoch] = game.score

    return qTable, freqTable, scores

def SARSALearning(gamma, c, epsilon, maxEpoch):
    """Train a tabular SARSA agent on discretePong.

    Same arguments and return value as QLearning; the difference is that the
    update bootstraps from the Q value of the action actually selected for
    the next state, not the greedy one.

    Args:
        gamma: discount factor.
        c: learning-rate constant; alpha = c / (c + N(s, a)).
        epsilon: exploration probability handed to getActionIndex.
        maxEpoch: number of games to play.

    Returns:
        ``(qTable, freqTable, scores)``.
    """
    qTable = np.zeros((10369, 3))
    freqTable = np.zeros((10369, 3))
    scores = np.zeros(maxEpoch)

    for epoch in range(maxEpoch):
        if epoch == 100000:
            epsilon = 0

        game = discretePong()
        prevState = game.index
        prevAction = getActionIndex(qTable[prevState], epsilon)
        prevReward = 0
        reward = game.transitionModel(actionStates[prevAction])

        while not game.terminate:
            freqTable[prevState, prevAction] += 1
            state = game.index

            # On-policy: select the next action first, then bootstrap from it.
            action = getActionIndex(qTable[state], epsilon)
            tdError = float(prevReward + gamma*qTable[state, action] - qTable[prevState, prevAction])
            alpha = float(c/(c + freqTable[prevState, prevAction]))
            qTable[prevState, prevAction] += float(alpha*tdError)

            prevState, prevAction, prevReward = state, action, reward
            reward = game.transitionModel(actionStates[action])

        # The game is over: count the last pair and overwrite its Q value
        # with the final reward.
        freqTable[prevState, prevAction] += 1
        qTable[prevState, prevAction] = reward
        scores[epoch] = game.score

    return qTable, freqTable, scores

def agentTest(qTable, numGames=200):
    """Play games greedily with a trained Q table and return the scores.

    Args:
        qTable: table indexed as ``qTable[stateIndex]`` giving the three
            action values for that state.
        numGames: number of games to play (defaults to the original 200).

    Returns:
        numpy array of length ``numGames`` with the final score of each game.
    """
    scores = np.zeros(numGames)
    for epoch in range(numGames):
        pongState = discretePong()
        # Always take the greedy action until the ball gets past the paddle.
        while not pongState.terminate:
            actionIndex = np.argmax(qTable[pongState.index])
            pongState.transitionModel(actionStates[actionIndex])
        scores[epoch] = pongState.score
    return scores

In [None]:
# Train the agent with gamma=0.7, c=60, epsilon=0.03 for max_epoch games.
# NOTE(review): QLearning only zeroes epsilon when epoch == 100000, which
# range(max_epoch) never reaches while max_epoch is 100000.
max_epoch = 100000
qT, fT, scores = QLearning(0.7, 60, 0.03, max_epoch)
# Alternative learner with the same signature:
# qT, fT, scores = SARSALearning(0.7, 60, 0.03, max_epoch)
# Mean score per training game.
print(np.average(scores))

In [None]:
# Evaluate the trained table with the greedy policy.
testScores = agentTest(qT)

# Visit counts of every state/action pair that was tried at least once,
# collected in row-major order.
visited = [count for row in fT[:10369] for count in row[:3] if count != 0]
total = sum(visited)
n = len(visited)

# Mean visits per visited pair, then mean visits over all 10369*3 pairs.
print(total/n)
print(total/(10369*3))
# displayTestAgent(qT)

In [None]:
# Training score of every game, in training order.
plt.plot(scores)
plt.show()
# Mean and raw scores of the greedy evaluation games.
print(np.average(testScores))
print(testScores)

*Part 1 Extra: Animations*

In [None]:
def drawCanvas():
    """Clear the window to black and outline it with a white frame."""
    SURFACE.fill(BLACK)
    frame = ((0, 0), (WIDTH, HEIGHT))
    borderWidth = THICKNESS*2
    pygame.draw.rect(SURFACE, WHITE, frame, borderWidth)
    
def drawPaddle(paddle):
    """Keep the paddle rect inside the frame (mutating it), then draw it."""
    lowest = HEIGHT - THICKNESS
    highest = THICKNESS
    if paddle.bottom > lowest:
        paddle.bottom = lowest
    elif paddle.top < highest:
        paddle.top = highest
    pygame.draw.rect(SURFACE, WHITE, paddle)

def drawBall(ball):
    """Draw the ball as a red circle of radius RADIUS centred at ``ball``."""
    pygame.draw.circle(SURFACE, RED, ball, RADIUS)

def displayScore(score):
    """Render "Score: <score>" in white near the top-left corner."""
    label = FONT.render("Score: %s" % (score), True, WHITE)
    # get_rect applies keyword arguments to the returned rect's attributes.
    box = label.get_rect(topleft=(50, 25))
    SURFACE.blit(label, box)

In [None]:
def displayQLearning(gamma, c, epsilon, maxEpoch, qTable=None, freqTable=None):
    """Run Q-learning on discretePong while animating every step in pygame.

    The learning update is the same as in QLearning. SPACE pauses and
    resumes; closing the window or pressing ESC stops the run.

    Args:
        gamma: discount factor.
        c: learning-rate constant; alpha = c / (c + N(s, a)).
        epsilon: exploration probability handed to getActionIndex.
        maxEpoch: number of games to play.
        qTable: optional (10369, 3) Q table to continue training; updated in
            place. A fresh zero table is created when omitted.
        freqTable: optional (10369, 3) visit-count table; updated in place.
            A fresh zero table is created when omitted.

    Returns:
        ``(qTable, freqTable, scores)``; ``scores`` holds the final score of
        every game completed before the run ended.
    """
    global SURFACE
    global FONT, FONTSIZE

    # Create the tables per call: array defaults in the signature would be
    # built once and shared (and mutated) across calls.
    if qTable is None:
        qTable = np.zeros((10369, 3))
    if freqTable is None:
        freqTable = np.zeros((10369, 3))

    pygame.init()
    FONTSIZE = 20
    FONT = pygame.font.Font("freesansbold.ttf", FONTSIZE)
    FPSCLOCK = pygame.time.Clock()
    SURFACE = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("SinglePlayer BoringPong")

    P1X = PADDLEOFFSET
    P2X = WIDTH - PADDLEOFFSET - THICKNESS
    P1Y = int((HEIGHT-PADDLESIZE)/2)
    P2Y = int((HEIGHT-PADDLESIZE)/2)

    #paddle1 is just a wall
    paddle1 = pygame.Rect(P1X, P1Y, THICKNESS, HEIGHT)
    #paddle2 is the player
    paddle2 = pygame.Rect(P2X, P2Y, THICKNESS, PADDLESIZE)
    ball = (int(WIDTH/2), int(HEIGHT/2))

    drawCanvas()
    drawPaddle(paddle1)
    drawPaddle(paddle2)
    drawBall(ball)

    pygame.mouse.set_visible(0)

    scores = np.zeros(maxEpoch)

    for epoch in range(maxEpoch):
        pongState = discretePong()
        preStateIndex = pongState.index
        preActionIndex = getActionIndex(qTable[preStateIndex], epsilon)
        preReward = 0
        reward = pongState.transitionModel(actionStates[preActionIndex])
        while True:
            for event in pygame.event.get():
                if event.type == pygame.QUIT or (event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE):
                    # Stop here: the display surface is not usable once the
                    # display has been shut down.
                    pygame.display.quit()
                    return qTable, freqTable, scores
                elif event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
                    # Paused: block until SPACE is pressed again (or quit).
                    while True:
                        event = pygame.event.wait()
                        if event.type == pygame.QUIT:
                            pygame.display.quit()
                            return qTable, freqTable, scores
                        if event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
                            break

            # Draw the current game state (unit square scaled to 600 pixels).
            drawCanvas()
            drawPaddle(paddle1)
            paddle2 = pygame.Rect(int(P2X), int(pongState.pY*600), THICKNESS, PADDLESIZE)
            drawPaddle(paddle2)
            ball = (int(pongState.bX*600), int(pongState.bY*600))
            drawBall(ball)
            displayScore(pongState.score)

            stateIndex = pongState.index
            freqTable[preStateIndex][preActionIndex] += 1
            if pongState.terminate:
                # Game over: overwrite the last pair's value with the final reward.
                qTable[preStateIndex][preActionIndex] = reward
                scores[epoch] = pongState.score
                break
            qlearnIndex = np.argmax(qTable[stateIndex])
            increment = float(preReward+gamma*qTable[stateIndex][qlearnIndex]-qTable[preStateIndex][preActionIndex])
            alpha = float(c/(c+freqTable[preStateIndex][preActionIndex]))
            qTable[preStateIndex][preActionIndex] += float(alpha*(increment))
            actionIndex = getActionIndex(qTable[stateIndex], epsilon)
            preStateIndex = stateIndex
            preActionIndex = actionIndex
            preReward = reward
            reward = pongState.transitionModel(actionStates[actionIndex])

            pygame.display.update()
            FPSCLOCK.tick(FPS)

    return qTable, freqTable, scores

def displayTestAgent(qTable):
    """Animate 200 games played greedily from ``qTable`` in a pygame window.

    SPACE pauses and resumes; closing the window or pressing ESC stops the
    run.

    Args:
        qTable: table indexed as ``qTable[stateIndex]`` giving the three
            action values for that state.

    Returns:
        numpy array of length 200 with the final score of every game
        completed before the run ended (zeros for games not played).
    """
    global SURFACE
    global FONT, FONTSIZE

    pygame.init()
    FONTSIZE = 20
    FONT = pygame.font.Font("freesansbold.ttf", FONTSIZE)
    FPSCLOCK = pygame.time.Clock()
    SURFACE = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("SinglePlayer BoringPong")

    P1X = PADDLEOFFSET
    P2X = WIDTH - PADDLEOFFSET - THICKNESS
    P1Y = int((HEIGHT-PADDLESIZE)/2)
    P2Y = int((HEIGHT-PADDLESIZE)/2)

    #paddle1 is just a wall
    paddle1 = pygame.Rect(P1X, P1Y, THICKNESS, HEIGHT)
    #paddle2 is the player
    paddle2 = pygame.Rect(P2X, P2Y, THICKNESS, PADDLESIZE)
    ball = (250, 250)

    drawCanvas()
    drawPaddle(paddle1)
    drawPaddle(paddle2)
    drawBall(ball)

    pygame.mouse.set_visible(0)

    scores = np.zeros(200)

    for epoch in range(200):
        pongState = discretePong()
        while True:
            for event in pygame.event.get():
                if event.type == pygame.QUIT or (event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE):
                    # Stop here: the display surface is not usable once the
                    # display has been shut down.
                    pygame.display.quit()
                    return scores
                elif event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
                    # Paused: block until SPACE is pressed again (or quit).
                    while True:
                        event = pygame.event.wait()
                        if event.type == pygame.QUIT:
                            pygame.display.quit()
                            return scores
                        if event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
                            break

            # Draw the current game state (unit square scaled to 600 pixels).
            drawCanvas()
            drawPaddle(paddle1)
            paddle2 = pygame.Rect(int(P2X), int((pongState.pY)*600), THICKNESS, PADDLESIZE)
            drawPaddle(paddle2)
            ball = (int((pongState.bX)*600), int((pongState.bY)*600))
            drawBall(ball)
            displayScore(pongState.score)

            if pongState.terminate:
                scores[epoch] = pongState.score
                break

            # Greedy action for the current state.
            actionIndex = np.argmax(qTable[pongState.index])
            pongState.transitionModel(actionStates[actionIndex])

            pygame.display.update()
            FPSCLOCK.tick(FPS)

    return scores

In [None]:
# Alternative: watch the agent learn from scratch.
# displayQLearning(0.7, 5, 0.05, max_epoch)
# Animate the greedy policy of the Q table trained above.
displayTestAgent(qT)

*Part 1 Extra: Play with Agents*

In [None]:
## Utility Functions

def drawCanvas():
    """Clear the window, draw the white frame and the vertical centre line."""
    SURFACE.fill(BLACK)
    frame = ((0, 0), (WIDTH, HEIGHT))
    pygame.draw.rect(SURFACE, WHITE, frame, THICKNESS*2)
    midX = int(WIDTH/2)
    pygame.draw.line(SURFACE, WHITE, (midX, 0), (midX, HEIGHT), int(THICKNESS/2))
    
def drawPaddle(paddle):
    """Keep the paddle rect inside the frame (mutating it), then draw it."""
    lowest = HEIGHT - THICKNESS
    highest = THICKNESS
    if paddle.bottom > lowest:
        paddle.bottom = lowest
    elif paddle.top < highest:
        paddle.top = highest
    pygame.draw.rect(SURFACE, WHITE, paddle)

def drawBall(ball):
    """Draw the ball as a red circle of radius RADIUS centred at ``ball``."""
    pygame.draw.circle(SURFACE, RED, ball, RADIUS)

def checkEdgeHit(ball, ballDx, ballDy):
    """Reverse the ball direction on any axis where it touches the frame.

    Args:
        ball: ``(x, y)`` centre of the ball in pixels.
        ballDx, ballDy: current direction components.

    Returns:
        The ``(ballDx, ballDy)`` pair, with a component multiplied by -1 when
        the ball's edge reaches the left/right or top/bottom border.
    """
    x, y = ball[0], ball[1]
    touchesSide = (x - RADIUS <= THICKNESS) or (x + RADIUS >= WIDTH-THICKNESS)
    touchesTopOrBottom = (y - RADIUS <= THICKNESS) or (y + RADIUS >= HEIGHT-THICKNESS)
    newDx = ballDx * -1 if touchesSide else ballDx
    newDy = ballDy * -1 if touchesTopOrBottom else ballDy
    return newDx, newDy

def checkPaddleHit(ball, paddle1, paddle2, ballDx):
    """Reverse the horizontal direction when the ball meets a paddle.

    A paddle is hit when the ball moves towards it, the ball's edge has
    reached the paddle's face, and the ball lies strictly within the paddle's
    vertical extent. The two paddles are checked in order, left then right.

    Returns:
        The (possibly reversed) ``ballDx``.
    """
    left, right = ball[0]-RADIUS, ball[0]+RADIUS
    top, bottom = ball[1]-RADIUS, ball[1]+RADIUS

    def covers(paddle):
        # True when the whole ball is between the paddle's top and bottom.
        return paddle.top < top and paddle.bottom > bottom

    if ballDx == -1 and paddle1.right >= left and covers(paddle1):
        ballDx = ballDx * -1
    if ballDx == 1 and paddle2.left <= right and covers(paddle2):
        ballDx = ballDx * -1
    return ballDx

def updateScore(paddle2, ball, score, ballDx):
    """Return the score after this frame.

    The score drops by 1 when the ball's right edge reaches the right border,
    and rises by 1 when the ball, moving right, meets ``paddle2``; otherwise
    it is returned unchanged.
    """
    right = ball[0]+RADIUS
    top, bottom = ball[1]-RADIUS, ball[1]+RADIUS

    if right >= WIDTH-THICKNESS:
        return score - 1
    onPaddle = paddle2.left <= right and paddle2.top < top and paddle2.bottom > bottom
    if ballDx == 1 and onPaddle:
        return score + 1
    return score

def displayScore(score):
    """Render "Score: <score>" in white near the top-left corner."""
    label = FONT.render("Score: %s" % (score), True, WHITE)
    # get_rect applies keyword arguments to the returned rect's attributes.
    box = label.get_rect(topleft=(50, 25))
    SURFACE.blit(label, box)

def naiveAI(ball, ballDx, paddle1):
    """Move the left paddle one VELOCITY step with a simple reflex rule.

    When the ball moves right (``ballDx == 1``) the paddle drifts towards the
    vertical centre of the window; when it moves left (``ballDx == -1``) the
    paddle follows the ball's y coordinate. The rect is mutated and returned.
    """
    if ballDx == 1:
        middle = (HEIGHT)/2
        if paddle1.centery < middle:
            paddle1.y += VELOCITY
        elif paddle1.centery > middle:
            paddle1.y -= VELOCITY
        return paddle1

    if ballDx == -1:
        ballIsBelow = paddle1.centery < ball[1]
        if ballIsBelow:
            paddle1.y += VELOCITY
        else:
            paddle1.y -= VELOCITY
    return paddle1

In [None]:
## Main

def main():
    """Run the human (right paddle, mouse) vs. naive AI (left paddle) game.

    SPACE pauses and resumes; closing the window ends the game.
    """
    global SURFACE
    global FONT, FONTSIZE

    pygame.init()
    FONTSIZE = 20
    FONT = pygame.font.Font("freesansbold.ttf", FONTSIZE)

    FPSCLOCK = pygame.time.Clock()
    SURFACE = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("BoringPong")

    ballX = int(WIDTH/2)
    ballY = int(HEIGHT/2)
    P1X = PADDLEOFFSET
    P2X = WIDTH - PADDLEOFFSET - THICKNESS
    P1Y = int((HEIGHT-PADDLESIZE)/2)
    P2Y = int((HEIGHT-PADDLESIZE)/2)
    score = 0
    ballDx = -1
    ballDy = -1

    paddle1 = pygame.Rect(P1X, P1Y, THICKNESS, PADDLESIZE)
    paddle2 = pygame.Rect(P2X, P2Y, THICKNESS, PADDLESIZE)
    ball = (ballX, ballY)

    drawCanvas()
    drawPaddle(paddle1)
    drawPaddle(paddle2)
    drawBall(ball)

    pygame.mouse.set_visible(0)

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                # Stop here: the display surface is not usable once the
                # display has been shut down.
                pygame.display.quit()
                return
            elif event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
                # Paused: block until SPACE is pressed again (or quit).
                while True:
                    event = pygame.event.wait()
                    if event.type == pygame.QUIT:
                        pygame.display.quit()
                        return
                    if event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
                        break
            elif event.type == pygame.MOUSEMOTION:
                # The human paddle follows the mouse's y coordinate.
                paddle2.y = event.pos[1]

        drawCanvas()
        drawPaddle(paddle1)
        drawPaddle(paddle2)
        drawBall(ball)

        # NOTE(review): moveBall is not defined in the visible part of this
        # file; it must be provided elsewhere before main() can run.
        ball = moveBall(ball, ballDx, ballDy)
        ballDx, ballDy = checkEdgeHit(ball, ballDx, ballDy)
        # Score before checkPaddleHit so ballDx still points at the paddle.
        score = updateScore(paddle2, ball, score, ballDx)
        ballDx = checkPaddleHit(ball, paddle1, paddle2, ballDx)
        paddle1 = naiveAI(ball, ballDx, paddle1)

        displayScore(score)

        pygame.display.update()
        FPSCLOCK.tick(FPS)

if __name__=="__main__":
    main()

# Naive AI vs Human

A naive reflex agent (left) plays against a human (right).