In [40]:
import numpy as np
import math
import random
import pprint
import pygame
import sys

In [331]:
#===========CHANGABLE PARAMETERS=================
REWARD     = 1
PENALTY    = -1
DISCOUNT   = 0.9
LEARN_RATE_CONST = 200

def get_explore_rate(epoch):
    return max(0.1,1-math.log2(epoch*1000)/20)

#===========DEFINE CONSTANTS AND DICS=============
WALL_LEN = 1
PADDLE_H = 0.2

init_state = (0.5, 0.5, 0.03, 0.01, 0.5 - PADDLE_H / 2)

ACTION_DIC={0:-0.04, #'UP'
            1:0,     #'STAY'
            2:0.04}  #'DOWN'

BOARD_SIZE = 12
X_VBALL_DIS = [-1,1]
Y_VBALL_DIS = [-1,0,1]

PADDLE_SPACE = 12
PADDLE_X     = 1

STATE_SPACE = (BOARD_SIZE,BOARD_SIZE,len(X_VBALL_DIS),len(Y_VBALL_DIS),PADDLE_SPACE)

X_V_TSH = 0.03
Y_V_TSH = 0.015

In [335]:
#==============DEFINE STATE CLASS===============
class state:
    
    def __init__(self,ball_x,ball_y,velocity_x,velocity_y,paddle_y,reward,end_state = 0):
        self.ball_x = ball_x              #real numbers on the interval [0,1]
        self.ball_y = ball_y
        self.velocity_x = velocity_x
        self.velocity_y = velocity_y
        self.paddle_y = paddle_y
        self.reward = reward
        self.state_tuple = (ball_x,ball_y,velocity_x,velocity_y,paddle_y)
        self.end_state = end_state
        self._extract()
        
    def _extract(self):
        self.x_grid = min(math.floor(12*self.ball_x),BOARD_SIZE-1)
        self.y_grid = min(math.floor(12*self.ball_y),BOARD_SIZE-1)
        if(self.velocity_x>0): 
            self.x_v_sign = 0
        else: 
            self.x_v_sign = 1
            
        if(self.velocity_y>=0.015):
            self.y_v_sign = 0
        elif(self.velocity_y<=0.015): 
            self.y_v_sign = 1
        else:
            self.y_v_sign = 2
        self.paddle_grid = min(math.floor(12 * self.paddle_y / (1 - PADDLE_H)),PADDLE_SPACE-1)
        self.space_tuple = (self.x_grid,self.y_grid,self.x_v_sign,self.y_v_sign,self.paddle_grid)

In [344]:
#=======DEFINE MORE HELPER FUNCTIONS===========
def bounce(cur_state,action):
#     print('ENTER!!!!')
    n_ball_v_x = cur_state.velocity_x
    n_ball_v_y = cur_state.velocity_y
    
    n_ball_x = cur_state.ball_x + n_ball_v_x
    n_ball_y = cur_state.ball_y + n_ball_v_y

    fac = n_ball_v_y*(n_ball_x-1)/n_ball_v_x
    cmp_y = cur_state.ball_y + fac
    #======paddle should in range(0,0.8)
    n_paddle_y = min(WALL_LEN-PADDLE_H,max(0,cur_state.paddle_y + action))
    
    c_reward = 0
    
    #==the ball is off the top of the screen==
    if n_ball_y < 0:
#         print('1111111111111111111111111')
        n_ball_y = -n_ball_y
        n_ball_v_y = -n_ball_v_y
        
    #==the ball is off the bottom of the screen==
    if n_ball_y > 1:
#         print('22222222222222222222222222')
        n_ball_y = 2*WALL_LEN-n_ball_y
        n_ball_v_y = -n_ball_v_y
        
    #==the ball is off the left edge of the screen==
    if n_ball_x < 0:
#         print('33333333333333333333333333')
        n_ball_x = -n_ball_x
        n_ball_v_x = -n_ball_v_x
        
    #== the ball bouncing off the paddle==
#     print('n_paddle_y+PADDLE_H',n_paddle_y+PADDLE_H,'n_ball_y', n_ball_y,'n_paddle_y', n_paddle_y)
#     print(n_paddle_y+PADDLE_H <= n_ball_y <= n_paddle_y)
#     print(n_ball_x >= 1)
    if n_ball_x >= 1 and (n_paddle_y+PADDLE_H >= min(max(0,cmp_y),1) >= n_paddle_y):
#         print('yuayyyaysaysdyuasfhiuadsfhiua!')
        n_ball_x = 2*PADDLE_X - n_ball_x
        n_ball_v_x, n_ball_v_y = update_speed_rand(n_ball_v_x,n_ball_v_y)
        c_reward = REWARD
    elif n_ball_x > 1:
#         print('444444444444444444444444444')
        c_reward = PENALTY
#     if(c_reward == 1):
#         print(n_ball_x,n_ball_y,n_ball_v_x,n_ball_v_y,n_paddle_y,c_reward)
    return n_ball_x,n_ball_y,n_ball_v_x,n_ball_v_y,n_paddle_y,c_reward

def update_speed_rand(velocity_x,velocity_y):
    vx_delta = random.choice(range(-1,2,2)) * 0.015
    vy_delta = random.choice(range(-1,2,2)) * 0.03
    sign_x = -velocity_x/abs(velocity_x)
    sign_y = velocity_y/abs(velocity_y)
    n_ball_v_x = sign_x*max(X_V_TSH,min(1,abs(vx_delta+velocity_x)))
    n_ball_v_y = sign_y*max(Y_V_TSH,min(1,abs(vy_delta+velocity_y)))
    return n_ball_v_x, n_ball_v_y

# def check_termination(cur_state):
#     pad_rang = range(cur_state.paddle_y-PADDLE_H,cur_state.paddle_y)
#     if cur_state.ball_x>=WALL_LEN and cur_state.ball_y not in pad_rang:
#         return True
#     else:
#         return False
        
def proceed_one_step(cur_state,action):

    n_ball_x,n_ball_y,n_ball_v_x,n_ball_v_y,n_paddle_y,c_reward = bounce(cur_state,action)
    end = 0
    if c_reward == -1:
        end = 1
#     print(n_ball_x,n_ball_y,n_ball_v_x,n_ball_v_y,n_paddle_y,c_reward,end,action)
    n_state = state(n_ball_x,n_ball_y,n_ball_v_x,n_ball_v_y,n_paddle_y,c_reward,end_state=end)
    return n_state

In [148]:
random.choice(range(-1,2,2))

-1

In [54]:
#=============DEFINE Q-AGENT CLASS==============
class q_agent:
    
    def __init__(self):
        self.q_table = np.zeros(STATE_SPACE+(len(ACTION_DIC),)) 
        self.end_state = 0
        
    def set_table(self,loc,val):
        self.q_table[loc] = val
    
    def get_table(self,loc):
#         print(loc)
        return self.q_table[loc]
    
    def get_act(self,cur_state,i,mode = 'train'):
        if mode=='train' and random.random()<get_explore_rate(i):
            return random.choice(range(0,3,1))
        return np.argmax(self.get_table(cur_state.space_tuple))
    

In [124]:
#=============DEFINE TRAIN FUCNTION=============
#init_state = (0.5, 0.5, 0.03, 0.01, 0.5 - PADDLE_H / 2)
#self.space_tuple = (self.x_grid,self.y_grid,self.x_v_sign,self.y_v_sign,self.paddle_grid)
def train(epoch_num,q_ag):
    pre_tot = 0
    tot_bounce = 0
    for i in range(1,epoch_num+1):
#         print('i=',i)
        temp_bounce = 0
        cur_state = state(0.5, 0.5, 0.03, 0.01, 0.5 - PADDLE_H / 2,0)
        while True:
            action = q_ag.get_act(cur_state,i)
#             action = 1
#             print('act=',action)
            n_state = proceed_one_step(cur_state,ACTION_DIC[action])
            old_val = q_ag.get_table(cur_state.space_tuple+(action,))


            prd_max = np.max(q_ag.get_table(n_state.space_tuple))
            prd_tuple = (n_state.x_grid,n_state.y_grid,n_state.x_v_sign,n_state.y_v_sign,n_state.paddle_grid,prd_max)

            new_val = (1-LEARN_RATE)*old_val + LEARN_RATE*(n_state.reward + DISCOUNT*prd_max)
            q_ag.set_table(cur_state.space_tuple+(action,),new_val)
#             print('22222222222=',prd_max)
#             print('33333333333333=',n_state.space_tuple+(action,))
#             print('44444444444=',q_ag.get_table(n_state.space_tuple+(action,)))
            if n_state.end_state == 1:
                break
            if n_state.reward == REWARD:
                temp_bounce+=1
#                 print('this round=',temp_bounce)
            cur_state = n_state
        tot_bounce+=temp_bounce
#         print()
#         print()
        if i%1000 == 0:
            print('loop',i)
            print(q_ag.q_table.sum())
            print('now average bounce is', (tot_bounce-pre_tot)/1000)
            print('bounce=',tot_bounce)
            print()
            pre_tot = tot_bounce

print(n_ball_x,n_ball_y,n_ball_v_x,n_ball_v_y,n_paddle_y,c_reward,end)

print(n_ball_x,n_ball_y,n_ball_v_x,n_ball_v_y,n_paddle_y,c_reward,end,action)

In [342]:
q_ag = q_agent()
train(30000,q_ag)

loop 1000
-24.5932202204
now average bounce is 1.509
bounce= 1509

loop 2000
-45.4424088
now average bounce is 2.204
bounce= 3713

loop 3000
-79.0731851157
now average bounce is 2.4
bounce= 6113

loop 4000
-118.921839507
now average bounce is 2.559
bounce= 8672

loop 5000
-162.128206923
now average bounce is 2.633
bounce= 11305

loop 6000
-208.755214363
now average bounce is 2.697
bounce= 14002

loop 7000
-258.169518625
now average bounce is 2.696
bounce= 16698

loop 8000
-308.31994975
now average bounce is 2.777
bounce= 19475

loop 9000
-362.895688873
now average bounce is 2.717
bounce= 22192

loop 10000
-415.581691331
now average bounce is 2.731
bounce= 24923

loop 11000
-474.513599725
now average bounce is 2.631
bounce= 27554

loop 12000
-529.811673879
now average bounce is 2.793
bounce= 30347

loop 13000
-570.440750608
now average bounce is 2.824
bounce= 33171

loop 14000
-619.14051384
now average bounce is 2.811
bounce= 35982

loop 15000
-660.128875846
now average bounce is 2.909


In [9]:
def test(epoch_num,q_ag):
    tot_bounce = 0
    for i in range(epoch_num):
        temp_bounce = 0
        cur_state = state(0.5, 0.5, 0.03, 0.01, 0.5 - PADDLE_H / 2,0)
        while True:
            action = q_ag.get_act(cur_state,i,mode='hei')
            n_state = proceed_one_step(cur_state,ACTION_DIC[action])
            if n_state.end_state == 1:
                break
            if n_state.reward == REWARD:
                temp_bounce+=1
            cur_state = n_state
        tot_bounce+=temp_bounce
#         print('loop',i)
#         print('now average bounce is', tot_bounce/(i+1))
#         print('bounce in this game=',temp_bounce)
#         print()
    print('the avg bounce =',tot_bounce/epoch_num)

In [326]:
test(1,q_ag)

the avg bounce = 1.0


In [27]:
# Define some colors
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
RED = RED = (194,24,7)

SCREEN_WIDTH = 1005
SCREEN_HEIGHT = 1000
BALL_SIZE = 5

GAME_FPS = 60
GAME_END_FPS = 60

def play(epoch_num,q_ag):
    """
    This is our main program.
    """
    pygame.init()
    size = [SCREEN_WIDTH, SCREEN_HEIGHT]
    screen = pygame.display.set_mode(size)

    pygame.display.set_caption("Pong")
 
    # Loop until the user clicks the close button.
    done = False
 
    # Used to manage how fast the screen updates
    clock = pygame.time.Clock()
 
    tot_bounce = 0
    for i in range(epoch_num):
        temp_bounce = 0
        cur_state = state(0.5, 0.5, 0.03, 0.01, 0.5 - PADDLE_H / 2,0)
        while True:
            screen.fill(WHITE)
            action = q_ag.get_act(cur_state,i,mode='hei')
            print('act:',action)
            n_state = proceed_one_step(cur_state,ACTION_DIC[action])
            
            ball = [int(n_state.ball_x*1000),int(n_state.ball_y*1000)]
            print('ball',ball)
            rect = [1000,int(n_state.paddle_y*1000),5,PADDLE_H*1000]
            print('rect',rect)
            b = pygame.draw.circle(screen, BLACK, ball, BALL_SIZE)
            pad = pygame.draw.rect(screen,BLACK,rect)
            pygame.display.update()
            # Limit to 60 frames per second
            clock.tick(60)

            if n_state.end_state == 1:
                break
            if n_state.reward == REWARD:
                temp_bounce+=1
            cur_state = n_state
    pygame.quit()

In [157]:
play(200,q_ag)

act: 2
ball [530, 510]
rect [1000, 440, 5, 200.0]
act: 2
ball [560, 520]
rect [1000, 480, 5, 200.0]
act: 2
ball [590, 530]
rect [1000, 520, 5, 200.0]
act: 2
ball [620, 540]
rect [1000, 560, 5, 200.0]
act: 2
ball [650, 550]
rect [1000, 600, 5, 200.0]
act: 2
ball [680, 560]
rect [1000, 640, 5, 200.0]
act: 2
ball [710, 570]
rect [1000, 680, 5, 200.0]
act: 1
ball [740, 580]
rect [1000, 680, 5, 200.0]
act: 0
ball [770, 590]
rect [1000, 640, 5, 200.0]
act: 2
ball [800, 600]
rect [1000, 680, 5, 200.0]
act: 0
ball [830, 610]
rect [1000, 640, 5, 200.0]
act: 1
ball [860, 620]
rect [1000, 640, 5, 200.0]
act: 1
ball [890, 630]
rect [1000, 640, 5, 200.0]
act: 1
ball [920, 640]
rect [1000, 640, 5, 200.0]
act: 0
ball [950, 650]
rect [1000, 600, 5, 200.0]
act: 0
ball [980, 660]
rect [1000, 560, 5, 200.0]
act: 2
ball [989, 670]
rect [1000, 600, 5, 200.0]
act: 2
ball [944, 710]
rect [1000, 640, 5, 200.0]
act: 2
ball [899, 750]
rect [1000, 680, 5, 200.0]
act: 0
ball [854, 790]
rect [1000, 640, 5, 200.0]


act: 2
ball [950, 810]
rect [1000, 680, 5, 200.0]
act: 0
ball [995, 850]
rect [1000, 640, 5, 200.0]
act: 2
ball [1040, 890]
rect [1000, 680, 5, 200.0]
act: 2
ball [530, 510]
rect [1000, 440, 5, 200.0]
act: 2
ball [560, 520]
rect [1000, 480, 5, 200.0]
act: 2
ball [590, 530]
rect [1000, 520, 5, 200.0]
act: 2
ball [620, 540]
rect [1000, 560, 5, 200.0]
act: 2
ball [650, 550]
rect [1000, 600, 5, 200.0]
act: 2
ball [680, 560]
rect [1000, 640, 5, 200.0]
act: 2
ball [710, 570]
rect [1000, 680, 5, 200.0]
act: 1
ball [740, 580]
rect [1000, 680, 5, 200.0]
act: 0
ball [770, 590]
rect [1000, 640, 5, 200.0]
act: 2
ball [800, 600]
rect [1000, 680, 5, 200.0]
act: 0
ball [830, 610]
rect [1000, 640, 5, 200.0]
act: 1
ball [860, 620]
rect [1000, 640, 5, 200.0]
act: 1
ball [890, 630]
rect [1000, 640, 5, 200.0]
act: 1
ball [920, 640]
rect [1000, 640, 5, 200.0]
act: 0
ball [950, 650]
rect [1000, 600, 5, 200.0]
act: 0
ball [980, 660]
rect [1000, 560, 5, 200.0]
act: 2
ball [989, 670]
rect [1000, 600, 5, 200.0]

KeyboardInterrupt: 

In [14]:
ball

NameError: name 'ball' is not defined

In [35]:
def draw(canvas, ball_x, ball_y, paddle_y):
    canvas.fill(BACKGROUND_COLOR)
    pygame.draw.line(canvas, WHITE, [WIDTH // 2, 0], [WIDTH // 2, HEIGHT], 1)
    pygame.draw.line(canvas, WHITE, [PAD_WIDTH, 0], [PAD_WIDTH, HEIGHT], 1)
    pygame.draw.line(canvas, WHITE, [WIDTH - PAD_WIDTH, 0], [WIDTH - PAD_WIDTH, HEIGHT], 1)

    paddle1_pos = (WIDTH + 1 - HALF_PAD_WIDTH, int(paddle_y * HEIGHT + HALF_PAD_HEIGHT))
    ball_pos = (int(ball_x * WIDTH) , int(ball_y * HEIGHT))

    pygame.draw.circle(canvas, BALL_COLOR, ball_pos, BALL_RADIUS, 0)
    pygame.draw.polygon(canvas, PAD_COLOR, [[paddle1_pos[0] - HALF_PAD_WIDTH, paddle1_pos[1] - HALF_PAD_HEIGHT],
                                        [paddle1_pos[0] - HALF_PAD_WIDTH, paddle1_pos[1] + HALF_PAD_HEIGHT],
                                        [paddle1_pos[0] + HALF_PAD_WIDTH, paddle1_pos[1] + HALF_PAD_HEIGHT],
                                        [paddle1_pos[0] + HALF_PAD_WIDTH, paddle1_pos[1] - HALF_PAD_HEIGHT]], 0)

In [44]:
WHITE = (255, 255, 255)
BALL_COLOR = (44, 62, 80)
PAD_COLOR = (41, 128, 185)
BACKGROUND_COLOR = (207, 216, 220)
SCORE_COLOR = (25, 118, 210)

WIDTH = 600
HEIGHT = 600

BALL_RADIUS = 8
PAD_WIDTH = 8
PAD_HEIGHT = HEIGHT * 0.2
HALF_PAD_WIDTH = PAD_WIDTH // 2
HALF_PAD_HEIGHT = PAD_HEIGHT // 2
GAME_FPS = 30
GAME_END_FPS = 30


def test_with_gui(agent, epoches, opponent=None):
    pygame.init()
    fps = pygame.time.Clock()

    window = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption('Pong')

    turns_count = []
    lscore = 0
    rscore = 0

    for i in range(epoches):

        temp_bounce = 0
        cur_state = state(0.5, 0.5, 0.03, 0.01, 0.5 - PADDLE_H / 2,0)
        
        while True: 
            
            action = q_ag.get_act(cur_state,i,mode='hei')
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
            
            
            n_state = proceed_one_step(cur_state,ACTION_DIC[action])
            
            
            if n_state.end_state == 1:
                break
            
            draw(window, n_state.ball_x, n_state.ball_y, n_state.paddle_y)
            
            pygame.display.update()
            fps.tick(GAME_FPS)
            cur_state = n_state
        fps.tick(GAME_END_FPS)

    return np.mean(turns_count)

In [343]:
test_with_gui(q_ag,200)

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
