## 빅데이터 활용 AI 설계
# 강화학습 : catch 학습결과 평가
- 학습 소스 : 49_RL_catch_keras.ipynb
- 'catch.h5' 파일에서 모델 로딩

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pygame
import collections

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
from scipy.misc import imresize

In [3]:
# Initialise pygame before any font or display call is made.
pygame.init()

# RGB colour tuples used for all drawing in this notebook.
COLOR_WHITE = (255,255,255)
COLOR_BLACK = (0,0,0)

# Font used to render the "Games / Score" status line at the bottom of the window.
font = pygame.font.SysFont('Comic Sans MS', 30)

In [4]:
def step(action):
    """Advance the catch game by one frame, redraw it and grab the screen.

    Relies on module-level state: ``screen``, ``clock``, ``font``, ``epoch``,
    ``num_wins``, ``paddle_velocity`` and ``ball_velocity`` are read, while
    ``paddle_x``, ``ball_x``, ``ball_y``, ``num_tries`` and ``game_score``
    are updated in place.

    Args:
        action: Integer action; the paddle moves by
            ``(action - 1) * paddle_velocity`` pixels, i.e. 0 = left,
            1 = stay, 2 = right.

    Returns:
        A ``(frame, reward, game_over)`` tuple where ``frame`` is the 2-D
        pixel array of the rendered screen, ``reward`` is 1 (ball caught),
        -1 (ball missed) or 0 (ball still falling), and ``game_over`` is
        True once the ball has reached paddle height.
    """
    global paddle_x, ball_x, ball_y, num_tries, game_score

    # Let pygame service its internal event queue on every frame.
    pygame.event.pump()

    # Move the paddle. Note that an overshoot is not clamped to the edge:
    # the paddle is placed one step (paddle_velocity) inside the boundary.
    right_limit = 400 - 50
    paddle_x += (action - 1) * paddle_velocity
    if paddle_x < 0:
        paddle_x = paddle_velocity
    if paddle_x > right_limit:
        paddle_x = right_limit - paddle_velocity

    # Clear the whole frame, then draw the status line at the bottom.
    screen.fill(COLOR_BLACK)
    status_bar = pygame.Rect(0, 360, 400, 40)
    pygame.draw.rect(screen, COLOR_BLACK, status_bar)
    label = font.render('Games: %d, Score: %d' % (epoch, num_wins), True, COLOR_WHITE)
    label_x = 200 - label.get_width() // 2
    screen.blit(label, (label_x, 350 + 15))

    # Drop the ball, then draw ball and paddle; the returned rects are
    # reused for the collision test below.
    ball_y += ball_velocity
    ball_rect = pygame.draw.rect(screen, COLOR_WHITE, pygame.Rect(ball_x, ball_y, 20, 20))
    paddle_rect = pygame.draw.rect(screen, COLOR_WHITE, pygame.Rect(paddle_x, 350, 50, 10))

    reward = 0
    game_over = False
    if ball_y >= 340:
        # The ball has reached paddle height: score it and respawn the
        # ball at a random column near the ceiling.
        caught = ball_rect.colliderect(paddle_rect)
        reward = 1 if caught else -1
        game_score += reward

        ball_x = np.random.randint(400 - 20)
        ball_y = 10

        num_tries += 1
        game_over = True

    pygame.display.flip()

    # Capture the rendered frame, then cap the loop at 30 frames per second.
    frame = pygame.surfarray.array2d(screen)
    clock.tick(30)

    return frame, reward, game_over

### Keras 적용

In [5]:
from keras.models import load_model

# Load the trained network from 'catch.h5' (presumably written by the
# training notebook 49_RL_catch_keras.ipynb mentioned in the header).
model = load_model('catch.h5')
# Re-compiling is left disabled: the cells below only call model.predict().
#model.compile(loss='mse', optimizer='rmsprop')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [6]:
# Print the layer-by-layer architecture and parameter counts of the loaded model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 20, 20, 32)        8224      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 10, 10, 64)        32832     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 10, 10, 64)        36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 6400)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               3277312   
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 1539      
Total params: 3,356,835
Trainable params: 3,356,835
Non-trainable params: 0
_________________________________________________________________


- predict() 테스트

In [7]:
# Smoke-test predict() with random data shaped like one game state:
# a batch of 1 holding four stacked 80x80 frames, the same shape as the
# s_t array built in the demo loop further down.
a = np.random.rand(1,80,80,4)
q = model.predict(a)

In [8]:
# Display the model output for the random input: one value per action (3 in total).
q

array([[-1.7161096, -2.864286 , -1.9324287]], dtype=float32)

### 학습 결과 시연

In [9]:
# Play NUM_EPOCHS games with the trained model choosing every action after the
# first frame, and report the running number of catches.
# Each game is a single ball drop: step() sets game_over as soon as the ball
# reaches paddle height.
NUM_EPOCHS = 10

num_wins = 0
# Transition log (previous state, action, reward, next state, done), kept so
# it can still be inspected after the run; nothing in this cell reads it.
experience = collections.deque(maxlen=50000)

# Game constants, window and clock are identical for every game, so they are
# created once here instead of once per game. step() reads them as globals.
paddle_velocity = 20
ball_velocity = 10
screen = pygame.display.set_mode((400,400))
clock = pygame.time.Clock()

for epoch in range(NUM_EPOCHS):
    # game reset
    frames = collections.deque(maxlen=4)  # the four most recent frames
    paddle_x = 200
    ball_x = np.random.randint(400-20)
    ball_y = 10 # ceiling
    num_tries = 0
    game_score = 0
    game_over = False
    first_frame = True

    # game restart
    while not game_over:
        if first_frame:
            # No state exists yet, so the first action is random.
            a_t = np.random.randint(3)
        else:
            # Greedy policy: take the action with the largest predicted value.
            s_tm1 = s_t
            q = model.predict(s_t)[0]
            a_t = np.argmax(q)

        # DO ACTION!!
        frame, r_t, game_over = step(a_t)

        if r_t == 1:
            num_wins += 1

        # Downscale the frame to 80x80 and divide by 255.
        # NOTE(review): scipy.misc.imresize is deprecated (see the warning
        # printed by this cell); any replacement must reproduce the same
        # pixel scaling the model was trained with - confirm before swapping.
        img = (imresize(frame, [80,80]).astype('float'))/255.
        if not frames:
            # First frame of a game: fill the whole stack with copies of it.
            for i in range(4): frames.append(img)
        else:
            frames.append(img)

        # (4,80,80) -> (80,80,4), then add the batch axis.
        s_t = np.moveaxis(np.array(frames), 0, 2)
        s_t = np.expand_dims(s_t, axis=0) # (1,80,80,4)

        if first_frame:
            # There is no previous state to pair with yet.
            first_frame = False
            continue

        experience.append([s_tm1, a_t, r_t, s_t, game_over])

    print('Epoch: %04d/%d | Win Count: %d' % (epoch+1, NUM_EPOCHS, num_wins))

    # Refresh the status line so the window shows the final tally of this game.
    pygame.draw.rect(screen, COLOR_BLACK, pygame.Rect(0, 360, 400, 40))
    score_text = font.render('Games: %d, Score: %d' % (epoch+1, num_wins), True, COLOR_WHITE)
    screen.blit(score_text, (200-score_text.get_width()//2, 350+15))
    pygame.display.flip()

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.


Epoch: 0001/10 | Win Count: 1
Epoch: 0002/10 | Win Count: 1
Epoch: 0003/10 | Win Count: 1
Epoch: 0004/10 | Win Count: 1
Epoch: 0005/10 | Win Count: 1
Epoch: 0006/10 | Win Count: 2
Epoch: 0007/10 | Win Count: 3
Epoch: 0008/10 | Win Count: 4
Epoch: 0009/10 | Win Count: 4
Epoch: 0010/10 | Win Count: 4


In [10]:
# Shut pygame down once the demo is finished.
pygame.quit()