In [2]:
from PyQt5.QtCore import *

# Ball 클래스
class Ball():
    def __init__(self,x,y,w,h,c,speedx,speedy):
        super().__init__()

        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.c = c
        self.speedx = speedx
        self.speedy = speedy

    def draw(self,painter):
        self.x += self.speedx
        self.y += self.speedy

        painter.setBrush(self.c)
        painter.drawEllipse(QRectF(self.x, self.y, self.w, self.h))
        
        
# Game Class
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
import numpy as np
from ball import *
from gym import *

class MainWindow(QMainWindow):
    def __init__(self):
        super().__init__()

        self.label = QLabel()

        canvas = QPixmap(400, 300)
        self.label.setPixmap(canvas)
        self.setCentralWidget(self.label)

        self.redBall = Ball(0, 0, 20, 8.0, Qt.red, 0, 3)
        self.whtBall = Ball(200, 250, 20, 20, Qt.white, 0, 0)
        self.init_ball()
        self.explosion = False

        self.exp_img = QImage('exp.png')
        #self.collide = False
        self.timer = None
        self.exp_pos = QPointF(0.0, 0.0)

        self.firing = False

        self.timer_start()

    def draw(self):
        painter = QPainter(self.label.pixmap())
        painter.fillRect(0, 0, 400, 300, QBrush(Qt.black))

        self.redBall.draw(painter)
        self.whtBall.draw(painter)

        if self.firing:
            painter.setPen(Qt.yellow)   # 빔 발사
            painter.drawLine(self.whtBall.x+10, 380, self.whtBall.x+10, 0)

        if self.redBall.y > 300:
            self.init_ball()

        if self.explosion:
            self.sprite_anim(painter)

        self.label.repaint()

        if self.collide:
            return

        # Collision Detection
        if self.firing:
            if (self.whtBall.x-10 < self.redBall.x) and (self.whtBall.x+10 > self.redBall.x):
                #print('명중')
                self.firing = False
                self.collide = True
                if not self.explosion:
                    self.explosion = True
                    self.exp_pos.x = self.redBall.x - 20  # 폭발 위치
                    self.exp_pos.y = self.redBall.y - 20
                    self.init_ball()
            else:
                self.firing = False

    def init_ball(self):
        self.redBall.x = np.random.uniform(low=0, high=380, size=1)
        self.redBall.y = 0
        self.whtBall.y = 280
        self.whtBall.speedy = 0
        self.collide = False
        print('init_ball()')

    def timer_start(self):
        self.timer = QTimer()
        self.timer.setInterval(30)
        self.timer.timeout.connect(self.draw)
        self.timer.start()

    def timer_stop(self):
        self.timer.stop()

    def keyPressEvent(self, evt):
        if evt.key()==Qt.Key_Space:
            self.fire()
        if evt.key()==Qt.Key_Right:
            self.move_right()
        elif evt.key()==Qt.Key_Left:
            self.move_left()

    def sprite_anim(self, painter):
        target = QRectF(self.exp_pos.x, self.exp_pos.y, 62.0, 62.0)
        source = QRectF(0, 0, 62.0, 62.0)  # 폭발 스프라이트의 첫 프레임만 표시한다
        painter.drawImage(target, self.exp_img, source)
        self.explosion = False

    def move_right(self):
        self.whtBall.x += 5

    def move_left(self):
        self.whtBall.x -= 5

    def fire(self):
        self.firing = True

#app = QApplication(sys.argv)
#window = MainWindow()
#window.show()
#app.exec_()


# Environment Class

import gym
from gym import spaces
import numpy as np
import threading
import time
import sys
from PyQt5.QtWidgets import *

# Agent 가 학습할 때 사용하는 환경을 제공하는 클래스
# Agent 는 학습을 위해 이 클래스의 멤버변수인 action_space, observation_sapace 를 사용하고
# step(), reset() 함수를 호출할 것이므로 이들 변수와 함수를 통해서 게임이 시작되어 작동하며
# 종료될 수 있어야 한다.
# 이 게임은 step()함수가 1000번 호출되면 1회의 에피소드가 실행된 것으로 작성하였다

class GameEnv(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self):
        super(GameEnv, self).__init__()

        # Define action and observation space
        # They must be gym.spaces objects
        # Example when using discrete actions:
        #self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        # Example for using image as input:
        #self.observation_space = spaces.Box(low=0, high=380, shape=
        #(HEIGHT, WIDTH, N_CHANNELS), dtype=np.uint8)

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=0, high=380, shape=(2,), dtype=np.float32)

        self.score = 0
        self.total_scores = 0
        self.done = False
        self.counts = 0

        #게임 클래스에 게임루프(무한루프)가 실행되므로 현재 코드와 동시에 실행하도록 하려면 Thread 가 필요함
        self.game = None
        self.game_thread = threading.Thread(target=self.game_window)
        #self.game_thread.daemon = True   # 여기서는 절대 사용 금지
        self.game_thread.start()

        print('Env 생성')

    # 게임을 쓰레드로 실행하기 위해 함수에 포함
    def game_window(self):
        app = QApplication(sys.argv)
        self.game = MainWindow()
        self.game.show()
        app.exec_()

    def step(self, action):
        self.score = 0
        if action==0:
            if self.game.whtBall.x > 0:
                self.game.move_left()
                if self.game.redBall.x < self.game.whtBall.x:
                    self.score = 1
        elif action==1:
            if self.game.whtBall.x < 380:
                self.game.move_right()
                if self.game.redBall.x > self.game.whtBall.x:
                    self.score = 1
        elif action==2:
            self.game.fire()
            if (self.game.whtBall.x-10 < self.game.redBall.x) and (self.game.whtBall.x+10 > self.game.redBall.x):
                self.score = 1
        self.counts += 1
        if self.counts == 1000:
            self.done = True
        self.total_scores += self.score
        obs = [self.game.redBall.x, self.game.whtBall.x]
        return obs, self.score, self.done, {}

    def render(self, mode='human'):
        pass

    def reset(self):
        self.total_scores = 0
        self.counts = 0
        self.done = False
        self.game.init_ball()
        obs = [self.game.redBall.x, self.game.whtBall.x]
        return obs

    # Env 작동 테스트
    def test(self):
        while not self.game:
            time.sleep(0.1)
        for i in range(1, 101):  # 100회의 에피소드 테스트
            obs = self.reset()

            while not self.done:
                action = self.action_space.sample()
                obs, score, done, _ = self.step(action)
                print('Episode:', i, 'Step:', self.counts, 'Score:',self.score, 'Total Score:', self.total_scores)
                time.sleep(0.03)


#gameEnv = GameEnv()
#gameEnv.test()


In [37]:
env = GameEnv()
print(env.action_space.sample())
print(env.observation_space.sample())
print(env.observation_space.shape)

Env 생성
0
[240.07364 346.7026 ]
(2,)
init_ball()
init_ball()


In [4]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

# 뉴럴 네트웍 생성
def build_model(actions):
    model = Sequential()
    
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(512,activation='relu'))
    model.add(Dense(512,activation='relu'))
    model.add(Dense(512,activation='relu'))
    
    model.add(Dense(actions,activation='linear'))
    return model

actions = 3
model = build_model(actions)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 2)                 0         
_________________________________________________________________
dense (Dense)                (None, 512)               1536      
_________________________________________________________________
dense_1 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 1539      
Total params: 528,387
Trainable params: 528,387
Non-trainable params: 0
_________________________________________________________________


In [15]:
del model

In [5]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy, GreedyQPolicy
from rl.policy import EpsGreedyQPolicy
from rl.policy import LinearAnnealedPolicy

def build_agent2(model,actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                 value_max = 1., value_min=.1, value_test=.05, nb_steps=50000)
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

env = GameEnv()

dqn = build_agent2(model,3)
dqn.compile(Adam(lr=1e-3),metrics=['mae'])
dqn.fit(env, nb_steps=100000,visualize=False, verbose=0)


Env 생성
init_ball()
init_ball()
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.




init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init

init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init

<tensorflow.python.keras.callbacks.History at 0x21e338e54c0>

init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()


In [6]:
# 학습된 모델 디스크에 저장
dqn.save_weights('dqn_shooting_weights.h5f', overwrite=True)

init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()


In [15]:
# 저장된 모델 로드
#env = GameEnv()
actions = env.action_space.n
states = env.observation_space.shape[0]
model = build_model(states)
dqn = build_agent2(model, 3)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

dqn.load_weights('dqn_shooting_weights.h5f')

scores = dqn.test(env, nb_episodes=1000, visualize=True)
print(np.mean(scores.history['episode_reward']))

ValueError: Model output "Tensor("dense_27/BiasAdd:0", shape=(None, 2), dtype=float32)" has invalid shape. DQN expects a model that has one dimension for each action, in this case 3.

init_ball()
init_ball()
