In [1]:
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
import numpy as np
from ball import *

# Ball 클래스
class Ball():
    def __init__(self, x, y, w, h, c, speedx, speedy):
        super().__init__()

        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.c = c
        self.speedx = speedx
        self.speedy = speedy

    def draw(self, painter):
        self.x += self.speedx
        self.y += self.speedy

        painter.setBrush(self.c)
        painter.setPen(self.c)
        painter.drawEllipse(QRectF(self.x, self.y, self.w, self.h))

# Line 클래스
class Line():
    def __init__(self, ix, iy, c, angle):
        # 선의 시작점(흰 공의 중심)
        self.sx = ix + 10
        self.sy = iy + 10
        self.c = c
        self.angle = angle

    def draw(self, painter):
        # 선 방정식 구하기
        if self.angle == 0:
            self.tx = 0
            self.ty = 290
        elif self.angle != 90:
            self.a = np.tan(self.angle / 180 * np.pi)  # 기울기
            self.b = self.sy - self.a * self.sx  # 절편

            if self.a > 0:  # 기울기가 양수일 때
                # 선이 벽과 만나는 점의 좌표값
                self.tx = max(-1 * self.b / self.a, 0)
                self.ty = max(self.b, 0)

            elif self.a < 0:  # 기울기가 음수일 때
                # 선과 벽이 만나는 점의 좌표값
                self.tx = min(-1 * self.b / self.a, 400)
                self.ty = max(0, 400 * self.a + self.b)

        else:  # 각이 90도 -> 기울기 무한대
            self.tx = 210
            self.ty = 0
            self.a = 9999
            self.b = 0

        painter.setPen(self.c)
        painter.drawLine(self.sx, self.sy, self.tx, self.ty)

# Game Class
class MainWindowA(QMainWindow):
    def __init__(self):
        super().__init__()

        self.label = QLabel()

        canvas = QPixmap(400, 300)
        self.label.setPixmap(canvas)
        self.setCentralWidget(self.label)

        self.redBall = Ball(0, 0, 10, 10, Qt.red, 0, 0)
        self.whtBall = Ball(200, 250, 20, 20, Qt.white, 0, 0)
        self.shtLine = Line(200, 280, Qt.cyan, 90)

        self.timecounter = 0
        self.init_ball()
        self.explosion = False

        self.exp_img = QImage('exp.png')
        # self.collide = False
        self.timer = None
        self.exp_pos = QPointF(0.0, 0.0)

        self.firing = False

        self.timer_start()

    def draw(self):
        self.timecounter += 1
        painter = QPainter(self.label.pixmap())
        painter.fillRect(0, 0, 400, 300, QBrush(Qt.black))

        self.shtLine.draw(painter)
        self.redBall.draw(painter)
        self.whtBall.draw(painter)

        if self.firing:
            painter.setPen(Qt.red)  # 빔 발사
            painter.drawLine(self.shtLine.sx, self.shtLine.sy, self.shtLine.tx, self.shtLine.ty)
            self.whtBall.draw(painter)

        if self.timecounter % 90 == 0:
            self.init_ball()

        if self.explosion:
            self.sprite_anim(painter)

        self.label.repaint()

        if self.collide:
            return

        # Collision Detection
        if self.firing:
            # 명중 조건 - 조준선과 빨간볼 중심 사이의 거리가 빨간공 반지름보다 작거나 같을 때
            if np.abs(self.shtLine.a * (self.redBall.x+self.redBall.w/2) -\
                    self.redBall.y+self.redBall.h/2+ self.shtLine.b)/np.sqrt(
                self.shtLine.a**2 + 1) <= self.redBall.w/2:
                #print('명중')
                #print('빨간공 위치(%d, %d), 선 각도 %d, 기울기 %f, 절편 %f' % (self.redBall.x, self.redBall.y,
                #                                                  self.shtLine.angle, self.shtLine.a, self.shtLine.b))
                self.timecounter = 0
                self.firing = False
                self.collide = True
                if not self.explosion:
                    self.explosion = True
                    self.exp_pos.x = self.redBall.x - 20  # 폭발 위치
                    self.exp_pos.y = self.redBall.y - 20
                    self.init_ball()
            else:
                # print('불발')
                # print('빨간공 위치(%d, %d), 선 각도 %d, 기울기 %f, 절편 %f' % (self.redBall.x, self.redBall.y,
                #                                                  self.shtLine.angle, self.shtLine.a, self.shtLine.b))
                self.firing = False

    def init_ball(self):
        self.redBall.x = np.random.uniform(low=0, high=380, size=1)
        self.redBall.y = 0
        self.whtBall.y = 280
        self.whtBall.speedy = 0
        self.collide = False
        print('init_ball()')

    def timer_start(self):
        self.timer = QTimer()
        self.timer.setInterval(30)
        self.timer.timeout.connect(self.draw)
        self.timer.start()

    def timer_stop(self):
        self.timer.stop()

    def keyPressEvent(self, evt):
        if evt.key() == Qt.Key_Space:
            self.fire()
        if evt.key() == Qt.Key_Right:
            self.move_right()
        elif evt.key() == Qt.Key_Left:
            self.move_left()

    def sprite_anim(self, painter):
        s = 62
        target = QRectF(self.exp_pos.x, self.exp_pos.y, 62.0, 62.0)
        source = QRectF(0, 0, 62.0, 62.0)  # 폭발 스프라이트의 첫 프레임만 표시한다
        painter.drawImage(target, self.exp_img, source)

        self.explosion = False

    def move_right(self):
        self.shtLine.angle += 1
        # print(self.shtLine.angle)

    def move_left(self):
        self.shtLine.angle -= 1
        # print(self.shtLine.angle)

    def fire(self):
        self.firing = True


#app = QApplication(sys.argv)
#window = MainWindowA()
#window.show()
#app.exec_()

init_ball()
init_ball()


  painter.drawLine(self.sx, self.sy, self.tx, self.ty)


init_ball()


  painter.drawLine(self.shtLine.sx, self.shtLine.sy, self.shtLine.tx, self.shtLine.ty)


init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()


0

In [5]:
# Environment Class

import gym
from gym import spaces
import numpy as np
import threading
import time
import sys
from PyQt5.QtWidgets import *

# Agent 가 학습할 때 사용하는 환경을 제공하는 클래스
# Agent 는 학습을 위해 이 클래스의 멤버변수인 action_space, observation_sapace 를 사용하고
# step(), reset() 함수를 호출할 것이므로 이들 변수와 함수를 통해서 게임이 시작되어 작동하며
# 종료될 수 있어야 한다.
# 이 게임은 step()함수가 1000번 호출되면 1회의 에피소드가 실행된 것으로 작성하였다

class GameEnv(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self):
        super(GameEnv, self).__init__()

        # Define action and observation space
        # They must be gym.spaces objects
        # Example when using discrete actions:
        #self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        # Example for using image as input:
        #self.observation_space = spaces.Box(low=0, high=380, shape=
        #(HEIGHT, WIDTH, N_CHANNELS), dtype=np.uint8)

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=0, high=380, shape=(2,), dtype=np.float32)

        self.score = 0
        self.total_scores = 0
        self.done = False
        self.counts = 0

        #게임 클래스에 게임루프(무한루프)가 실행되므로 현재 코드와 동시에 실행하도록 하려면 Thread 가 필요함
        self.game = None
        self.game_thread = threading.Thread(target=self.game_window)
        #self.game_thread.daemon = True   # 여기서는 절대 사용 금지
        self.game_thread.start()

        print('Env 생성')

    # 게임을 쓰레드로 실행하기 위해 함수에 포함
    def game_window(self):
        app = QApplication(sys.argv)
        self.game = MainWindowA()
        self.game.show()
        app.exec_()

    def step(self, action):
        self.score = 0
        if action==0:
            if self.game.shtLine.angle >= 0:
                self.game.move_left()
                if self.game.redBall.x + self.game.redBall.w < self.game.shtLine.tx:
                    self.score = 1
        elif action==1:
            if self.game.shtLine.angle <= 180:
                self.game.move_right()
                if self.game.redBall.x + self.game.redBall.w > self.game.shtLine.tx:
                    self.score = 1
        elif action==2:
            self.game.fire()
            if np.abs(self.game.shtLine.a * (self.game.redBall.x+self.game.redBall.w/2) -\
                    self.game.redBall.y+self.game.redBall.h/2+ self.game.shtLine.b)/np.sqrt(
                self.game.shtLine.a**2 + 1) <= self.game.redBall.w/2:
                self.score = 1
                
        self.counts += 1
        
        if self.counts == 1000:
            self.done = True
            
        self.total_scores += self.score
        obs = [self.game.redBall.x, self.game.shtLine.tx]
        return obs, self.score, self.done, {}

    def render(self, mode='human'):
        pass

    def reset(self):
        self.total_scores = 0
        self.counts = 0
        self.done = False
        self.game.init_ball()
        obs = [self.game.redBall.x, self.game.shtLine.tx]
        return obs

    # Env 작동 테스트
    def test(self):
        while not self.game:
            time.sleep(0.1)
        for i in range(1, 101):  # 100회의 에피소드 테스트
            obs = self.reset()

            while not self.done:
                action = self.action_space.sample()
                obs, score, done, _ = self.step(action)
                print('Episode:', i, 'Step:', self.counts, 'Score:',self.score, 'Total Score:', self.total_scores)
                time.sleep(0.03)


#gameEnv = GameEnv()
#gameEnv.test()


Env 생성
init_ball()
init_ball()
Episode: 1 Step: 1 Score: 0 Total Score: 0
Episode: 1 Step: 2 Score: 0 Total Score: 0
Episode:

  painter.drawLine(self.sx, self.sy, self.tx, self.ty)
  painter.drawLine(self.shtLine.sx, self.shtLine.sy, self.shtLine.tx, self.shtLine.ty)


 1 Step: 3 Score: 0 Total Score: 0
Episode: 1 Step: 4 Score: 0 Total Score: 0
Episode: 1 Step: 5 Score: 0 Total Score: 0
Episode: 1 Step: 6 Score: 0 Total Score: 0
Episode: 1 Step: 7 Score: 1 Total Score: 1
Episode: 1 Step: 8 Score: 0 Total Score: 1
Episode: 1 Step: 9 Score: 0 Total Score: 1
Episode: 1 Step: 10 Score: 0 Total Score: 1
Episode: 1 Step: 11 Score: 1 Total Score: 2
Episode: 1 Step: 12 Score: 0 Total Score: 2
Episode: 1 Step: 13 Score: 1 Total Score: 3
Episode: 1 Step: 14 Score: 1 Total Score: 4
Episode: 1 Step: 15 Score: 0 Total Score: 4
Episode: 1 Step: 16 Score: 0 Total Score: 4
Episode: 1 Step: 17 Score: 0 Total Score: 4
Episode: 1 Step: 18 Score: 0 Total Score: 4
Episode: 1 Step: 19 Score: 1 Total Score: 5
Episode: 1 Step: 20 Score: 0 Total Score: 5
Episode: 1 Step: 21 Score: 0 Total Score: 5
Episode: 1 Step: 22 Score: 0 Total Score: 5
Episode: 1 Step: 23 Score: 0 Total Score: 5
Episode: 1 Step: 24 Score: 0 Total Score: 5
Episode: 1 Step: 25 Score: 0 Total Score: 5
Epi

Episode: 1 Step: 185 Score: 0 Total Score: 50
Episode: 1 Step: 186 Score: 0 Total Score: 50
Episode: 1 Step: 187 Score: 1 Total Score: 51
Episode: 1 Step: 188 Score: 0 Total Score: 51
Episode: 1 Step: 189 Score: 1 Total Score: 52
Episode: 1 Step: 190 Score: 0 Total Score: 52
Episode: 1 Step: 191 Score: 1 Total Score: 53
Episode: 1 Step: 192 Score: 1 Total Score: 54
Episode: 1 Step: 193 Score: 0 Total Score: 54
Episode: 1 Step: 194 Score: 1 Total Score: 55
Episode: 1 Step: 195 Score: 0 Total Score: 55
Episode: 1 Step: 196 Score: 1 Total Score: 56
Episode: 1 Step: 197 Score: 0 Total Score: 56
Episode: 1 Step: 198 Score: 0 Total Score: 56
Episode: 1 Step: 199 Score: 0 Total Score: 56
Episode: 1 Step: 200 Score: 0 Total Score: 56
Episode: 1 Step: 201 Score: 0 Total Score: 56
Episode: 1 Step: 202 Score: 1 Total Score: 57
Episode: 1 Step: 203 Score: 0 Total Score: 57
Episode: 1 Step: 204 Score: 0 Total Score: 57
Episode: 1 Step: 205 Score: 0 Total Score: 57
Episode: 1 Step: 206 Score: 0 Tota

Episode: 1 Step: 368 Score: 0 Total Score: 116
Episode: 1 Step: 369 Score: 1 Total Score: 117
Episode: 1 Step: 370 Score: 1 Total Score: 118
Episode: 1 Step: 371 Score: 1 Total Score: 119
Episode: 1 Step: 372 Score: 0 Total Score: 119
Episode: 1 Step: 373 Score: 0 Total Score: 119
Episode: 1 Step: 374 Score: 1 Total Score: 120
Episode: 1 Step: 375 Score: 0 Total Score: 120
Episode: 1 Step: 376 Score: 1 Total Score: 121
Episode: 1 Step: 377 Score: 0 Total Score: 121
Episode: 1 Step: 378 Score: 0 Total Score: 121
Episode: 1 Step: 379 Score: 0 Total Score: 121
Episode: 1 Step: 380 Score: 1 Total Score: 122
Episode: 1 Step: 381 Score: 1 Total Score: 123
Episode: 1 Step: 382 Score: 1 Total Score: 124
Episode: 1 Step: 383 Score: 0 Total Score: 124
Episode: 1 Step: 384 Score: 1 Total Score: 125
Episode: 1 Step: 385 Score: 0 Total Score: 125
Episode: 1 Step: 386 Score: 0 Total Score: 125
Episode: 1 Step: 387 Score: 1 Total Score: 126
Episode: 1 Step: 388 Score: 0 Total Score: 126
Episode: 1 St

Episode: 1 Step: 545 Score: 1 Total Score: 184
Episode: 1 Step: 546 Score: 0 Total Score: 184
Episode: 1 Step: 547 Score: 0 Total Score: 184
Episode: 1 Step: 548 Score: 0 Total Score: 184
Episode: 1 Step: 549 Score: 0 Total Score: 184
Episode: 1 Step: 550 Score: 1 Total Score: 185
Episode: 1 Step: 551 Score: 0 Total Score: 185
Episode: 1 Step: 552 Score: 0 Total Score: 185
Episode: 1 Step: 553 Score: 0 Total Score: 185
Episode: 1 Step: 554 Score: 0 Total Score: 185
Episode: 1 Step: 555 Score: 0 Total Score: 185
Episode: 1 Step: 556 Score: 0 Total Score: 185
Episode: 1 Step: 557 Score: 0 Total Score: 185
Episode: 1 Step: 558 Score: 0 Total Score: 185
Episode: 1 Step: 559 Score: 0 Total Score: 185
Episode: 1 Step: 560 Score: 0 Total Score: 185
Episode: 1 Step: 561 Score: 1 Total Score: 186
Episode: 1 Step: 562 Score: 0 Total Score: 186
Episode: 1 Step: 563 Score: 0 Total Score: 186
Episode: 1 Step: 564 Score: 0 Total Score: 186
Episode: 1 Step: 565 Score: 0 Total Score: 186
Episode: 1 St

Episode: 1 Step: 723 Score: 0 Total Score: 238
Episode: 1 Step: 724 Score: 1 Total Score: 239
Episode: 1 Step: 725 Score: 0 Total Score: 239
Episode: 1 Step: 726 Score: 0 Total Score: 239
Episode: 1 Step: 727 Score: 0 Total Score: 239
Episode: 1 Step: 728 Score: 0 Total Score: 239
Episode: 1 Step: 729 Score: 0 Total Score: 239
Episode: 1 Step: 730 Score: 1 Total Score: 240
Episode: 1 Step: 731 Score: 0 Total Score: 240
Episode: 1 Step: 732 Score: 1 Total Score: 241
Episode: 1 Step: 733 Score: 1 Total Score: 242
Episode: 1 Step: 734 Score: 0 Total Score: 242
Episode: 1 Step: 735 Score: 1 Total Score: 243
Episode: 1 Step: 736 Score: 0 Total Score: 243
Episode: 1 Step: 737 Score: 0 Total Score: 243
Episode: 1 Step: 738 Score: 0 Total Score: 243
Episode: 1 Step: 739 Score: 0 Total Score: 243
Episode: 1 Step: 740 Score: 0 Total Score: 243
Episode: 1 Step: 741 Score: 0 Total Score: 243
Episode: 1 Step: 742 Score: 0 Total Score: 243
Episode: 1 Step: 743 Score: 0 Total Score: 243
Episode: 1 St

KeyboardInterrupt: 

In [17]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

# 뉴럴 네트웍 생성
def build_model(actions):
    model = Sequential()
    
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(512,activation='relu'))
    model.add(Dense(512,activation='relu'))
    model.add(Dense(512,activation='relu'))
    
    model.add(Dense(actions,activation='linear'))
    return model


from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy, GreedyQPolicy
from rl.policy import EpsGreedyQPolicy
from rl.policy import LinearAnnealedPolicy

def build_agent2(model,actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                 value_max = 1., value_min=.1, value_test=.05, nb_steps=50000)
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

env = GameEnv()

actions = env.action_space.n

model = build_model(actions)
print(model.summary())

dqn = build_agent2(model,actions)
dqn.compile(Adam(lr=1e-3),metrics=['mae'])
dqn.fit(env, nb_steps=100000,visualize=False, verbose=0)



Env 생성
init_ball()
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_9 (Flatten)          (None, 2)                 0         
_________________________________________________________________
dense_36 (Dense)             (None, 512)               1536      
_________________________________________________________________
dense_37 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_38 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_39 (Dense)             (None, 3)                 1539      
Total params: 528,387
Trainable params: 528,387
Non-trainable params: 0
_________________________________________________________________
None
init_ball()


  painter.drawLine(self.sx, self.sy, self.tx, self.ty)
  painter.drawLine(self.shtLine.sx, self.shtLine.sy, self.shtLine.tx, self.shtLine.ty)


init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init

init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init

<tensorflow.python.keras.callbacks.History at 0x1b6a2999310>

In [11]:
# 학습된 모델 디스크에 저장
dqn.save_weights('dqn_angle_shooting_weights.h5f', overwrite=True)


  painter.drawLine(self.sx, self.sy, self.tx, self.ty)


init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()


In [2]:
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
import numpy as np
from ball import *

# Ball 클래스
class Ball():
    def __init__(self, x, y, w, h, c, speedx, speedy):
        super().__init__()

        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.c = c
        self.speedx = speedx
        self.speedy = speedy

    def draw(self, painter):
        self.x += self.speedx
        self.y += self.speedy

        painter.setBrush(self.c)
        painter.setPen(self.c)
        painter.drawEllipse(QRectF(self.x, self.y, self.w, self.h))

# Line 클래스
class Line():
    def __init__(self, ix, iy, c, angle):
        # 선의 시작점(흰 공의 중심)
        self.sx = ix + 10
        self.sy = iy + 10
        self.c = c
        self.angle = angle

    def draw(self, painter):
        # 선 방정식 구하기
        if self.angle == 0:
            self.tx = 0
            self.ty = 290
        elif self.angle != 90:
            self.a = np.tan(self.angle / 180 * np.pi)  # 기울기
            self.b = self.sy - self.a * self.sx  # 절편

            if self.a > 0:  # 기울기가 양수일 때
                # 선이 벽과 만나는 점의 좌표값
                self.tx = max(-1 * self.b / self.a, 0)
                self.ty = max(self.b, 0)

            elif self.a < 0:  # 기울기가 음수일 때
                # 선과 벽이 만나는 점의 좌표값
                self.tx = min(-1 * self.b / self.a, 400)
                self.ty = max(0, 400 * self.a + self.b)

        else:  # 각이 90도 -> 기울기 무한대
            self.tx = 210
            self.ty = 0
            self.a = 9999
            self.b = 0

        painter.setPen(self.c)
        painter.drawLine(self.sx, self.sy, self.tx, self.ty)

# Game Class
class MainWindowA(QMainWindow):
    def __init__(self):
        super().__init__()

        self.label = QLabel()

        canvas = QPixmap(400, 300)
        self.label.setPixmap(canvas)
        self.setCentralWidget(self.label)

        self.redBall = Ball(0, 0, 10, 10, Qt.red, 0, 0)
        self.whtBall = Ball(200, 250, 20, 20, Qt.white, 0, 0)
        self.shtLine = Line(200, 280, Qt.cyan, 90)

        self.timecounter = 0
        self.init_ball()
        self.explosion = False

        self.exp_img = QImage('exp.png')
        # self.collide = False
        self.timer = None
        self.exp_pos = QPointF(0.0, 0.0)

        self.firing = False

        self.timer_start()

    def draw(self):
        self.timecounter += 1
        painter = QPainter(self.label.pixmap())
        painter.fillRect(0, 0, 400, 300, QBrush(Qt.black))

        self.shtLine.draw(painter)
        self.redBall.draw(painter)
        self.whtBall.draw(painter)

        if self.firing:
            painter.setPen(Qt.red)  # 빔 발사
            painter.drawLine(self.shtLine.sx, self.shtLine.sy, self.shtLine.tx, self.shtLine.ty)
            self.whtBall.draw(painter)

        if self.timecounter % 90 == 0:
            self.init_ball()

        if self.explosion:
            self.sprite_anim(painter)

        self.label.repaint()

        if self.collide:
            return

        # Collision Detection
        if self.firing:
            # 명중 조건 - 조준선과 빨간볼 중심 사이의 거리가 빨간공 반지름보다 작거나 같을 때
            if np.abs(self.shtLine.a * (self.redBall.x+self.redBall.w/2) -\
                    self.redBall.y+self.redBall.h/2+ self.shtLine.b)/np.sqrt(
                self.shtLine.a**2 + 1) <= self.redBall.w/2:
                #print('명중')
                #print('빨간공 위치(%d, %d), 선 각도 %d, 기울기 %f, 절편 %f' % (self.redBall.x, self.redBall.y,
                #                                                  self.shtLine.angle, self.shtLine.a, self.shtLine.b))
                self.timecounter = 0
                self.firing = False
                self.collide = True
                if not self.explosion:
                    self.explosion = True
                    self.exp_pos.x = self.redBall.x - 20  # 폭발 위치
                    self.exp_pos.y = self.redBall.y - 20
                    self.init_ball()
            else:
                # print('불발')
                # print('빨간공 위치(%d, %d), 선 각도 %d, 기울기 %f, 절편 %f' % (self.redBall.x, self.redBall.y,
                #                                                  self.shtLine.angle, self.shtLine.a, self.shtLine.b))
                self.firing = False

    def init_ball(self):
        self.redBall.x = np.random.uniform(low=0, high=380, size=1)
        self.redBall.y = 0
        self.whtBall.y = 280
        self.whtBall.speedy = 0
        self.collide = False
        print('init_ball()')

    def timer_start(self):
        self.timer = QTimer()
        self.timer.setInterval(30)
        self.timer.timeout.connect(self.draw)
        self.timer.start()

    def timer_stop(self):
        self.timer.stop()

    def keyPressEvent(self, evt):
        if evt.key() == Qt.Key_Space:
            self.fire()
        if evt.key() == Qt.Key_Right:
            self.move_right()
        elif evt.key() == Qt.Key_Left:
            self.move_left()

    def sprite_anim(self, painter):
        s = 62
        target = QRectF(self.exp_pos.x, self.exp_pos.y, 62.0, 62.0)
        source = QRectF(0, 0, 62.0, 62.0)  # 폭발 스프라이트의 첫 프레임만 표시한다
        painter.drawImage(target, self.exp_img, source)

        self.explosion = False

    def move_right(self):
        self.shtLine.angle += 1
        # print(self.shtLine.angle)

    def move_left(self):
        self.shtLine.angle -= 1
        # print(self.shtLine.angle)

    def fire(self):
        self.firing = True

# Environment Class

import gym
from gym import spaces
import numpy as np
import threading
import time
import sys
from PyQt5.QtWidgets import *

# Agent 가 학습할 때 사용하는 환경을 제공하는 클래스
# Agent 는 학습을 위해 이 클래스의 멤버변수인 action_space, observation_sapace 를 사용하고
# step(), reset() 함수를 호출할 것이므로 이들 변수와 함수를 통해서 게임이 시작되어 작동하며
# 종료될 수 있어야 한다.
# 이 게임은 step()함수가 1000번 호출되면 1회의 에피소드가 실행된 것으로 작성하였다

class GameEnv(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self):
        super(GameEnv, self).__init__()

        # Define action and observation space
        # They must be gym.spaces objects
        # Example when using discrete actions:
        #self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        # Example for using image as input:
        #self.observation_space = spaces.Box(low=0, high=380, shape=
        #(HEIGHT, WIDTH, N_CHANNELS), dtype=np.uint8)

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=0, high=380, shape=(2,), dtype=np.float32)

        self.score = 0
        self.total_scores = 0
        self.done = False
        self.counts = 0

        #게임 클래스에 게임루프(무한루프)가 실행되므로 현재 코드와 동시에 실행하도록 하려면 Thread 가 필요함
        self.game = None
        self.game_thread = threading.Thread(target=self.game_window)
        #self.game_thread.daemon = True   # 여기서는 절대 사용 금지
        self.game_thread.start()

        print('Env 생성')

    # 게임을 쓰레드로 실행하기 위해 함수에 포함
    def game_window(self):
        app = QApplication(sys.argv)
        self.game = MainWindowA()
        self.game.show()
        app.exec_()

    def step(self, action):
        self.score = 0
        if action==0:
            if self.game.shtLine.angle >= 0:
                self.game.move_left()
                if self.game.redBall.x + self.game.redBall.w < self.game.shtLine.tx:
                    self.score = 1
        elif action==1:
            if self.game.shtLine.angle <= 180:
                self.game.move_right()
                if self.game.redBall.x + self.game.redBall.w > self.game.shtLine.tx:
                    self.score = 1
        elif action==2:
            self.game.fire()
            if np.abs(self.game.shtLine.a * (self.game.redBall.x+self.game.redBall.w/2) -\
                    self.game.redBall.y+self.game.redBall.h/2+ self.game.shtLine.b)/np.sqrt(
                self.game.shtLine.a**2 + 1) <= self.game.redBall.w/2:
                self.score = 1
                
        self.counts += 1
        
        if self.counts == 1000:
            self.done = True
            
        self.total_scores += self.score
        obs = [self.game.redBall.x, self.game.shtLine.tx]
        return obs, self.score, self.done, {}

    def render(self, mode='human'):
        pass

    def reset(self):
        self.total_scores = 0
        self.counts = 0
        self.done = False
        self.game.init_ball()
        obs = [self.game.redBall.x, self.game.shtLine.tx]
        return obs

    # Env 작동 테스트
    def test(self):
        while not self.game:
            time.sleep(0.1)
        for i in range(1, 101):  # 100회의 에피소드 테스트
            obs = self.reset()

            while not self.done:
                action = self.action_space.sample()
                obs, score, done, _ = self.step(action)
                print('Episode:', i, 'Step:', self.counts, 'Score:',self.score, 'Total Score:', self.total_scores)
                time.sleep(0.03)

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

# 뉴럴 네트웍 생성
def build_model(actions):
    model = Sequential()
    
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(512,activation='relu'))
    model.add(Dense(512,activation='relu'))
    model.add(Dense(512,activation='relu'))
    
    model.add(Dense(actions,activation='linear'))
    return model


from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy, GreedyQPolicy
from rl.policy import EpsGreedyQPolicy
from rl.policy import LinearAnnealedPolicy

def build_agent2(model,actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                 value_max = 1., value_min=.1, value_test=.05, nb_steps=50000)
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [3]:
env = GameEnv()

actions = env.action_space.n

model = build_model(actions)
print(model.summary())

dqn = build_agent2(model,actions)
dqn.compile(Adam(lr=1e-3),metrics=['mae'])
dqn.fit(env, nb_steps=100000,visualize=False, verbose=0)

Env 생성
init_ball()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 2)                 0         
_________________________________________________________________
dense (Dense)                (None, 512)               1536      
_________________________________________________________________
dense_1 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 1539      
Total params: 528,387
Trainable params: 528,387
Non-trainable params: 0
_________________________________________________________________
None
init_ball()
Instructions for updating:
This property should not be used in TensorFlow 2.0, a

  painter.drawLine(self.sx, self.sy, self.tx, self.ty)
  painter.drawLine(self.shtLine.sx, self.shtLine.sy, self.shtLine.tx, self.shtLine.ty)


init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init

init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init

init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()


<tensorflow.python.keras.callbacks.History at 0x240b4b4bf40>

init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()
init_ball()


In [4]:
# 저장된 모델 로드
#env = GameEnv()
actions = env.action_space.n
states = env.observation_space.shape[0]
model = build_model(states)
dqn = build_agent2(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

dqn.load_weights('dqn_angle_shooting_weights.h5f')

scores = dqn.test(env, nb_episodes=1000, visualize=True)
print(np.mean(scores.history['episode_reward']))

ValueError: Model output "Tensor("dense_11/BiasAdd:0", shape=(None, 2), dtype=float32)" has invalid shape. DQN expects a model that has one dimension for each action, in this case 3.