In [1]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from collections import deque
import numpy as np
import random

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
class DQNAgent:
    """Epsilon-greedy deep Q-learning agent backed by a small Keras network.

    Transitions are stored in a bounded replay buffer (``memory``) and the
    network is trained on random samples drawn from it in ``replay``.
    """

    def __init__(self, state_size, action_size): # state_size = 70, action_size = 128
        """Store the hyperparameters and build the Q-network.

        Args:
            state_size: Length of the flat state vector fed to the network.
            action_size: Number of discrete actions (one network output each).
        """
        self.state_size = state_size
        self.action_size = action_size
        # Replay buffer; once 2000 transitions are stored the oldest is dropped.
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the network that maps a state to per-action Q-values.

        Returns:
            A compiled ``Sequential`` model with two 24-unit ReLU hidden layers
            and a linear output of width ``action_size``, trained with MSE loss.
        """
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, index, reward, next_state, done):
        """Append one transition to the replay buffer.

        Args:
            state: State the action was taken in.
            index: Index of the action that was taken.
            reward: Reward observed after the action.
            next_state: State observed after the action.
            done: Truthy when the transition ended the episode.
        """
        self.memory.append((state, index, reward, next_state, done))

    def act(self, state): # TODO
        """Pick an action index for ``state`` using an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random index in
        ``range(action_size)`` is returned; otherwise the index of the largest
        predicted Q-value in the first row of the model output.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size): # TODO
        """Fit the network on stored transitions, then decay ``epsilon``.

        At most ``batch_size`` transitions are sampled without replacement;
        if fewer are stored, all of them are used. Each sampled transition is
        fitted individually, so later predictions in the same call already see
        the earlier updates.

        Args:
            batch_size: Maximum number of transitions to train on.
        """
        # random.sample raises ValueError when asked for more items than the
        # buffer holds, so never request more than are stored.
        batch_size = min(batch_size, len(self.memory))
        minibatch = random.sample(self.memory, batch_size)
        for state, index, reward, next_state, done in minibatch:
            # Terminal transitions have no future return to bootstrap from.
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0]) # TODO: Discretize action space?
            # Only the taken action's Q-value is moved towards the target; the
            # other outputs keep their current predictions.
            target_f = self.model.predict(state)
            target_f[0][index] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            # Clamp so the final decay step cannot undershoot the floor.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

In [3]:
def index_to_action(i):
    """Decode a flat action index into ``[dx, dy, shoot_dir, to_shoot]``.

    The index packs three fields: ``i // 16`` selects one of eight movement
    directions, ``(i % 16) // 2`` selects a firing angle in steps of pi/4,
    and ``i % 2`` is the fire flag.
    """
    headings = (
        (1, 0), (-1, 0), (0, 1), (0, -1),
        (1, 1), (1, -1), (-1, 1), (-1, -1),
    )
    heading_idx, remainder = divmod(i, 16)
    angle_step, fire_flag = divmod(remainder, 2)
    dx, dy = headings[heading_idx]
    return [dx, dy, angle_step * np.pi / 4, fire_flag]

In [4]:
import AllIsCircle as AC
def reward_func(game_vec):
    """Score a game state from the point of view of ``ship_0``.

    Args:
        game_vec: State vector, decoded with ``AC.game``.

    Returns:
        -1000 if ``ship_0`` has no health left (checked first), 1000 if
        ``ship_1`` has no health left, otherwise a shaping term summed over
        all bullets: a non-positive term for each bullet relative to
        ``ship_0`` plus a non-negative term relative to ``ship_1``.
    """
    game = AC.game(game_vec)
    p0 = game.ship_0
    if (p0.health <= 0):
        return -1000
    p1 = game.ship_1
    if (p1.health <= 0):
        return 1000
    r0 = np.array((p0.x, p0.y))
    r1 = np.array((p1.x, p1.y))
    reward = 0
    for b in game.bullets_list:
        br = np.array((b.x, b.y))
        b_vel = np.array((b.vx, b.vy))
        # A bullet sitting exactly on a ship makes the squared distance zero;
        # dividing by it yields NaN, which would poison every Q-target built
        # from this reward. Such a term contributes nothing instead.
        dist0_sq = np.linalg.norm(r0 - br) ** 2
        if dist0_sq > 0:
            reward += min((br - r0).dot(b_vel) / dist0_sq + 2, 0)
        dist1_sq = np.linalg.norm(r1 - br) ** 2
        if dist1_sq > 0:
            reward += max((r1 - br).dot(b_vel) / dist1_sq + 2, 0)
    return reward

In [None]:
import ShipsEnv as se
import DumbAI as dai

# Number of steps each episode lasted, in episode order.
training_result = []
episodes = 4000

# initialize the ships environment and the agent
env = se.ShipsEnv(False)
agent = DQNAgent(70, 128)
try:
    # Iterate the game
    for e in range(episodes):
        # reset state in the beginning of each game
        state = env.reset() # TODO: Check what the reset method is?
        state = np.reshape(state, [1, 70])
        # time_t counts the frames played in this episode (capped at 1000)
        for time_t in range(1000):
            # turn this on if you want to render
            # env.render()
            # Decide action
            index = agent.act(state)
            action = index_to_action(index)
            # Advance the game one frame: our action against the scripted
            # opponent's action for the same state.
            done, next_state = env.step(action, dai.act(state[0]))
            next_state = np.reshape(next_state, [1, 70])
            reward = reward_func(next_state[0])
            # Remember the previous state, action, reward, and done
            agent.remember(state, index, reward, next_state, done)
            # make next_state the new current state for the next frame.
            state = next_state
            # done is truthy once the game has ended
            if done:
                # print the score and break out of the loop
                print("episode: {}/{}, score: {}, time: {}, done: {}"
                      .format(e, episodes, reward, time_t, done))
                break
        # train the agent with the experience of the episode
        training_result.append(time_t)
        num_mem = min(len(agent.memory), 32)
        agent.replay(num_mem)
finally:
    # Write whatever was collected even when the run is interrupted, and make
    # sure the file is flushed and closed.
    with open("performance", "w") as record:
        for steps in training_result:
            record.write(str(steps) + " ")

episode: 0/4000, score: -1000, time: 74, done: -1
episode: 1/4000, score: -1000, time: 74, done: -1
episode: 2/4000, score: -1000, time: 73, done: -1
episode: 3/4000, score: -1000, time: 75, done: -1
episode: 4/4000, score: -1000, time: 75, done: -1
episode: 5/4000, score: -1000, time: 73, done: -1
episode: 6/4000, score: -1000, time: 74, done: -1
episode: 7/4000, score: -1000, time: 73, done: -1
episode: 8/4000, score: -1000, time: 75, done: -1
episode: 9/4000, score: -1000, time: 73, done: -1
episode: 10/4000, score: -1000, time: 74, done: -1
episode: 11/4000, score: -1000, time: 73, done: -1
episode: 12/4000, score: -1000, time: 75, done: -1
episode: 13/4000, score: -1000, time: 76, done: -1
episode: 14/4000, score: -1000, time: 73, done: -1
episode: 15/4000, score: -1000, time: 74, done: -1
episode: 16/4000, score: -1000, time: 75, done: -1
episode: 17/4000, score: -1000, time: 75, done: -1
episode: 18/4000, score: -1000, time: 75, done: -1
episode: 19/4000, score: 1000, time: 76, 

episode: 161/4000, score: -1000, time: 445, done: -1
episode: 162/4000, score: -1000, time: 82, done: -1
episode: 163/4000, score: -1000, time: 82, done: -1
episode: 164/4000, score: -1000, time: 83, done: -1
episode: 165/4000, score: -1000, time: 66, done: -1
episode: 166/4000, score: -1000, time: 157, done: -1
episode: 167/4000, score: -1000, time: 83, done: -1
episode: 168/4000, score: -1000, time: 84, done: -1
episode: 169/4000, score: -1000, time: 78, done: -1
episode: 170/4000, score: -1000, time: 747, done: -1
episode: 171/4000, score: -1000, time: 66, done: -1
episode: 172/4000, score: -1000, time: 659, done: -1
episode: 173/4000, score: -1000, time: 66, done: -1
episode: 174/4000, score: -1000, time: 65, done: -1
episode: 175/4000, score: 1000, time: 235, done: 1
episode: 176/4000, score: -1000, time: 211, done: -1
episode: 177/4000, score: -1000, time: 570, done: -1
episode: 178/4000, score: -1000, time: 219, done: -1
episode: 179/4000, score: -1000, time: 65, done: -1
episod

episode: 318/4000, score: 1000, time: 341, done: 1
episode: 319/4000, score: -1000, time: 89, done: -1
episode: 320/4000, score: -1000, time: 527, done: -1
episode: 321/4000, score: -1000, time: 171, done: -1
episode: 322/4000, score: -1000, time: 211, done: -1
episode: 323/4000, score: -1000, time: 210, done: -1
episode: 324/4000, score: -1000, time: 211, done: -1
episode: 325/4000, score: 1000, time: 427, done: 1
episode: 326/4000, score: 1000, time: 75, done: 1
episode: 327/4000, score: -1000, time: 530, done: -1
episode: 328/4000, score: -1000, time: 552, done: -1
episode: 329/4000, score: -1000, time: 64, done: -1
episode: 330/4000, score: 1000, time: 460, done: 1
episode: 331/4000, score: -1000, time: 773, done: -1
episode: 332/4000, score: -1000, time: 167, done: -1
episode: 333/4000, score: 1000, time: 76, done: 1
episode: 334/4000, score: -1000, time: 212, done: -1
episode: 335/4000, score: -1000, time: 212, done: -1
episode: 336/4000, score: -1000, time: 65, done: -1
episode:

episode: 475/4000, score: -1000, time: 171, done: -1
episode: 476/4000, score: -1000, time: 174, done: -1
episode: 477/4000, score: -1000, time: 90, done: -1
episode: 478/4000, score: -1000, time: 180, done: -1
episode: 479/4000, score: -1000, time: 62, done: -1
episode: 480/4000, score: -1000, time: 497, done: -1
episode: 481/4000, score: -1000, time: 91, done: -1
episode: 482/4000, score: -1000, time: 170, done: -1
episode: 483/4000, score: -1000, time: 169, done: -1
episode: 484/4000, score: 1000, time: 141, done: 1
episode: 485/4000, score: -1000, time: 63, done: -1
episode: 486/4000, score: -1000, time: 63, done: -1
episode: 487/4000, score: -1000, time: 90, done: -1
episode: 488/4000, score: -1000, time: 89, done: -1
episode: 489/4000, score: -1000, time: 677, done: -1
episode: 490/4000, score: -1000, time: 683, done: -1
episode: 491/4000, score: 1000, time: 149, done: 1
episode: 492/4000, score: -1000, time: 177, done: -1
episode: 493/4000, score: -1000, time: 495, done: -1
epis

episode: 633/4000, score: -1000, time: 176, done: -1
episode: 634/4000, score: -1000, time: 177, done: -1
episode: 635/4000, score: 1000, time: 122, done: 1
episode: 636/4000, score: -1000, time: 163, done: -1
episode: 637/4000, score: -1000, time: 61, done: -1
episode: 638/4000, score: -1000, time: 167, done: -1
episode: 639/4000, score: -1000, time: 645, done: -1
episode: 640/4000, score: -1000, time: 176, done: -1
episode: 641/4000, score: -1000, time: 63, done: -1
episode: 642/4000, score: -1000, time: 643, done: -1
episode: 643/4000, score: -1000, time: 627, done: -1
episode: 644/4000, score: -1000, time: 161, done: -1
episode: 645/4000, score: -1000, time: 467, done: -1
episode: 646/4000, score: -1000, time: 174, done: -1
episode: 647/4000, score: -1000, time: 62, done: -1
episode: 648/4000, score: -1000, time: 61, done: -1
episode: 649/4000, score: -1000, time: 61, done: -1
episode: 650/4000, score: -1000, time: 175, done: -1
episode: 651/4000, score: -1000, time: 450, done: -1


episode: 790/4000, score: -1000, time: 61, done: -1
episode: 791/4000, score: -1000, time: 92, done: -1
episode: 792/4000, score: -1000, time: 92, done: -1
episode: 793/4000, score: -1000, time: 92, done: -1
episode: 794/4000, score: -1000, time: 656, done: -1
episode: 795/4000, score: 1000, time: 69, done: 1
episode: 796/4000, score: -1000, time: 458, done: -1
episode: 797/4000, score: -1000, time: 452, done: -1
episode: 798/4000, score: -1000, time: 450, done: -1
episode: 799/4000, score: 1000, time: 157, done: 1
episode: 800/4000, score: -1000, time: 92, done: -1
episode: 801/4000, score: -1000, time: 166, done: -1
episode: 802/4000, score: 1000, time: 82, done: 1
episode: 803/4000, score: -1000, time: 629, done: -1
episode: 804/4000, score: 1000, time: 116, done: 1
episode: 805/4000, score: -1000, time: 443, done: -1
episode: 806/4000, score: -1000, time: 172, done: -1
episode: 807/4000, score: -1000, time: 177, done: -1
episode: 808/4000, score: 1000, time: 70, done: 1
episode: 80

episode: 947/4000, score: -1000, time: 642, done: -1
episode: 948/4000, score: -1000, time: 456, done: -1
episode: 949/4000, score: -1000, time: 459, done: -1
episode: 950/4000, score: -1000, time: 61, done: -1
episode: 951/4000, score: -1000, time: 61, done: -1
episode: 952/4000, score: -1000, time: 162, done: -1
episode: 953/4000, score: -1000, time: 648, done: -1
episode: 954/4000, score: -1000, time: 648, done: -1
episode: 955/4000, score: -1000, time: 171, done: -1
episode: 956/4000, score: -1000, time: 61, done: -1
episode: 957/4000, score: -1000, time: 430, done: -1
episode: 958/4000, score: -1000, time: 458, done: -1
episode: 959/4000, score: -1000, time: 451, done: -1
episode: 960/4000, score: -1000, time: 62, done: -1
episode: 961/4000, score: -1000, time: 614, done: -1
episode: 962/4000, score: 1000, time: 132, done: 1
episode: 963/4000, score: -1000, time: 61, done: -1
episode: 964/4000, score: -1000, time: 62, done: -1
episode: 965/4000, score: -1000, time: 177, done: -1
e

episode: 1102/4000, score: -1000, time: 641, done: -1
episode: 1103/4000, score: -1000, time: 65, done: -1
episode: 1104/4000, score: -1000, time: 92, done: -1
episode: 1105/4000, score: -1000, time: 61, done: -1
episode: 1106/4000, score: -1000, time: 61, done: -1
episode: 1107/4000, score: -1000, time: 92, done: -1
episode: 1108/4000, score: -1000, time: 61, done: -1
episode: 1109/4000, score: -1000, time: 61, done: -1
episode: 1110/4000, score: -1000, time: 175, done: -1
episode: 1111/4000, score: -1000, time: 92, done: -1
episode: 1112/4000, score: -1000, time: 93, done: -1
episode: 1113/4000, score: -1000, time: 92, done: -1
episode: 1114/4000, score: -1000, time: 92, done: -1
episode: 1115/4000, score: -1000, time: 165, done: -1
episode: 1116/4000, score: -1000, time: 62, done: -1
episode: 1117/4000, score: -1000, time: 61, done: -1
episode: 1118/4000, score: -1000, time: 147, done: -1
episode: 1119/4000, score: -1000, time: 92, done: -1
episode: 1120/4000, score: -1000, time: 92

episode: 1256/4000, score: -1000, time: 92, done: -1
episode: 1257/4000, score: -1000, time: 171, done: -1
episode: 1258/4000, score: -1000, time: 167, done: -1
episode: 1259/4000, score: -1000, time: 357, done: -1
episode: 1260/4000, score: -1000, time: 438, done: -1
episode: 1261/4000, score: -1000, time: 92, done: -1
episode: 1262/4000, score: -1000, time: 92, done: -1
episode: 1263/4000, score: -1000, time: 92, done: -1
episode: 1264/4000, score: -1000, time: 92, done: -1
episode: 1265/4000, score: -1000, time: 647, done: -1
episode: 1266/4000, score: -1000, time: 160, done: -1
episode: 1267/4000, score: -1000, time: 166, done: -1
episode: 1268/4000, score: -1000, time: 482, done: -1
episode: 1269/4000, score: -1000, time: 178, done: -1
episode: 1270/4000, score: -1000, time: 169, done: -1
episode: 1271/4000, score: -1000, time: 61, done: -1
episode: 1272/4000, score: -1000, time: 61, done: -1
episode: 1273/4000, score: -1000, time: 61, done: -1
episode: 1274/4000, score: -1000, ti

episode: 1410/4000, score: -1000, time: 650, done: -1
episode: 1411/4000, score: -1000, time: 468, done: -1
episode: 1412/4000, score: -1000, time: 158, done: -1
episode: 1413/4000, score: -1000, time: 173, done: -1
episode: 1414/4000, score: -1000, time: 618, done: -1
episode: 1415/4000, score: -1000, time: 170, done: -1
episode: 1416/4000, score: -1000, time: 175, done: -1
episode: 1417/4000, score: -1000, time: 174, done: -1
episode: 1418/4000, score: -1000, time: 167, done: -1
episode: 1419/4000, score: -1000, time: 168, done: -1
episode: 1420/4000, score: -1000, time: 172, done: -1
episode: 1421/4000, score: -1000, time: 164, done: -1
episode: 1422/4000, score: -1000, time: 62, done: -1
episode: 1423/4000, score: -1000, time: 61, done: -1
episode: 1424/4000, score: -1000, time: 157, done: -1
episode: 1425/4000, score: -1000, time: 445, done: -1
episode: 1426/4000, score: -1000, time: 166, done: -1
episode: 1427/4000, score: -1000, time: 61, done: -1
episode: 1428/4000, score: 1000

episode: 1564/4000, score: -1000, time: 159, done: -1
episode: 1565/4000, score: -1000, time: 62, done: -1
episode: 1566/4000, score: -1000, time: 653, done: -1
episode: 1567/4000, score: -1000, time: 460, done: -1
episode: 1568/4000, score: -1000, time: 454, done: -1
episode: 1569/4000, score: -1000, time: 170, done: -1
episode: 1570/4000, score: -1000, time: 61, done: -1
episode: 1571/4000, score: -1000, time: 64, done: -1
episode: 1572/4000, score: -1000, time: 442, done: -1
episode: 1573/4000, score: -1000, time: 61, done: -1
episode: 1574/4000, score: -1000, time: 434, done: -1
episode: 1575/4000, score: -1000, time: 625, done: -1
episode: 1576/4000, score: -1000, time: 213, done: -1
episode: 1577/4000, score: -1000, time: 456, done: -1
episode: 1578/4000, score: -1000, time: 457, done: -1
episode: 1579/4000, score: -1000, time: 451, done: -1
episode: 1580/4000, score: -1000, time: 655, done: -1
episode: 1581/4000, score: -1000, time: 166, done: -1
episode: 1582/4000, score: -1000

KeyboardInterrupt: 

In [None]:
# Sanity check: decode action index 114 into [dx, dy, shoot_dir, to_shoot].
print(index_to_action(114))