In [1]:
from common_functions import *
from game import CCGame
from state import StateSpace
from rewarder import Rewarder
from DQN import DQNAgent

# Dark plotting theme: slate backgrounds, light text/ticks and a dotted, muted grid.
plt.rcParams.update({
    'axes.facecolor': '#323A48',
    'axes.edgecolor': '#92A2BD',
    'figure.facecolor': '#323A48',
    'text.color': '#DBE1EA',
    'xtick.color': '#DBE1EA',
    'ytick.color': '#DBE1EA',
    'grid.linestyle': ':',
    'grid.color': '#3F495A',
})

from cycler import cycler
# Colour rotation applied to successive lines drawn on the same axes.
plt.rcParams['axes.prop_cycle'] = cycler('color', [
    'c', 'goldenrod', 'mediumseagreen', 'blueviolet', 'indianred',
    'mediumpurple', 'cornflowerblue', 'darkorchid', 'gold', 'olivedrab',
])

In [2]:
def train_agent(n_games=10):
  """Play ``n_games`` games with an epsilon-greedy policy and record every transition.

  A fresh ``CCGame`` is started for each game and driven until ``game.crashed``
  is set. On every tick the action is either random (exploration) or the
  arg-max of the network's prediction for the current state, and the resulting
  ``(state, command, reward, state_next, done)`` tuple is handed to
  ``agent.remember``.

  NOTE(review): this function only stores transitions; it never calls
  ``agent.train_on_memory()``, and the ``DQNAgent`` it builds is local and is
  discarded on return. Confirm whether a training step is meant to happen here.

  Args:
    n_games: Number of games to play.

  Returns:
    A ``(ticks, scores)`` tuple of lists with one entry per game: the game's
    final ``ticknum`` and the sum of the rewards collected during it.
  """
  rewarder = Rewarder()
  agent = DQNAgent()
  stateSpace = StateSpace()
  scores = []
  ticks = []

  for k in range(n_games):
    # Exploration threshold out of 200 draws: 30/200 = 15% random actions in the
    # first game, shrinking by one per game and pinned at 0 from game 30 onwards.
    epsilon = max(0, 30 - k)
    rewards = []
    game = CCGame()
    try:
      game.start()
      while not game.crashed:
        state = stateSpace.stateFromTick(game.car_id, game.tick_data, game.mapMatrix)
        if np.random.randint(0, 200) < epsilon:
          # Explore: pick a uniformly random action.
          command = agent.actions[np.random.randint(len(agent.actions))]
        else:
          # Exploit: run the network once and take the highest-valued action.
          prediction = agent.network.predict(state.reshape(1, len(state)))[0]
          command = agent.actions[np.argmax(prediction)]
        game.send_command(command)
        reward = rewarder.calculate_reward(game)
        rewards.append(reward)
        state_next = stateSpace.stateFromTick(game.car_id, game.tick_data, game.mapMatrix)
        agent.remember(state, command, reward, state_next, done=game.crashed)
    finally:
      # Release the game even when an exception escapes the loop, so a failed
      # episode does not leave a game open behind it.
      if game.started:
        game.close()

    print_t("Game #{0} ended after {1} ticks. Total score {2}".format(k, game.ticknum, np.sum(rewards)))
    ticks.append(game.ticknum)
    scores.append(np.sum(rewards))
  return ticks, scores

In [3]:
# Play 50 games; `results` receives the (ticks, scores) tuple returned by train_agent.
results = train_agent(50)

[2019-04-21 23:15:38] CAR CRASHED.
[2019-04-21 23:15:38] Game #0 ended after 59 ticks. Total score -99
[2019-04-21 23:15:42] CAR CRASHED.
[2019-04-21 23:15:42] Game #1 ended after 53 ticks. Total score -99
[2019-04-21 23:15:47] CAR CRASHED.
[2019-04-21 23:15:47] Game #2 ended after 75 ticks. Total score -99
[2019-04-21 23:15:53] CAR CRASHED.
[2019-04-21 23:15:53] Game #3 ended after 108 ticks. Total score -99
[2019-04-21 23:15:55] CAR CRASHED.
[2019-04-21 23:15:55] Game #4 ended after 32 ticks. Total score -99
[2019-04-21 23:16:01] CAR CRASHED.
[2019-04-21 23:16:01] Game #5 ended after 77 ticks. Total score -100
[2019-04-21 23:16:02] CAR CRASHED.
[2019-04-21 23:16:02] Game #6 ended after 16 ticks. Total score -99
[2019-04-21 23:16:10] CAR CRASHED.
[2019-04-21 23:16:10] Game #7 ended after 99 ticks. Total score -99
[2019-04-21 23:16:17] CAR CRASHED.
[2019-04-21 23:16:17] Game #8 ended after 100 ticks. Total score -99
[2019-04-21 23:16:26] CAR CRASHED.
[2019-04-21 23:16:26] Game #9 ended

In [4]:
# Notebook-level helpers shared by testGameTime() and the cells below
# (constructed in the same order: rewarder, agent, state space).
rewarder, agent, stateSpace = Rewarder(), DQNAgent(), StateSpace()

def testGameTime():
  """Play one game with random commands and log how many ticks it lasted.

  Uses the notebook-level ``stateSpace``, ``rewarder`` and ``agent``: every
  transition of the game is stored through ``agent.remember``.

  Returns:
    None.
  """
  g = CCGame()
  try:
    g.start()
    while not g.crashed:
      state = stateSpace.stateFromTick(g.car_id, g.tick_data, g.mapMatrix)
      command = g.send_random_command()
      reward = rewarder.calculate_reward(g)
      state_next = stateSpace.stateFromTick(g.car_id, g.tick_data, g.mapMatrix)
      agent.remember(state, command, reward, state_next, done=g.crashed)
    print_t("ticks: {}".format(g.ticknum))
  finally:
    # The game appears to be closed already once the car has crashed (an
    # unguarded close() logged "Game already closed."), so only close a game
    # that is still marked as started. The finally also covers exceptions.
    if g.started:
      g.close()
# Run one random-command game; its transitions go into the notebook-level agent via agent.remember().
testGameTime()

[2019-04-21 23:19:58] CAR CRASHED.
[2019-04-21 23:19:58] ticks: 24
[2019-04-21 23:19:58] Game already closed.


In [5]:
# Operates on the notebook-level `agent` (the one testGameTime() fed), not on the agent
# that train_agent() builds internally.
# Presumably fits the network on the transitions stored via agent.remember() - confirm in DQN.py.
agent.train_on_memory()

In [6]:
# Smoke test: feed one random 511-value input row to a freshly constructed agent's network.
DQNAgent().network.predict(np.random.randn(511).reshape(1, -1))

array([[0.07611191, 0.06611162, 0.10531221, 0.27772307, 0.05106611,
        0.07332172, 0.06242919, 0.14575453, 0.07525358, 0.06691611]],
      dtype=float32)

In [7]:
# NOTE(review): train_agent returns a (ticks, scores) tuple, not a game object, so the
# name `game` is misleading. train_agent also constructs its own DQNAgent, so the
# notebook-level `agent` is not the one used for this run.
game = train_agent(50)

[2019-04-21 23:20:01] CAR CRASHED.
[2019-04-21 23:20:01] Game #0 ended after 25 ticks. Total score -98
[2019-04-21 23:20:02] CAR CRASHED.
[2019-04-21 23:20:02] Game #1 ended after 4 ticks. Total score -98
[2019-04-21 23:20:02] CAR CRASHED.
[2019-04-21 23:20:02] Game #2 ended after 9 ticks. Total score -98
[2019-04-21 23:20:03] CAR CRASHED.
[2019-04-21 23:20:03] Game #3 ended after 6 ticks. Total score -100
[2019-04-21 23:20:03] CAR CRASHED.
[2019-04-21 23:20:03] Game #4 ended after 4 ticks. Total score -98
[2019-04-21 23:20:04] CAR CRASHED.
[2019-04-21 23:20:04] Game #5 ended after 8 ticks. Total score -98
[2019-04-21 23:20:04] CAR CRASHED.
[2019-04-21 23:20:04] Game #6 ended after 8 ticks. Total score -98
[2019-04-21 23:20:05] CAR CRASHED.
[2019-04-21 23:20:05] Game #7 ended after 5 ticks. Total score -98
[2019-04-21 23:20:05] CAR CRASHED.
[2019-04-21 23:20:05] Game #8 ended after 3 ticks. Total score -98
[2019-04-21 23:20:06] CAR CRASHED.
[2019-04-21 23:20:06] Game #9 ended after 11 

KeyError: 'car_id'

In [None]:
# NOTE(review): the only notebook-level `game` is the result of train_agent(), which is a
# (ticks, scores) tuple and has no close(); the CCGame objects created inside train_agent
# are local to it and cannot be reached from this cell.
game.close()

In [None]:
g = CCGame()

# Start, send one random command to, and close the same CCGame instance ten times in a row.
for k in range(10):
  try:
    g.start()
    g.send_random_command()
  finally:
    # Close on every iteration, including when start/send raised, so a failed
    # iteration does not leave the game open.
    g.close()

In [None]:
## Test section
g = CCGame(log_ticks=False)
ss = StateSpace()
rewarder = Rewarder()
g.start()
state = ss.stateFromTick(g.car_id, g.tick_data, g.mapMatrix)
c=g.send_random_command()
reward = rewarder.calculate_reward(g)
g.close()

print_t("Game id: {}".format(g.game_id))
print_t("command: {}".format(c))
print_t("reward: {}".format(reward))

In [None]:
""" TODO: As a baseline project, go to a certain point without dying. Use networkx
to find the shortest path to the point.
"""