In [1]:
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.models import load_model
import random
import time

Using TensorFlow backend.


In [2]:
from core import *

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
# Create the game environment in "ai" mode.
# `Game` is not defined in this notebook; presumably it comes from `from core import *`.
env = Game(
    mode="ai",
    gold_amount=50,
    speed=5,
)

In [4]:
# Training hyperparameters: number of episodes to play and replay minibatch size
n_episodes = 2000
batch_size = 64
# env.describe() yields two values; they are used below as the network's
# input width (state_size) and output width (action_size)
state_size, action_size = env.describe()

In [5]:
# Building a Deep Q Network
class DQNAgent:
    """Deep Q-Network agent with experience replay and an epsilon-greedy policy.

    The agent stores transitions in a bounded memory, picks actions either at
    random (exploration) or greedily from the Q-network (exploitation), and
    trains the network on random minibatches drawn from the memory.
    """

    def __init__(self, state_size, action_size):
        """Create the agent and build its Q-network.

        Args:
            state_size: Length of the flat state vector fed to the network.
            action_size: Number of discrete actions (width of the output layer).
        """
        self.state_size = state_size
        self.action_size = action_size
        # Replay memory: holds at most 2000 transitions, the oldest are dropped first
        self.memory = deque(maxlen=2000)
        # Discount factor applied to the predicted future reward
        self.gamma = 0.5
        # Exploration rate: probability of taking a random action
        self.epsilon = 1.0
        # Multiplicative decay applied to epsilon after every replay() call
        self.epsilon_decay = 0.995
        # Epsilon stops decaying once it is no longer above this value
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the fully connected Q-network.

        Returns:
            A compiled Keras ``Sequential`` model mapping a state vector of
            length ``state_size`` to ``action_size`` linear Q-value outputs.
        """
        model = Sequential()
        model.add(Dense(64, input_dim = self.state_size, activation='relu'))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(32, activation='relu'))
        # Linear output: one unbounded Q-value estimate per action
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory.

        The parameter was previously misspelled ``next_sate`` while the body
        stored ``next_state``, so the method silently captured a notebook
        global (or raised ``NameError`` when none existed) instead of using
        its own argument.
        """
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action with the epsilon-greedy policy.

        Args:
            state: Network input; it is passed to the model as-is, so it must
                already have the shape the model expects (the training loop
                passes arrays reshaped to ``[1, state_size]``).

        Returns:
            A random action index with probability ``epsilon``, otherwise the
            index of the highest predicted Q-value.
        """
        # Either explore
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # Or exploit
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def predict(self, state):
        """Return the greedy action for ``state`` (no exploration).

        Unlike ``act``, the state is first reshaped to ``[1, state_size]``,
        so a flat state vector can be passed directly.
        """
        state = np.reshape(state, [1, self.state_size])
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Train the network on a random minibatch and decay epsilon once.

        Args:
            batch_size: Number of transitions to sample; ``random.sample``
                raises ``ValueError`` if the memory holds fewer than this.
        """
        # Randomly replay some of our memories from deque
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            # If the episode has ended, the target is just the observed reward
            target = reward
            if not done:
                # Otherwise add the discounted best Q-value predicted for the next state
                target = (reward + self.gamma * np.amax(self.model.predict(next_state)[0]))
            # Only the taken action's Q-value is moved towards the target;
            # the other outputs keep their current predictions
            target_f = self.model.predict(state)
            target_f[0][action] = target

            self.model.fit(state, target_f, epochs=1, verbose=0)

        # Decrease exploration
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Replace the current network with the model loaded from ``name``."""
        self.model = load_model(name)

    def save(self, name):
        """Save the current network to ``name``."""
        self.model.save(name)

In [6]:
# Build the DQN agent sized to the environment's state and action dimensions
agent = DQNAgent(state_size=state_size, action_size=action_size)







In [None]:
def _observe_env(env, state_size):
    """Read the current observation from the environment.

    Returns:
        A tuple ``(state, gold, exit_value, x, y)``. ``state`` is the flat
        gnome vision followed by gold, exit value and the gnome's x/y,
        reshaped to ``[1, state_size]`` so it can be fed to the network.
    """
    # Copy, so that extending never mutates a list the environment may still hold
    features = list(env.get_gnome_vision_flat())
    gold = env.get_gold()
    exit_value = env.get_exit()
    gnome = env.get_gnome()
    x, y = gnome.x, gnome.y
    features.extend([gold, exit_value, x, y])
    return np.reshape(features, [1, state_size]), gold, exit_value, x, y


start_time = time.time()
# Durations (whole seconds) of each completed reporting block of up to 100 episodes
times_list = []

# Training the agent to play the gnome game
done = False
for e in range(n_episodes):
    # Resetting environment in each episode
    env.reset()

    # Initial observation; it also seeds the "previous" values the rewards compare against
    state, current_gold, total_exit, gnome_prev_x, gnome_prev_y = _observe_env(env, state_size)

    step_count = 0

    if e % 100 == 0:
        now = time.time()
        delta = (now - start_time) // 1
        start_time = now
        delta_min = delta // 60
        delta_sec = delta % 60
        # At e == 0 no episodes have run yet; recording that ~0 s block would skew the average
        if e > 0:
            times_list.append(delta)
        print("Episodes: {} / {} / Time: {}:{}".format(e, n_episodes, int(delta_min), int(delta_sec)))

    for trial in range(500):
        done = False

        # Making action based on agent's action policy
        action = agent.act(state)

        # Performing the action and observing the resulting state
        env.step(action)
        step_count += 1
        next_state, gold, exit_value, gnome_x, gnome_y = _observe_env(env, state_size)

        # Every step costs 1, so shorter solutions score higher
        reward = -1

        # Extra penalty when the gnome did not move
        if gnome_x == gnome_prev_x and gnome_y == gnome_prev_y:
            reward -= 2

        # Bonus when the gold value increased
        if gold > current_gold:
            reward += 12

        # Bonus when the exit value decreased
        if exit_value < total_exit:
            reward += 3

        # An exit value of 0 ends the episode
        if exit_value == 0:
            reward += 5
            done = True

        gnome_prev_x = gnome_x
        gnome_prev_y = gnome_y
        current_gold = gold
        total_exit = exit_value

        # Remember the state - action - reward - next_state - done combination for future recalls
        agent.remember(state, action, reward, next_state, done)
        # Update current state with next_state given after performing an action
        state = next_state

        # Stop stepping a finished episode; otherwise every remaining trial would
        # store another terminal transition with the +5 bonus
        if done:
            break
    # Once the memory holds more than batch_size transitions, train on a random minibatch
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)

# The block that was still running when the loop ended has not been recorded yet
if n_episodes > 0:
    times_list.append((time.time() - start_time) // 1)

times_sum = sum(times_list)
# Guard against an empty list (n_episodes == 0) instead of dividing by zero
times_mean = times_sum / len(times_list) if times_list else 0
times_mean_round = times_mean // 1
times_sum = times_sum // 1
delta_min = times_sum // 60
delta_sec = times_sum % 60

# Average duration of one reporting block, split into minutes and seconds
avg_min = times_mean_round // 60
avg_sec = times_mean_round % 60

print("Total time: {}:{} / Average: {}:{}".format(int(delta_min), int(delta_sec), int(avg_min), int(avg_sec)))

In [None]:
# Reset the environment once training has finished, before the model is saved and replayed
env.reset()

In [None]:
# File name for the saved model: "epochs_<n_episodes>.h5"
model_stem = "epochs_{}".format(n_episodes)
model_name = model_stem + ".h5"

In [None]:
# Save the agent's Keras model to `model_name` (DQNAgent.save delegates to model.save)
agent.save(model_name)

In [None]:
# Reload the saved model from `model_name`; DQNAgent.load replaces agent.model
# with the result of keras load_model
agent.load(model_name)

In [None]:
# Attach the agent to the environment as its `model`.
# NOTE(review): presumably Game.play() queries env.model (e.g. agent.predict) for moves;
# confirm in core.Game
env.model = agent

In [None]:
# Run the game. `play` is defined on the environment class outside this notebook;
# presumably it lets the attached model control the gnome (TODO confirm)
env.play()