# This notebook is meant to train a Deep Q Network model using keras for a Racing Game Environment

### Parameters
- Optimizer - Adam
- Input State size - 4
- Action size - 3
- Dense layer 1 - 16 nodes with relu activation
- Dense layer 2 - 24 nodes with relu activation
- Loss function - MSE, Mean Squared Error
- Epochs - 1000
- Actions in each epoch - 1000
- Batch size - 32

In [None]:
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import random
from main import Game

In [None]:
# Create the racing game environment the agent will interact with.
# `Game` is imported from the project-local `main` module.
env = Game()

In [None]:
# Training hyperparameters.
# The state and action dimensions are read from the environment's constants
# (each exposes its number through `.value`).
state_size = env.constants.STATE_SIZE.value
action_size = env.constants.ACTION_SIZE.value
# Number of remembered transitions sampled per replay call
batch_size = 32
# Number of training episodes
n_episodes = 1000

In [None]:
# Building a Deep Q Network
class DQNAgent:
    """Deep Q-Network agent with an epsilon-greedy policy and replay memory.

    The agent keeps a bounded memory of transitions, picks actions either at
    random (exploration) or from the network's predicted Q-values
    (exploitation), and trains the network on random samples of that memory.
    """

    def __init__(self, state_size, action_size):
        """Set up hyperparameters, the replay memory and the Q-network.

        Args:
            state_size: Number of values in one state vector (network input).
            action_size: Number of discrete actions (network output size).
        """
        self.state_size = state_size
        self.action_size = action_size
        # Bounded replay memory: once 2000 entries are stored, appending a
        # new one drops the oldest
        self.memory = deque(maxlen=2000)
        # Discount factor applied to the predicted future reward
        self.gamma = 0.9
        # Exploration rate: probability of taking a random action
        self.epsilon = 1.0
        # Multiplicative decay applied to epsilon after each replay call, so
        # the agent explores first and exploits later
        self.epsilon_decay = 0.995
        # Lower bound below which epsilon is no longer decayed
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled Keras ``Sequential`` model mapping a state vector to
            one linear output per action.
        """
        model = Sequential()
        model.add(Dense(16, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        # Linear outputs: one estimated Q-value per action
        model.add(Dense(self.action_size, activation='linear'))
        # NOTE(review): `lr` is the legacy keyword for the learning rate;
        # newer Keras releases call it `learning_rate` - confirm against the
        # installed Keras version.
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory.

        Args:
            state: State the action was taken in.
            action: Index of the action taken.
            reward: Reward received for the action.
            next_state: State observed after the action.
            done: Whether the episode ended with this transition.
        """
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action with an epsilon-greedy policy.

        Args:
            state: Current state, in the shape the model's ``predict``
                accepts (the training loop passes shape ``(1, state_size)``).

        Returns:
            The index of the chosen action.
        """
        # Explore: pick a uniformly random action
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # Exploit: pick the action with the highest predicted value
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Train the network on a random sample of remembered transitions.

        Args:
            batch_size: Number of transitions to sample from memory.

        Raises:
            ValueError: If the memory holds fewer than ``batch_size``
                transitions (raised by ``random.sample``).
        """
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            # For a terminal transition the target is just the reward
            target = reward
            if not done:
                # Otherwise add the discounted best predicted value of the
                # next state
                target = (reward + self.gamma
                          * np.amax(self.model.predict(next_state)[0]))
            # Only the taken action's value is replaced, so the other
            # outputs contribute no error to the fit
            target_f = self.model.predict(state)
            target_f[0][action] = target

            self.model.fit(state, target_f, epochs=1, verbose=0)

        # Decrease exploration once per replay call
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load network weights from the file ``name`` into the model."""
        # Keras models expose `load_weights`; they have no `load` method
        self.model.load_weights(name)

    def save(self, name):
        """Save the model to the file ``name``."""
        self.model.save(name)

In [None]:
# Create the DQN agent, sized to the environment's state and action dimensions
agent = DQNAgent(state_size, action_size)

In [None]:
# Train the agent to play the car racing game
done = False
for e in range(n_episodes):
    # Start each episode from a reset environment; the state is reshaped to
    # a single row (1, state_size), the shape the network is fed
    state = np.reshape(env.reset(), [1, state_size])

    # NOTE(review): the inner loop always runs 1000 steps and keeps stepping
    # after `done` is reported - confirm the environment tolerates (or
    # restarts after) a crash, otherwise a `break` on `done` is needed.
    for time in range(1000):
        # Let the agent's policy pick the action for the current state
        action = agent.act(state)

        # Apply the action; the environment returns the next state and
        # whether the car has crashed
        next_state, done = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])

        # Reward shaping: -5 for a crash, otherwise 2 points for action 1
        # and 1 point for any other (redundant) move
        reward = -5 if done else (2 if action == 1 else 1)

        # Store the transition for later replay, then advance to next state
        agent.remember(state, action, reward, next_state, done)
        state = next_state

    # Once the memory holds more than one batch of transitions, train on a
    # random sample of it (once per episode)
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)

In [None]:
# Save the trained agent's model. `e` is the last episode index left over
# from the training loop, zero-padded to four digits in the file name
# (e.g. "model0999.h5" after 1000 episodes).
agent.save("model" + '{:04d}'.format(e) + ".h5")

In [None]:
# Observe the agent play the game: hand the model file name built from the
# last episode index `e` (the same name the save cell uses) to the environment.
# NOTE(review): `play_model` is defined in the project's `Game` class -
# presumably it loads the file and runs the game with it; confirm there.
env.play_model("model" + '{:04d}'.format(e) + ".h5")