In [98]:
import pygame
import sys
import time
import numpy as np
import random
from keras import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras import utils
from collections import deque

utils.disable_interactive_logging()

### Step 1: Define the Snake Environment

In [99]:
class SnakeEnvironment:
    """Grid-based Snake game with a reset/step/render interface for an RL agent.

    Board encoding (``self.board``, shape ``(height, width)``, indexed ``[y, x]``):
    0 = empty, 1 = snake, 2 = food.  ``self.snake`` is a list of ``(x, y)``
    tuples ordered from tail (index 0) to head (index -1).

    ``reset`` and ``step`` return a *copy* of the board, so a caller may keep
    the returned array (e.g. in a replay buffer) without it being changed by
    later steps.
    """

    # Rendering geometry, in pixels.
    WINDOW_SIZE = (800, 600)
    CELL_SIZE = 20

    # Action id -> (dx, dy).  y grows downwards, so UP decreases y.
    ACTION_DELTAS = {0: (0, -1), 1: (0, 1), 2: (-1, 0), 3: (1, 0)}

    def __init__(self, width, height):
        """Create a ``width`` x ``height`` game and put it in its initial state."""
        self.width = width
        self.height = height

        # Set the number of actions (UP, DOWN, LEFT, RIGHT)
        self.num_actions = 4

        # pygame is only initialised lazily, on the first render() call
        self.rendering = False
        self.pygame_screen = None

        # Board, snake, food and score share one code path with reset(), so a
        # fresh instance and a reset instance always have the same structure
        # (in particular, self.snake is always a list of tuples).
        self.reset()

    def _generate_food(self):
        """Pick a random empty cell and return it as ``(x, y)``.

        Raises:
            ValueError: if the board has no empty cell left.
        """
        rows, cols = np.where(self.board == 0)
        if rows.size == 0:
            raise ValueError("cannot place food: the board has no empty cell")
        idx = random.randint(0, rows.size - 1)
        return int(cols[idx]), int(rows[idx])

    def reset(self):
        """Restore the initial state and return a copy of the board.

        The snake becomes a single segment at the centre, the score is zeroed
        and one food item is placed on a random empty cell.
        """
        # Clear the board and put the snake back in the centre
        self.board = np.zeros((self.height, self.width), dtype=np.int32)
        self.snake = [(self.width // 2, self.height // 2)]
        self.snakeLength = 1

        # Place the snake on the board
        head_x, head_y = self.snake[0]
        self.board[head_y, head_x] = 1

        # Place the food on the board
        self.food_x, self.food_y = self._generate_food()
        self.board[self.food_y, self.food_x] = 2

        # Reset the score
        self.score = 0
        return self.board.copy()

    def step(self, action):
        """Advance the game by one move.

        Args:
            action: 0 = UP, 1 = DOWN, 2 = LEFT, 3 = RIGHT.

        Returns:
            ``(board_copy, reward, done)``.  The reward is -10 for hitting a
            wall or the snake's own body (episode ends, board unchanged), +10
            when the win condition is met (episode ends) and -1 for every
            other move, including a move that eats food.

        Raises:
            ValueError: if ``action`` is not one of the four known actions.
        """
        try:
            dx, dy = self.ACTION_DELTAS[action]
        except (KeyError, TypeError):
            raise ValueError(
                f"invalid action {action!r}; expected one of 0, 1, 2, 3"
            ) from None

        head_x, head_y = self.snake[-1]
        next_head = (head_x + dx, head_y + dy)

        # Collision with a wall or with the snake's body ends the episode
        inside = 0 <= next_head[0] < self.width and 0 <= next_head[1] < self.height
        if not inside or next_head in self.snake:
            return self.board.copy(), -10, True

        ate_food = next_head == (self.food_x, self.food_y)
        if ate_food:
            # Growing: keep the tail where it is
            self.score += 1
            self.snakeLength += 1
        else:
            # Plain move: the tail cell is vacated
            tail_x, tail_y = self.snake.pop(0)
            self.board[tail_y, tail_x] = 0

        # Write the new head first (this also overwrites an eaten food cell) ...
        self.snake.append(next_head)
        self.board[next_head[1], next_head[0]] = 1

        # ... and only then draw new food, so it can never land under the head.
        # When the snake fills the whole board there is nowhere left to put it.
        if ate_food and self.snakeLength < self.width * self.height:
            self.food_x, self.food_y = self._generate_food()
            self.board[self.food_y, self.food_x] = 2

        # Win: the snake covers every cell except (at most) the food cell
        if self.snakeLength >= self.width * self.height - 1:
            return self.board.copy(), 10, True

        # Otherwise the episode continues; each move costs -1 so shorter
        # routes accumulate less penalty.
        return self.board.copy(), -1, False

    def render(self, message=None):
        """Draw the current board in a pygame window.

        The window is created on the first call.  If the user closes the
        window, pygame is shut down and ``sys.exit()`` is called.

        Args:
            message: optional text drawn near the top of the window.
        """
        # Set up the pygame window on first use
        if not self.rendering:
            self.rendering = True
            pygame.init()
            self.pygame_screen = pygame.display.set_mode(self.WINDOW_SIZE)
            pygame.display.set_caption("Snake Game")

        window_w, window_h = self.WINDOW_SIZE
        cell = self.CELL_SIZE

        # Top-left corner that centres the playground in the window
        playground_x = (window_w - self.width * cell) // 2
        playground_y = (window_h - self.height * cell) // 2

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.rendering = False
                pygame.quit()
                sys.exit()

        self.pygame_screen.fill((0, 0, 0))  # Clear the screen

        # Draw the message box
        if message:
            font = pygame.font.Font(None, 36)
            text = font.render(message, True, (255, 255, 255))
            text_rect = text.get_rect(center=(window_w // 2, 100))
            self.pygame_screen.blit(text, text_rect)

        # Draw the snake (green) and the food (red)
        colors = {1: (0, 255, 0), 2: (255, 0, 0)}
        for y in range(self.height):
            for x in range(self.width):
                color = colors.get(int(self.board[y, x]))
                if color is not None:
                    cell_rect = pygame.Rect(
                        playground_x + x * cell, playground_y + y * cell, cell, cell
                    )
                    pygame.draw.rect(self.pygame_screen, color, cell_rect)

        # Draw the boundary around the playground
        border = pygame.Rect(playground_x, playground_y, self.width * cell, self.height * cell)
        pygame.draw.rect(self.pygame_screen, (255, 255, 255), border, 2)
        pygame.display.flip()

    def close(self):
        """Keep the window open until the user closes it, then exit.

        Does nothing when no window was ever opened.  Once the window is
        closed, pygame is shut down and ``sys.exit()`` is called.
        """
        if not self.rendering:
            return
        while True:
            # Block until the next event instead of spinning on an empty queue
            event = pygame.event.wait()
            if event.type == pygame.QUIT:
                self.rendering = False
                self.pygame_screen = None
                pygame.quit()
                sys.exit()

### Step 1 (continued): Define the DQNAgent class with a neural network

In [100]:
class DQNAgent:
    """Deep Q-learning agent with a replay memory and a separate target network.

    States may be passed in as boards of shape ``state_size``; they are
    flattened to vectors of length ``prod(state_size)`` before being fed to
    the networks.  Exploration is epsilon-greedy; ``epsilon_decay`` provides a
    linear schedule that the training loop applies to ``self.epsilon``.
    """

    def __init__(self, state_size, action_size):
        """Build the online and target networks for the given state/action sizes."""
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=10000)
        self.memory_len = 0  # Mirrors len(self.memory); kept for existing callers
        self.gamma = 0.95  # Discount factor for future rewards
        self.epsilon = 1.0  # Exploration rate, start with full exploration
        self.epsilon_min = 0.01  # Minimum exploration rate
        self.learning_rate = 0.01  # Learning rate for the neural network
        self.target_update_frequency = 40
        self.weight_update_frequency = 5
        self.model = self._build_model()
        self.target_model = self._build_model()
        # Start both networks from identical weights
        self.update_target_model()

    def _build_model(self):
        """Return a compiled network mapping a flattened state to one Q-value per action."""
        input_dim = int(np.prod(self.state_size))
        # Two hidden fully connected layers followed by a linear Q-value head
        model = Sequential()
        model.add(Dense(64, input_shape=(input_dim,), activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def _as_batch(self, states):
        """Flatten one state or a sequence of states into a float32 ``(n, prod(state_size))`` matrix."""
        flat_dim = int(np.prod(self.state_size))
        return np.asarray(states, dtype=np.float32).reshape(-1, flat_dim)

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory.

        The states are stored as copies so later in-place changes by the
        caller do not alter what was recorded.
        """
        self.memory.append((np.array(state), action, reward, np.array(next_state), done))
        # The deque silently drops old entries once full, so track its real size
        self.memory_len = len(self.memory)

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        Returns:
            A Python ``int`` in ``range(action_size)``.
        """
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(self._as_batch(state), verbose=0)[0]
        return int(np.argmax(q_values))

    def epsilon_decay(self, episode, max_episodes):
        """Linear exploration schedule.

        Starts at 1 for ``episode == 0`` and decreases by ``1.5 / max_episodes``
        per episode, clipped from below at ``epsilon_min``.
        """
        epsilon_t = 1 - 1.5 * episode / max_episodes
        return max(epsilon_t, self.epsilon_min)

    def replay(self, batch_size):
        """Train the online network on one random minibatch from the memory.

        For each sampled transition the target for the taken action is
        ``reward`` if the transition was terminal, otherwise
        ``reward + gamma * max_a Q_target(next_state, a)``; the other actions
        keep the online network's current predictions.  The whole minibatch is
        evaluated and fitted in single batched calls.

        Does nothing if ``batch_size`` is not positive or the memory holds
        fewer than ``batch_size`` transitions.
        """
        if batch_size <= 0 or len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        states = self._as_batch(states)
        next_states = self._as_batch(next_states)
        actions = np.asarray(actions, dtype=np.int64)
        rewards = np.asarray(rewards, dtype=np.float32)
        not_done = ~np.asarray(dones, dtype=bool)

        # Bootstrapped value of the next state; zeroed for terminal transitions
        next_q = self.target_model.predict(next_states, verbose=0)
        targets = rewards + self.gamma * np.max(next_q, axis=1) * not_done

        # Only the Q-value of the action actually taken is moved towards its target
        q_values = np.array(self.model.predict(states, verbose=0))
        q_values[np.arange(batch_size), actions] = targets
        self.model.fit(states, q_values, epochs=1, verbose=0, batch_size=batch_size)

### Step 2: Define the Deep Q-Network (DQN) class

In [101]:
# class DQN(Model):
#     # Initialize the DQN
#     def __init__(self, num_actions):
#         super(DQN, self).__init__()

#         # Flatten the input (2D game board) into 1D array
#         self.flatten = layers.Flatten()

#         # Define the first hidden layer with 128 neurons and ReLU activation
#         self.dense1 = layers.Dense(128, activation='relu')

#         # Define the second hidden layer with 128 neurons and ReLU activation
#         self.dense2 = layers.Dense(128, activation='relu')

#         # Define the output layer with as many neurons as there are actions
#         # The output represents the Q-values for each action
#         self.output_layer = layers.Dense(num_actions)

#         self.optimizer = optimizers.Adam()
    
#     def __call__(self, inputs):
#         x = self.flatten(inputs)
#         x = self.dense1(x)
#         x = self.dense2(x)
#         return self.output_layer(x)


### Step 3: Implementing the Experience Replay Buffer

In [102]:
# class ExperienceReplayBuffer:
#     def __init__(self, buffer_size):
#         # Initialize the experience replay buffer with the specified buffer size
#         self.buffer_size = buffer_size
#         self.buffer = deque(maxlen=buffer_size)

#     def add_experience(self, experience):
#         # Add a new experience to the buffer
#         # The experience should be a tuple (state, action, reward, next_state, done)
#         self.buffer.append(experience)

#     def sample(self, batch_size):
#         # Sample a batch of experiences randomly from the buffer
#         # The batch size specifies how many experiences to sample
#         batch = random.sample(self.buffer, batch_size)
        
#         # Unzip the batch to separate states, actions, rewards, next_states, and dones
#         states, actions, rewards, next_states, dones = zip(*batch)
        
#         return states, actions, rewards, next_states, dones

#     def __len__(self):
#         # Return the current size of the buffer
#         return len(self.buffer)

### Step 4: Training the Agent with Deep Q-Learning

In [103]:
def train_agent(num_episodes, batch_size=32):
    """Train the module-level ``agent`` on the module-level ``env``.

    Each episode is played to completion with the agent's epsilon-greedy
    policy and every transition is stored in the agent's replay memory.
    After the episode:

    * every ``agent.weight_update_frequency`` episodes one replay minibatch is
      trained, provided the memory holds at least ``batch_size`` transitions;
    * every ``agent.target_update_frequency`` episodes the target network is
      synchronised with the online network;
    * ``agent.epsilon`` is moved along the agent's decay schedule.

    Args:
        num_episodes: number of episodes to play.
        batch_size: number of transitions per replay minibatch.
    """
    for episode in range(num_episodes):
        state = env.reset()
        done = False
        total_reward = 0
        food_pos = env.food_x, env.food_y
        print(f"Food position: {food_pos}")
        while not done:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

        # env.score counts the food eaten during this episode.  Comparing the
        # final head position with the initial food position would only match
        # when the snake happened to die on that exact cell.
        if env.score > 0:
            print('************* Ate the food! **************')
        else:
            print("----------- Hit the boundary wall ---------")

        # Use the real buffer size: the replay memory is bounded, so a running
        # count of stored transitions can exceed what is available to sample.
        if episode % agent.weight_update_frequency == 0 and len(agent.memory) >= batch_size:
            agent.replay(batch_size)

        if episode % agent.target_update_frequency == 0:
            agent.update_target_model()

        if agent.epsilon > agent.epsilon_min:
            agent.epsilon = agent.epsilon_decay(episode, num_episodes)

        # Print the episode number and total reward for monitoring progress
        print(f"Episode: {episode + 1}, Total Reward: {total_reward}")

    print("Training completed.")


# Seed Python's `random`, NumPy and the Keras backend in one call so that a
# Restart & Run All reproduces the same training run.
SEED = 42
utils.set_random_seed(SEED)

env = SnakeEnvironment(10, 10)
# The network consumes the board itself, so take the state shape and the
# number of actions from the environment instead of repeating the literals.
state_size = env.board.shape
agent = DQNAgent(state_size, action_size=env.num_actions)
train_agent(2000)

Food position: (8, 7)
----------- Hit the boundary wall ---------
Episode: 1, Total Reward: -47
Food position: (1, 1)
----------- Hit the boundary wall ---------
Episode: 2, Total Reward: -42
Food position: (4, 8)
----------- Hit the boundary wall ---------
Episode: 3, Total Reward: -31
Food position: (2, 7)
----------- Hit the boundary wall ---------
Episode: 4, Total Reward: -20
Food position: (7, 8)
----------- Hit the boundary wall ---------
Episode: 5, Total Reward: -32
Food position: (5, 1)
----------- Hit the boundary wall ---------
Episode: 6, Total Reward: -85
Food position: (7, 0)
----------- Hit the boundary wall ---------
Episode: 7, Total Reward: -35
Food position: (4, 4)
************* Ate the food! **************
Episode: 8, Total Reward: -12
Food position: (0, 8)
----------- Hit the boundary wall ---------
Episode: 9, Total Reward: -58
Food position: (0, 4)
----------- Hit the boundary wall ---------
Episode: 10, Total Reward: -33
Food position: (1, 0)
----------- Hit th

KeyboardInterrupt: 

### Step 5: Evaluate the Agent

In [None]:
def evaluate_agent(num_episodes=10):
    """Play rendered episodes with the module-level ``agent`` on the module-level ``env``.

    Actions come from ``agent.act``, so whatever exploration rate
    ``agent.epsilon`` currently holds still applies.  After every move the
    board is drawn together with the head coordinates; at the end of an
    episode an outcome message is shown and the total reward is printed.

    Args:
        num_episodes: number of episodes to play.
    """
    for episode in range(num_episodes):
        state = env.reset()
        done = False
        total_reward = 0
        food_pos = env.food_x, env.food_y
        print(f"Food position: {food_pos}")
        message = None
        while not done:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            state = next_state
            total_reward += reward
            message = f"( {env.snake[-1][0]}, {env.snake[-1][1]} )"
            env.render(message)
            time.sleep(0.1)  # slow the animation down enough to follow

        # env.score counts the food eaten during this episode.  Comparing the
        # final head position with the initial food position would only match
        # when the snake happened to die on that exact cell.
        if env.score > 0:
            message = '** Ate the food! **'
        else:
            message = "-- Hit the boundary wall --"

        env.render(message)
        print(f"Evaluation Episode: {episode + 1}, Reward: {total_reward}")
        time.sleep(0.8)  # leave the outcome message visible for a moment

    print("Finished testing!")

# Evaluate on a fresh 10x10 environment; close() then keeps the window open
# until the user closes it.
env = SnakeEnvironment(width=10, height=10)
evaluate_agent(num_episodes=10)
env.close()

In [None]:
# Example usage
# if __name__ == "__main__":
#     env = SnakeEnvironment(width=10, height=10)
#     num_actions = 4  # UP, DOWN, LEFT, RIGHT
#     dqn = DQN(num_actions)
#     replay_buffer = ExperienceReplayBuffer(buffer_size=10000)

#     train_agent(env, dqn, replay_buffer)
    # evaluate_agent(env, dqn)