In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import random
from collections import deque

In [17]:
# Snake game environment
class SnakeGame:
    def __init__(self, width, height):
        self.width = width
        self.height = height
        self.reset()

    def reset(self):
        self.snake = [(self.width // 2, self.height // 2)]
        self.direction = random.choice([(0, 1), (0, -1), (1, 0), (-1, 0)])
        self.apple = self.generate_apple()
        self.score = 0
        self.done = False

    def generate_apple(self):
        while True:
            apple = (random.randint(0, self.width - 1), random.randint(0, self.height - 1))
            if apple not in self.snake:
                return apple

    def move(self, direction):
        if not self.done:
            new_head = (self.snake[0][0] + direction[0], self.snake[0][1] + direction[1])
            if (0 <= new_head[0] < self.width) and (0 <= new_head[1] < self.height) and (new_head not in self.snake[1:]):
                self.snake.insert(0, new_head)
                if new_head == self.apple:
                    self.score += 1
                    self.apple = self.generate_apple()
                else:
                    self.snake.pop()
            else:
                self.done = True

In [18]:
# Deep Q-Network (DQN) model
class DQN(nn.Module):
    def __init__(self, input_size, output_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [19]:
# Experience replay memory
class ReplayMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        batch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)

In [20]:
# DQN agent
class DQNAgent:
    def __init__(self, state_size, action_size, batch_size, memory_capacity, gamma, epsilon_start, epsilon_end, epsilon_decay):
        self.state_size = state_size
        self.action_size = action_size
        self.batch_size = batch_size
        self.memory = ReplayMemory(memory_capacity)
        self.gamma = gamma
        self.epsilon = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = DQN(state_size, action_size).to(self.device)
        self.target_model = DQN(state_size, action_size).to(self.device)
        self.target_model.load_state_dict(self.model.state_dict())
        self.target_model.eval()
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        self.loss_fn = nn.SmoothL1Loss()

    def get_action(self, state):
        if random.random() <= self.epsilon:
            return random.randint(0, self.action_size - 1)
        else:
            with torch.no_grad():
                state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
                q_values = self.model(state)
                return q_values.argmax().item()

    def update_epsilon(self):
        self.epsilon = max(self.epsilon_end, self.epsilon * self.epsilon_decay)

    def train(self):
        if len(self.memory) < self.batch_size:
            return

        states, actions, rewards, next_states, dones = self.memory.sample(self.batch_size)
        states = torch.FloatTensor(states).to(self.device)
        actions = torch.LongTensor(actions).unsqueeze(1).to(self.device)
        rewards = torch.FloatTensor(rewards).unsqueeze(1).to(self.device)
        next_states = torch.FloatTensor(next_states).to(self.device)
        dones = torch.FloatTensor(dones).unsqueeze(1).to(self.device)

        current_q_values = self.model(states).gather(1, actions)
        next_q_values = self.target_model(next_states).max(1)[0].unsqueeze(1)
        target_q_values = rewards + (1 - dones) * self.gamma * next_q_values

        loss = self.loss_fn(current_q_values, target_q_values)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.update_epsilon()

        if len(self.memory) % 1000 == 0:
            self.target_model.load_state_dict(self.model.state_dict())

In [21]:
# Constants
WIDTH = 10
HEIGHT = 10
STATE_SIZE = WIDTH * HEIGHT
ACTION_SIZE = 4  # up, down, left, right
BATCH_SIZE = 64
MEMORY_CAPACITY = 10000
GAMMA = 0.99
EPSILON_START = 1.0
EPSILON_END = 0.01
EPSILON_DECAY = 0.999

# Training
NUM_EPISODES = 1000
MAX_STEPS = 100

# Initialize game and agent
game = SnakeGame(WIDTH, HEIGHT)
agent = DQNAgent(STATE_SIZE, ACTION_SIZE, BATCH_SIZE, MEMORY_CAPACITY, GAMMA, EPSILON_START, EPSILON_END, EPSILON_DECAY)

for episode in range(NUM_EPISODES):
    game.reset()
    state = np.array(game.snake_to_array()).flatten()
    done = False
    total_reward = 0

    for step in range(MAX_STEPS):
        action = agent.get_action(state)
        game.move([(0, -1), (0, 1), (-1, 0), (1, 0)][action])
        next_state = np.array(game.snake_to_array()).flatten()
        reward = game.score
        done = game.done

        agent.memory.push(state, action, reward, next_state, done)
        total_reward += reward

        state = next_state

        agent.train()

        if done:
            break

    print(f"Episode: {episode+1}, Score: {total_reward}")
# Play the game using the trained agent
game.reset()
state = np.array(game.snake_to_array()).flatten()
done = False
total_reward = 0

while not done:
    action = agent.get_action(state)
    game.move([(0, -1), (0, 1), (-1, 0), (1, 0)][action])
    state = np.array(game.snake_to_array()).flatten()
    total_reward += game.score
    done = game.done

print(f"Total Score: {total_reward}")

AttributeError: 'SnakeGame' object has no attribute 'snake_to_array'

pygame 2.1.2 (SDL 2.0.18, Python 3.9.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


OSError: No file or directory found at flappy_bird_model.h5

In [30]:
import pygame
import random
import numpy as np
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Snake game environment
class SnakeGame:
    def __init__(self, width, height):
        self.width = width
        self.height = height
        self.reset()

    def reset(self):
        self.snake = [(self.width // 2, self.height // 2)]
        self.direction = random.choice([(0, 1), (0, -1), (1, 0), (-1, 0)])
        self.apple = self.generate_apple()
        self.score = 0
        self.done = False

    def generate_apple(self):
        while True:
            apple = (random.randint(0, self.width - 1), random.randint(0, self.height - 1))
            if apple not in self.snake:
                return apple

    def move(self, direction):
        if not self.done:
            new_head = (self.snake[0][0] + direction[0], self.snake[0][1] + direction[1])
            if (0 <= new_head[0] < self.width) and (0 <= new_head[1] < self.height) and (new_head not in self.snake[1:]):
                self.snake.insert(0, new_head)
                if new_head == self.apple:
                    self.score += 1
                    self.apple = self.generate_apple()
                else:
                    self.snake.pop()
            else:
                self.done = True

    def get_state(self):
        head = self.snake[0]
        body = self.snake[1:]
        state = [
            # Check for obstacles and snake body around the head
            (head[0] - 1, head[1]) in body or head[0] == 0,  # Left
            (head[0] + 1, head[1]) in body or head[0] == self.width - 1,  # Right
            (head[0], head[1] - 1) in body or head[1] == 0,  # Up
            (head[0], head[1] + 1) in body or head[1] == self.height - 1,  # Down
            # Check for apple location relative to the head
            self.apple[0] < head[0],  # Apple to the left
            self.apple[0] > head[0],  # Apple to the right
            self.apple[1] < head[1],  # Apple above
            self.apple[1] > head[1]  # Apple below
        ]
        return np.array(state, dtype=int)

    def render(self):
        pygame.init()
        cell_size = 20
        screen_width = self.width * cell_size
        screen_height = self.height * cell_size
        screen = pygame.display.set_mode((screen_width, screen_height))
        pygame.display.set_caption("Snake Game")

        clock = pygame.time.Clock()

        while not self.done:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    self.done = True

            screen.fill((0, 0, 0))  # Clear the screen

            # Draw the snake
            for segment in self.snake:
                pygame.draw.rect(screen, (0, 255, 0), (segment[0] * cell_size, segment[1] * cell_size, cell_size, cell_size))

            # Draw the apple
            pygame.draw.rect(screen, (255, 0, 0), (self.apple[0] * cell_size, self.apple[1] * cell_size, cell_size, cell_size))

            pygame.display.flip()  # Update the screen
            clock.tick(10)  # Limit the frame rate

            self.move(self.direction)  # Move the snake

# Deep Q-Network (DQN) model
class DQNModel:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = self.build_model()

    def build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=0.001))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Constants
WIDTH = 20
HEIGHT = 20
STATE_SIZE = 8  # Number of state features
ACTION_SIZE = 4  # up, down, left, right
BATCH_SIZE = 32

# Initialize game and agent
game = SnakeGame(WIDTH, HEIGHT)
model = DQNModel(STATE_SIZE, ACTION_SIZE)

# Training
for episode in range(1000):
    state = game.get_state()
    done = False
    score = 0
    while not done:
        action = model.act(state)
        game.move([(0, -1), (0, 1), (-1, 0), (1, 0)][action])
        next_state = game.get_state()
        reward = game.score - score
        score = game.score
        done = game.done
        model.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print(f"Episode: {episode+1}, Score: {score}")
            break
        if len(model.memory) > BATCH_SIZE:
            model.replay(BATCH_SIZE)

# Play the game using the trained agent
game.reset()
state = game.get_state()
done = False

pygame.init()
cell_size = 20
screen_width = WIDTH * cell_size
screen_height = HEIGHT * cell_size
screen = pygame.display.set_mode((screen_width, screen_height))
pygame.display.set_caption("Snake Game")

clock = pygame.time.Clock()

while not done:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            done = True

    screen.fill((0, 0, 0))  # Clear the screen

    # Draw the snake
    for segment in game.snake:
        pygame.draw.rect(screen, (0, 255, 0), (segment[0] * cell_size, segment[1] * cell_size, cell_size, cell_size))

    # Draw the apple
    pygame.draw.rect(screen, (255, 0, 0), (game.apple[0] * cell_size, game.apple[1] * cell_size, cell_size, cell_size))

    pygame.display.flip()  # Update the screen
    clock.tick(10)  # Limit the frame rate

    action = model.act(np.reshape(state, [1, STATE_SIZE]))
    game.move([(0, -1), (0, 1), (-1, 0), (1, 0)][action])
    state = game.get_state()
    done = game.done

pygame.quit()


ImportError: DLL load failed while importing _pywrap_tfe: The specified procedure could not be found.