In [34]:
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from collections import deque

In [35]:
class Game2048:
    def __init__(self):
        self.board = np.zeros((4, 4), dtype=int)
        self.add_tile()
        self.add_tile()

    def add_tile(self):
        empty_cells = np.argwhere(self.board == 0)
        cell = empty_cells[random.randint(0, len(empty_cells) - 1)]
        value = 2 if random.random() < 0.9 else 4
        self.board[tuple(cell)] = value

    def move(self, direction):
        moved, reward = False, 0
        if direction in [0, 1]:  # up or down
            self.board = self.board.T
        if direction in [1, 3]:  # down or right
            self.board = np.flip(self.board, axis=1)
        
        for i in range(4):
            row = self.board[i][self.board[i] != 0]
            row, row_reward = self.merge(row)
            reward += row_reward
            self.board[i, :len(row)] = row
            self.board[i, len(row):] = 0

        if direction in [0, 1]:  # up or down
            self.board = self.board.T
        if direction in [1, 3]:  # down or right
            self.board = np.flip(self.board, axis=1)
        
        return moved, reward

    def merge(self, row):
        reward = 0
        new_row = np.zeros(4, dtype=int)
        j = 0
        skip = False
        for i in range(len(row)):
            if skip:
                skip = False
                continue
            if i + 1 < len(row) and row[i] == row[i + 1]:
                new_row[j] = row[i] * 2
                reward += new_row[j]
                skip = True
            else:
                new_row[j] = row[i]
                skip = False
            j += 1
        return new_row, reward

    def is_done(self):
        if np.any(self.board == 0):
            return False
        for direction in range(4):
            test_board = self.board.copy()
            test_game = Game2048()
            test_game.board = test_board
            moved, _ = test_game.move(direction)
            if moved:
                return False
        return True

    def reset(self):
        self.board = np.zeros((4, 4), dtype=int)
        self.add_tile()
        self.add_tile()
        return self.board.copy()

    def step(self, action):
        moved, reward = self.move(action)
        if moved:
            self.add_tile()
        next_state = self.board.copy()
        done = self.is_done()
        return next_state, reward, done
    

In [36]:
# Hyperparameters
alpha = 0.001
gamma = 0.99
epsilon = 1.0
epsilon_min = 0.01
epsilon_decay = 0.995
batch_size = 64
episodes = 10000
memory_size = 10000

In [37]:
# Create DQN model
def create_model():
    model = Sequential([
        Conv2D(128, kernel_size=(2, 2), activation='relu', input_shape=(4, 4, 1)),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(4, activation='linear')
    ])

    model.compile(optimizer=Adam(learning_rate=alpha), loss='mse')
    return model

# Prepare state input
def preprocess_state(state):
    return np.expand_dims(np.expand_dims(state, axis=-1), axis=0).astype(np.float32)

# Update target model weights
def update_target_model():
    target_model.set_weights(model.get_weights())

# Action selection
def select_action(state):
    if np.random.rand() < epsilon:
        return random.randrange(4)  # Random action
    else:
        q_values = model.predict(state)  # Predict Q-values for each action
        return np.argmax(q_values[0])  # Select action with the highest Q-value

# Experience replay
def replay():
    if len(memory) < batch_size:
        return

    minibatch = random.sample(memory, batch_size)

    states, targets = [], []

    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            target += gamma * np.amax(target_model.predict(next_state)[0])

        target_f = model.predict(state)
        target_f[0][action] = target

        states.append(state[0])
        targets.append(target_f[0])

    model.fit(np.array(states), np.array(targets), epochs=1, verbose=0)

In [38]:
# Initialize the DQN model
model = create_model()
target_model = create_model()
memory = deque(maxlen=memory_size)

# Initialize the 2048 game environment
game = Game2048()

target_score = 2048
max_steps_per_episode = 50
# Main training loop
for episode in range(episodes):
    state = preprocess_state(game.reset())
    done = False
    steps = 0
    episode_score = 0

    while not done and steps < max_steps_per_episode:        
        action = select_action(state)
        next_state, reward, done = game.step(action)
        next_state = preprocess_state(next_state)

        memory.append((state, action, reward, next_state, done))
        state = next_state
        episode_score += reward

        
        replay()
        
        steps += 1
    
    update_target_model()
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    print(f"Episode: {episode + 1}, Score: {episode_score}, Epsilon: {epsilon:.2f}")

    if episode_score >= target_score:
        print("Target score achieved. Stopping training.")
        break

Episode: 1, Score: 4, Epsilon: 0.99


KeyboardInterrupt: 

In [12]:
def test_model(model, episodes):
    scores = []
    for episode in range(episodes):
        game = Game2048()
        state = preprocess_state(game.board)
        done = False
        score = 0

        while not done:            
            q_values = model.predict(state)
            action = np.argmax(q_values[0])
            _, reward, done = game.step(action)
            state = preprocess_state(game.board)
            score += reward

        scores.append(score)

    return scores

test_episodes = 100
scores = test_model(model, test_episodes)
average_score = np.mean(scores)
max_score = np.max(scores)

print(f"Average score over {test_episodes} episodes: {average_score}")
print(f"Max score: {max_score}")


hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi


KeyboardInterrupt: 

In [9]:
episodes = 10

In [None]:
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from collections import deque

# Hyperparameters
alpha = 0.001
gamma = 0.99
epsilon = 1.0
epsilon_min = 0.01
epsilon_decay = 0.995
batch_size = 64
episodes = 10000
memory_size = 10000
target_score = 2048
max_steps_per_episode = 1000
update_frequency = 10  # Update the model every 10 steps


class Game2048:
    def __init__(self):
        self.board = np.zeros((4, 4), dtype=int)
        self.add_tile()
        self.add_tile()

    def add_tile(self):
        empty_cells = np.argwhere(self.board == 0)
    
        if len(empty_cells) == 0:  # Check if there are any empty cells
            return
    
        cell = empty_cells[random.randint(0, len(empty_cells) - 1)]
        value = 2 if random.random() < 0.9 else 4
        self.board[tuple(cell)] = value


    def move(self, direction):
        moved, reward = False, 0
        old_board = self.board.copy()

        if direction in [0, 1]:  # up or down
            self.board = self.board.T
        if direction in [1, 3]:  # down or right
            self.board = np.flip(self.board, axis=1)

        for i in range(4):
            row = self.board[i][self.board[i] != 0]
            row, row_reward = self.merge(row)
            reward += row_reward
            self.board[i, :len(row)] = row
            self.board[i, len(row):] = 0

        if direction in [0, 1]:  # up or down
            self.board = self.board.T
        if direction in [1, 3]:  # down or right
            self.board = np.flip(self.board, axis=1)

        moved = not np.array_equal(old_board, self.board)
        return moved, reward

    def merge(self, row):
        reward = 0
        new_row = np.zeros(4, dtype=int)
        j = 0
        skip = False
        for i in range(len(row)):
            if skip:
                skip = False
                continue
            if i + 1 < len(row) and row[i] == row[i + 1]:
                new_row[j] = row[i] * 2
                reward += new_row[j]
                skip = True
            else:
                new_row[j] = row[i]
                skip = False
            j += 1
        return new_row, reward

    def is_done(self):
        if np.any(self.board == 0):
            return False
        for direction in range(4):
            test_board = self.board.copy()
            test_game = Game2048()
            test_game.board = test_board
            moved, _ = test_game.move(direction)
            if moved:
                return False
        return True

    def reset(self):
        self.board = np.zeros((4, 4), dtype=int)
        self.add_tile()
        self.add_tile()
        return self.board

    def step(self, action):
        moved, reward = self.move(action)
        if moved:
            self.add_tile()
        next_state = self.board.copy()
        done = self.is_done()
        return next_state, reward, done

# Create DQN model
def create_model():
    model = Sequential([
        Conv2D(128, kernel_size=(2, 2), activation='relu', input_shape=(4, 4, 1)),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(4, activation='linear')
    ])

    model.compile(optimizer=Adam(learning_rate=alpha), loss='mse')
    return model

# Prepare state input
def preprocess_state(state):
    return np.expand_dims(np.expand_dims(state.astype(np.float32), axis=-1), axis=0)

# Update target model weights
def update_target_model():
    target_model.set_weights(model.get_weights())

# Action selection
def select_action(state):
    if np.random.rand() < epsilon:
        return random.randrange(4)  # Random action
    else:
        q_values = model.predict(state)  # Predict Q-values for each action
        return np.argmax(q_values[0])  # Select action with the highest Q-value

# Experience replay
def replay():
    if len(memory) < batch_size:
        return

    minibatch = random.sample(memory, batch_size)

    states, targets = [], []

    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            target += gamma * np.amax(target_model.predict(next_state)[0])

        target_f = model.predict(state)
        target_f[0][action] = target

        states.append(state[0])
        targets.append(target_f[0])

    model.fit(np.array(states), np.array(targets), epochs=1, verbose=0)

# Initialize the DQN model
model = create_model()
target_model = create_model()

# Initialize the 2048 game environment
game = Game2048()

# Initialize memory for experience replay
memory = deque(maxlen=memory_size)

# Main training loop
step = 0
for episode in range(episodes):
    state = preprocess_state(game.reset())
    done = False

    while not done:
        action = select_action(state)
        next_state, reward, done = game.step(action)
        next_state = preprocess_state(next_state)

        memory.append((state, action, reward, next_state, done))
        state = next_state

        step += 1
        if step % update_frequency == 0:
            replay()


        if done:
            break

    update_target_model()
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    if np.max(game.board) >= target_score:
        print(f"Reached target score {target_score} at episode {episode}.")
        break

    print(f"Episode {episode}, Total reward: {total_reward}, Max tile: {np.max(game.board)}")





























































































































































































































































































































































































































































































































































































































































































































