In [1]:
import numpy as np
# import cupy as np
import random
import matplotlib.pyplot as plt
from typing import List, Tuple, Dict
from dataclasses import dataclass
from enum import Enum
import time

import torch

In [2]:
# print(torch.__version__)
# print(torch.backends.mps.is_available())
# print(torch.tensor([1,2,3], device='mps'))  # Should succeed on Apple Silicon

In [3]:
class Direction(Enum):
    """Move directions for the 2048 board.

    The integer value of each member is used in two places:

    * ``Game2048Env.move`` applies ``np.rot90`` that many times so that every
      move can be executed as a slide to the left.
    * ``Player.next_move`` maps the index of the largest network output back
      to a member via ``Direction(index)``.
    """
    LEFT = 0
    UP = 1
    RIGHT = 2
    DOWN = 3

In [4]:
import numpy as np

class Game2048Env:
    """Minimal 2048 game with a gym-style ``reset`` / ``step`` interface.

    The board is a ``grid_size x grid_size`` integer array in which ``0``
    marks an empty cell. New tiles are drawn from the global ``np.random``
    state, so call ``np.random.seed`` beforehand for reproducible games.

    Attributes:
        grid_size: Side length of the square board.
        board: Current board as an integer ``np.ndarray``.
        score: Sum of the rewards of all valid moves since the last reset.
    """

    def __init__(self, grid_size: int = 4):
        """Create the environment and immediately start a first game.

        Args:
            grid_size: Side length of the board (the classic game uses 4).
        """
        self.grid_size = grid_size
        self.reset()

    def reset(self):
        """Start a new game with an empty board and two random tiles.

        Returns:
            A copy of the fresh board.
        """
        self.board = np.zeros((self.grid_size, self.grid_size), dtype=int)
        self.score = 0
        self.spawn_tile()
        self.spawn_tile()
        return self.board.copy()

    def spawn_tile(self):
        """Put a 2 (probability 0.9) or a 4 on a random empty cell.

        Does nothing when the board has no empty cell.
        """
        empty = list(zip(*np.where(self.board == 0)))
        if empty:
            x, y = empty[np.random.randint(len(empty))]
            self.board[x, y] = 2 if np.random.random() < 0.9 else 4

    def step(self, action: "Direction"):
        """Apply one move and advance the game.

        Args:
            action: A ``Direction`` member (a plain int 0-3 is accepted too).

        Returns:
            ``(board_copy, reward, done, info)`` where ``reward`` is the sum
            of the tiles created by merges in this move and ``info`` is
            always an empty dict.

        Note:
            A move that does not change the board ends the episode
            (``done=True``) without spawning a tile. This is deliberate: it
            stops an agent that keeps choosing an impossible move.
        """
        moved, reward = self.move(action)
        if not moved:
            # Stop if invalid move
            return self.board.copy(), reward, True, {}
        self.spawn_tile()
        self.score += reward
        done = not self.can_move()
        return self.board.copy(), reward, done, {}

    @staticmethod
    def _merge_row_left(row):
        """Slide one row to the left, merging each equal pair once.

        Returns:
            ``(new_row, gained)``: the resulting row as a list padded with
            zeros to ``len(row)``, and the sum of the merged tile values.
        """
        tiles = [int(v) for v in row if v != 0]
        merged = []
        gained = 0
        j = 0
        while j < len(tiles):
            if j + 1 < len(tiles) and tiles[j] == tiles[j + 1]:
                merged.append(tiles[j] * 2)
                gained += tiles[j] * 2
                j += 2  # the partner tile was consumed by the merge
            else:
                merged.append(tiles[j])
                j += 1
        merged.extend([0] * (len(row) - len(merged)))
        return merged, gained

    def move(self, direction):
        """Slide and merge all tiles in ``direction``.

        ``self.board`` is replaced only when the move changes something.

        Args:
            direction: ``Direction`` member or its integer value
                (0=left, 1=up, 2=right, 3=down). Integers are taken modulo 4.

        Returns:
            ``(moved, reward)``: whether the board changed, and the sum of
            the merged tile values as a plain ``int``.
        """
        turns = int(getattr(direction, "value", direction)) % 4

        # Rotate so that every move becomes a left move. ``rot90`` returns a
        # view, hence the copy before rows are overwritten.
        rotated = np.rot90(self.board, turns).copy()

        reward = 0
        moved = False
        for i in range(self.grid_size):
            new_row, gained = self._merge_row_left(rotated[i])
            reward += gained
            if not np.array_equal(rotated[i], new_row):
                moved = True
                rotated[i] = new_row

        if moved:
            # Undo the rotation to restore the original orientation.
            self.board = np.rot90(rotated, -turns).copy()

        return moved, reward

    def can_move(self):
        """Return True when at least one direction would change the board.

        The board is only read, never modified. A move exists exactly when
        the board holds a tile and either has an empty cell or has two equal
        neighbouring tiles in a row or column.
        """
        board = self.board
        if not board.any():
            return False  # no tile at all: nothing can slide or merge
        if (board == 0).any():
            return True  # some tile has an empty cell next to it
        horizontal = (board[:, :-1] == board[:, 1:]).any()
        vertical = (board[:-1, :] == board[1:, :]).any()
        return bool(horizontal or vertical)



In [5]:
# Build an environment and start a fresh game for the demo loop below.
game = Game2048Env()
state = game.reset()
# Loop flag; becomes True once game.step reports the end of the episode.
done = False

def print_board(board):
    """Print ``board`` row by row, followed by a dashed separator line.

    Every cell is right-aligned to a width of four characters and the cells
    of a row are separated by tabs.
    """
    for row in board:
        cells = [f"{value:4}" for value in row]
        print(*cells, sep="\t")
    print("-" * 20)

print_board(state)

# The demo always slides LEFT; the loop therefore stops as soon as that move
# no longer changes the board (or no move is possible at all).
# Swap in Direction(np.random.randint(4)) to watch random play instead.
action = Direction.LEFT
done = False

while not done:
    state, reward, done, _ = game.step(action)

    print(f"Action: {action.name} | Score: {game.score}")
    print(f"Reward: {reward} | Done: {done}")

    print_board(state)
    
    

   0	   2	   0	   0
   0	   0	   0	   0
   0	   0	   0	   0
   0	   0	   4	   0
--------------------
Action: LEFT | Score: 0
Reward: 0 | Done: False
   2	   0	   0	   0
   0	   0	   0	   0
   0	   0	   2	   0
   4	   0	   0	   0
--------------------
Action: LEFT | Score: 0
Reward: 0 | Done: False
   2	   0	   0	   0
   0	   0	   2	   0
   2	   0	   0	   0
   4	   0	   0	   0
--------------------
Action: LEFT | Score: 0
Reward: 0 | Done: False
   2	   0	   0	   0
   2	   0	   0	   0
   2	   0	   0	   0
   4	   2	   0	   0
--------------------
Action: LEFT | Score: 0
Reward: 0 | Done: True
   2	   0	   0	   0
   2	   0	   0	   0
   2	   0	   0	   0
   4	   2	   0	   0
--------------------


In [6]:
import torch.nn as nn

class SimpleNeuralNetwork(nn.Module):
    """Feed-forward network: ``Linear -> Tanh`` hidden layers, linear output.

    The layers live in ``self.network`` (an ``nn.Sequential``). The class
    adds the helpers needed for evolutionary search: in-place random
    mutation (``mutate``) and independent cloning (``copy``).
    """

    def __init__(self, input_size: int = 16, hidden_layers: List[int] = [256], output_size: int = 4, empty: bool = False):
        """Build and initialise the network.

        Args:
            input_size: Number of input features.
            hidden_layers: Width of each hidden layer, in order. The default
                list is only iterated, never modified.
            output_size: Number of outputs.
            empty: When True no layers are created at all; such an instance
                has no ``network`` attribute and cannot run ``forward`` until
                one is assigned.
        """
        super().__init__()

        if empty:
            return

        # Build layers using PyTorch modules
        layers = []
        prev_size = input_size

        # Hidden layers, each followed by a tanh activation
        for hidden_size in hidden_layers:
            layers.append(nn.Linear(prev_size, hidden_size))
            layers.append(nn.Tanh())
            prev_size = hidden_size

        # Output layer (no activation)
        layers.append(nn.Linear(prev_size, output_size))

        self.network = nn.Sequential(*layers)

        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal weights (tanh gain) and zero biases for every Linear."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight, nonlinearity='tanh')
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """Run ``x`` through the network.

        Args:
            x: ``torch.Tensor`` or ``np.ndarray``; arrays are converted to a
                float32 tensor first.

        Returns:
            The output tensor, on the same device as the parameters.
        """
        # Convert numpy array to tensor if needed
        if isinstance(x, np.ndarray):
            x = torch.from_numpy(x).float()

        # Move the input to wherever the parameters live (e.g. MPS)
        device = next(self.parameters()).device
        x = x.to(device)

        return self.network(x)

    def mutate(self, mutation_rate: float = 0.1, mutation_strength: float = 0.5):
        """Add Gaussian noise to randomly chosen parameter tensors, in place.

        The random draw is made once per parameter tensor (a whole weight
        matrix or bias vector), not per individual weight: with probability
        ``mutation_rate`` the entire tensor receives noise of standard
        deviation ``mutation_strength``, otherwise it is left untouched.
        """
        with torch.no_grad():
            for param in self.parameters():
                if torch.rand(1).item() < mutation_rate:
                    mutation = torch.randn_like(param) * mutation_strength
                    param.add_(mutation)

    def copy(self):
        """Return an independent clone with the same layers, weights and device.

        The previous implementation loaded the state dict into an empty
        ``nn.Sequential``, which makes ``load_state_dict`` fail on unexpected
        keys; a deep copy reproduces the architecture and the weights at once.
        """
        # Local import: the file does not import ``copy`` at module level.
        from copy import deepcopy

        return deepcopy(self)

In [7]:
@dataclass
class GameResult:
    """Summary of a single game as returned by ``Player.play``."""
    # Sum of the rewards returned by env.step over the whole game.
    score: int
    # Largest value on the board when the game stopped.
    max_tile: int
    # Number of env.step calls that were made.
    moves: int

class Player:
    """Plays 2048 by always taking the action with the largest network output."""

    def __init__(self, network: SimpleNeuralNetwork):
        """Wrap ``network``; it is moved to the MPS device when available."""
        self.network = network
        # Move network to MPS if available
        if torch.backends.mps.is_available():
            self.network = self.network.to('mps')

    def play(self, env: Game2048Env, max_steps: int = 100) -> GameResult:
        """Play one game from a fresh ``env.reset()``.

        The game stops when the environment reports ``done`` or after
        ``max_steps`` moves, whichever comes first.

        Args:
            env: Environment to play in; it is reset first.
            max_steps: Upper bound on the number of moves.

        Returns:
            A ``GameResult`` with the accumulated reward, the largest tile on
            the final board and the number of moves made.
        """
        state = env.reset()
        total_reward = 0
        done = False
        steps = 0

        while not done and steps < max_steps:
            action = self.next_move(state)

            state, reward, done, _ = env.step(action)
            total_reward += reward
            steps += 1

        # int(...) keeps a numpy scalar out of the result, matching the
        # ``max_tile: int`` field declaration.
        return GameResult(score=total_reward, max_tile=int(np.max(state)), moves=steps)

    def next_move(self, state: np.ndarray) -> Direction:
        """Choose the move for ``state``.

        Args:
            state: Current board as a numpy array.

        Returns:
            The ``Direction`` whose network output is the largest (the first
            one on ties).
        """
        self.network.eval()  # Set to evaluation mode
        with torch.no_grad():
            flat_state = state.flatten() / 2048.0  # Normalize input
            # Call the module itself rather than ``.forward`` so that
            # nn.Module's call machinery (hooks etc.) is not bypassed.
            q_values = self.network(flat_state)
            # Convert back to numpy for argmax
            if isinstance(q_values, torch.Tensor):
                q_values = q_values.cpu().numpy()
            # Plain int so the enum lookup does not depend on numpy scalars.
            return Direction(int(np.argmax(q_values)))

In [8]:
class EvolutionaryOptimizer:
    """Evolves a population of ``SimpleNeuralNetwork`` players.

    Each generation every network plays several games; the best
    ``elite_size`` networks survive unchanged and the rest of the population
    is refilled with mutated clones of randomly chosen elite members.
    """

    def __init__(
            self,
            population_size: int = 50,
            elite_size: int = 10,
            mutation_rate: float = 0.1,
            mutation_strength: float = 0.5,
            hidden_layers: List[int] = [32]
        ):
        """Create the optimizer and a random initial population.

        Args:
            population_size: Number of networks per generation.
            elite_size: Number of top networks carried over unchanged.
            mutation_rate: Passed to ``SimpleNeuralNetwork.mutate``.
            mutation_strength: Passed to ``SimpleNeuralNetwork.mutate``.
            hidden_layers: Hidden layer widths for every network.
        """
        self.population_size = population_size
        self.elite_size = elite_size
        self.mutation_rate = mutation_rate
        self.mutation_strength = mutation_strength
        # Own copy: never alias the caller's list or the shared default list.
        self.hidden_layers = list(hidden_layers)

        # Create initial population
        self.population = []
        for _ in range(population_size):
            network = SimpleNeuralNetwork(hidden_layers=self.hidden_layers)
            # Move to MPS if available
            if torch.backends.mps.is_available():
                network = network.to('mps')
            self.population.append(network)

    def evaluate(self, env: "Game2048Env", games_per_player: int = 5, max_steps: int = 100) -> List[Tuple["SimpleNeuralNetwork", float]]:
        """Score every network by its average game score.

        Args:
            env: Environment shared by all games (reset before each game).
            games_per_player: Games played per network.
            max_steps: Move limit per game. Note that this default (100) is
                lower than the default of ``run_generation`` (1000).

        Returns:
            ``(network, average_score)`` pairs in population order.
        """
        results = []
        for network in self.population:
            player = Player(network)
            total_score = 0
            for _ in range(games_per_player):
                total_score += player.play(env, max_steps=max_steps).score
            results.append((network, total_score / games_per_player))
        return results

    def select_and_breed(self, evaluated: List[Tuple["SimpleNeuralNetwork", float]]) -> None:
        """Replace the population with the elite plus mutated offspring.

        Args:
            evaluated: ``(network, score)`` pairs, e.g. from ``evaluate``.
                The list itself is left untouched.
        """
        # Rank on a copy so the caller's list keeps its order.
        ranked = sorted(evaluated, key=lambda pair: pair[1], reverse=True)
        elite = [net for net, _ in ranked[:self.elite_size]]

        # Elite networks survive unchanged (same objects, not copies).
        new_population = list(elite)

        # Fill the remaining slots with mutated clones of elite networks.
        while len(new_population) < self.population_size:
            parent = random.choice(elite)

            # Fresh network of the same shape, then overwrite its weights.
            child = SimpleNeuralNetwork(hidden_layers=self.hidden_layers)
            child.load_state_dict(parent.state_dict())

            # Move to same device as parent
            child = child.to(next(parent.parameters()).device)

            child.mutate(self.mutation_rate, self.mutation_strength)
            new_population.append(child)

        # Truncate in case elite_size exceeds population_size.
        self.population = new_population[:self.population_size]

    def run_generation(self, env: "Game2048Env", games_per_player: int = 5, max_steps: int = 1000) -> float:
        """Evaluate the current population, then breed the next one.

        Returns:
            The mean of the average scores of the population that was
            evaluated (i.e. before breeding).
        """
        evaluated = self.evaluate(env, games_per_player, max_steps=max_steps)
        avg_score = sum(score for _, score in evaluated) / len(evaluated)
        self.select_and_breed(evaluated)
        return avg_score

In [None]:
# Module-level results; main() overwrites both through a ``global`` statement
# with the best (network, average score) pair of its final evaluation.
best_network = None
best_score = 0

from datetime import timedelta

def main(generations: int = 10, games_per_player: int = 10, max_steps: int = 1000):
    """Train a population, store the best network and plot the progress.

    After training, the final population is evaluated once more and the best
    ``(network, average score)`` pair is written to the module-level
    ``best_network`` / ``best_score``.

    Args:
        generations: Number of generations to run.
        games_per_player: Games each network plays per evaluation.
        max_steps: Move limit per game, used for training and for the final
            evaluation alike.
    """
    global best_network, best_score

    env = Game2048Env()
    optimizer = EvolutionaryOptimizer(
        population_size=100,
        elite_size=50,
        mutation_rate=0.1,
        mutation_strength=0.3,
        hidden_layers=[256, 256, 256]
    )

    avg_scores = []

    loop_start_time = time.time()

    for gen in range(generations):
        print(f"=== Generation {gen+1}/{generations} ===")

        avg_score = optimizer.run_generation(env, games_per_player, max_steps)
        avg_scores.append(avg_score)

        # Estimated time left = mean time per finished generation times the
        # number of generations still to run.
        completed = gen + 1
        elapsed_time = time.time() - loop_start_time
        average_time_per_iteration = elapsed_time / completed
        duration = str(timedelta(seconds=average_time_per_iteration * (generations - completed)))

        print(f"⏳ {duration} | Generation {gen+1}/{generations} - Average Score: {avg_score}")

    # Rank the final population under the same move limit used in training.
    evaluated = optimizer.evaluate(env, games_per_player, max_steps=max_steps)
    best_network, best_score = max(evaluated, key=lambda x: x[1])

    # Plot average scores over generations
    plt.plot(range(1, generations + 1), avg_scores)
    plt.xlabel('Generation')
    plt.ylabel('Average Score')
    plt.title('Evolution of Average Score over Generations')
    plt.show()

# Start the training run when this code is executed as the main module.
if __name__ == "__main__":
    main()

=== Generation 1/100 ===
⏳ 0:13:05.874320 | Generation 1/100 - Average Score: 29.247999999999998
=== Generation 2/100 ===
⏳ 0:13:58.048899 | Generation 2/100 - Average Score: 32.436000000000014
=== Generation 3/100 ===
⏳ 0:13:53.956931 | Generation 3/100 - Average Score: 30.936000000000007
=== Generation 4/100 ===
⏳ 0:14:11.353983 | Generation 4/100 - Average Score: 37.17200000000002
=== Generation 5/100 ===


In [None]:
# Benchmark the best network: play 100 games and track the highest tile and
# the highest score reached in any of them.
if best_network:
    best_tile = 0
    best_score = 0
    player = Player(best_network)
    for _ in range(100):
        env = Game2048Env()
        result = player.play(env, max_steps=1000)
        best_tile = max(best_tile, result.max_tile)
        best_score = max(best_score, result.score)
        print(f"Played a game - Score: {result.score}, Max Tile: {result.max_tile}, Moves: {result.moves}")
    print(f"Best tile: {best_tile}, Best score: {best_score}")

Played a game - Score: 168, Max Tile: 32, Moves: 1000
Played a game - Score: 156, Max Tile: 16, Moves: 1000
Played a game - Score: 140, Max Tile: 16, Moves: 1000
Played a game - Score: 52, Max Tile: 8, Moves: 1000
Played a game - Score: 232, Max Tile: 32, Moves: 1000
Played a game - Score: 144, Max Tile: 16, Moves: 1000
Played a game - Score: 180, Max Tile: 32, Moves: 1000
Played a game - Score: 184, Max Tile: 16, Moves: 1000
Played a game - Score: 108, Max Tile: 16, Moves: 1000
Played a game - Score: 104, Max Tile: 16, Moves: 1000
Played a game - Score: 272, Max Tile: 32, Moves: 1000
Played a game - Score: 136, Max Tile: 16, Moves: 1000
Played a game - Score: 132, Max Tile: 16, Moves: 1000
Played a game - Score: 144, Max Tile: 16, Moves: 1000
Played a game - Score: 52, Max Tile: 8, Moves: 1000
Played a game - Score: 144, Max Tile: 16, Moves: 1000
Played a game - Score: 72, Max Tile: 8, Moves: 1000
Played a game - Score: 196, Max Tile: 32, Moves: 1000
Played a game - Score: 56, Max Til

In [None]:
import time

# Replay a single game move by move with the best network, printing the board
# before every move.
if best_network:
    env = Game2048Env()
    player = Player(best_network)

    while True:
        board = env.board
        print_board(board)
        action = player.next_move(board)
        # Snapshot the board of THIS game before the move. Reading ``state``
        # here would pick up a leftover value from another cell on the first
        # iteration and compare against the wrong board.
        prev_state = board.copy()
        state, reward, done, _ = env.step(action)
        print(f"Action: {action.name}, Best: {np.max(state)}")
        if (prev_state == state).all():
            # The chosen move did not change the board.
            print("nop")
            print_board(prev_state)
            print_board(state)
            break
        if done:
            print("Game Over")
            print_board(state)
            break

        #time.sleep(.2)  # Pause briefly to make the replay watchable

    

   0	   0	   0	   0
   0	   0	   2	   0
   0	   0	   0	   2
   0	   0	   0	   0
--------------------
Action: LEFT, Best: 2
   0	   0	   0	   0
   2	   0	   0	   0
   2	   0	   2	   0
   0	   0	   0	   0
--------------------
Action: UP, Best: 4
   4	   0	   2	   0
   0	   0	   0	   0
   4	   0	   0	   0
   0	   0	   0	   0
--------------------
Action: RIGHT, Best: 4
   0	   0	   4	   2
   0	   0	   0	   0
   0	   0	   0	   4
   0	   2	   0	   0
--------------------
Action: LEFT, Best: 4
   4	   2	   2	   0
   0	   0	   0	   0
   4	   0	   0	   0
   2	   0	   0	   0
--------------------
Action: RIGHT, Best: 4
   0	   0	   4	   4
   0	   0	   0	   0
   0	   0	   0	   4
   0	   0	   2	   2
--------------------
Action: LEFT, Best: 8
   8	   0	   0	   0
   0	   0	   0	   0
   4	   0	   0	   0
   4	   0	   2	   0
--------------------
Action: RIGHT, Best: 8
   0	   0	   0	   8
   0	   0	   0	   0
   0	   2	   0	   4
   0	   0	   4	   2
--------------------
Action: LEFT, Best: 8
   8	   0	   0	

In [None]:
import pickle

# Persist the trained network. Skip the write when nothing was trained so no
# file containing a pickled ``None`` is left behind.
# NOTE: pickle files can execute code on load; only load files you created.
if best_network is None:
    print("No trained network to save - run the training cell first.")
else:
    with open(f"bestnetwork-{time.time()}.pkl", "wb") as f:
        pickle.dump(best_network, f)
        print(f"Saved: {f.name}")

Saved: bestnetwork-1759940911.105819.pkl
