In [1]:
import numpy as np

# import cupy as np
import random
import matplotlib.pyplot as plt
from typing import List, Tuple, Dict
from dataclasses import dataclass
from enum import Enum
import time
import torch

In [2]:
print(torch.__version__)
print("Cuda:", torch.cuda.is_available())
print("MPS:", torch.mps.is_available())
print("CPU:", torch.cpu.is_available())

2.8.0+cu128
Cuda: True
MPS: False
CPU: True


In [3]:
from Game import Game2048Env, Direction

In [4]:
game = Game2048Env(random_seed=1234)
state = game.reset()
done = False


def print_board(board):
    for x in board:
        print("\t".join(f"{v:4}" for v in x))
    print("-" * 20)


print_board(state)

done = False

while not done:  # Play some random moves
    # action = Direction(np.random.randint(4))  # Random action for demonstration
    actions = [Direction.LEFT, Direction.UP, Direction.RIGHT, Direction.DOWN]
    state, reward, done, meta = game.step(actions)

    # print(f"Action: {meta['direction']} | Score: {game.score}")
    # print(f"Reward: {reward} | Done: {done}")

print_board(state)

   0	   2	   0	   0
   2	   0	   0	   0
   0	   0	   0	   0
   0	   0	   0	   0
--------------------
   2	   0	   0	   0
   2	   4	   0	   0
   0	   0	   0	   0
   2	   0	   0	   0
--------------------


In [5]:
from SimpleNeuralNetwork import SimpleNeuralNetwork

In [6]:
from EvolutionaryOptimizer import EvolutionaryOptimizer

In [7]:
import pickle


def save_network(network: SimpleNeuralNetwork, filename: str):
    torch.save(network.state_dict(), filename)


def load_network(filename: str, hidden_layers: List[int]) -> SimpleNeuralNetwork:
    network = SimpleNeuralNetwork(hidden_layers=hidden_layers)
    network.load_state_dict(torch.load(filename, map_location=DEVICE))
    network.to(DEVICE)
    return network


def save_population(population: List[SimpleNeuralNetwork], filename: str):
    with open(filename, "wb") as f:
        pickle.dump(population, f)


def load_population(filename: str) -> List[SimpleNeuralNetwork]:
    with open(filename, "rb") as f:
        population = pickle.load(f)
    return population

In [None]:
from datetime import timedelta


def main():
    generations = 1000
    games_per_player = 10
    max_steps = 10000
    hidden_layers = [128]

    optimizer = EvolutionaryOptimizer(
        population_size=200,
        elite_size=20,
        new_members=20,
        mutation_rate=0.05,
        mutation_strength=0.1,
        hidden_layers=hidden_layers,
    )

    loop_start_time = time.time()

    # Create directory for saving networks
    import os

    folder = f"networks/{'_'.join(str(x) for x in hidden_layers)}"
    os.makedirs(folder, exist_ok=True)
    print(f"Saving networks to folder: {folder}")

    best_score = 0
    avg_scores = []
    for gen in range(generations):
        (population, avg_score, best_score) = optimizer.run_generation(
            games_per_player, max_steps, base_random_seed=gen
        )
        avg_scores.append(avg_score)
        elapsed_time = time.time() - loop_start_time
        average_time_per_iteration = elapsed_time / (gen + 1)
        duration = str(
            timedelta(seconds=(average_time_per_iteration * (generations - gen + 1)))
        )

        # Save best network every 10 generations
        print(
            f"⏳ {duration} | Generation {gen + 1}/{generations} - Average Score: {avg_score:.2f} - Best Score: {best_score:.2f} "
        )
        if (gen % 10 == 0) or (gen == generations - 1):
            print(f"Saving population at generation {gen + 1}")
            save_population(population, f"{folder}/population_gen_{gen + 1}.pkl")

    # evaluated = optimizer.evaluate(env, 10)
    # best_network, best_score = max(evaluated, key=lambda x: x[1])

    # Plot average scores over generations
    plt.plot(range(1, generations + 1), avg_scores)
    plt.xlabel("Generation")
    plt.ylabel("Average Score")
    plt.title("Evolution of Average Score over Generations")
    plt.show()


if __name__ == "__main__":
    main()

Saving networks to folder: networks/128
⏳ 2:36:02.073220 | Generation 1/1000 - Average Score: 26.08 - Best Score: 84.80 
Saving population at generation 1
⏳ 3:25:12.112212 | Generation 2/1000 - Average Score: 53.59 - Best Score: 94.00 
⏳ 4:10:56.013812 | Generation 3/1000 - Average Score: 83.31 - Best Score: 101.60 
⏳ 4:48:31.832261 | Generation 4/1000 - Average Score: 104.41 - Best Score: 129.20 
⏳ 5:13:17.305215 | Generation 5/1000 - Average Score: 109.00 - Best Score: 148.00 


In [9]:
avg_scores = []

folder = f"networks/{'_'.join(str(x) for x in hidden_layers)}"

# Plot average scores over generations
plt.plot(range(1, generations + 1), avg_scores)
plt.xlabel("Generation")
plt.ylabel("Average Score")
plt.title("Evolution of Average Score over Generations")
plt.show()

NameError: name 'hidden_layers' is not defined