In [None]:
import numpy as np

# import cupy as np
import matplotlib.pyplot as plt
import time
import torch
from typing import List
import pickle
from Player import Player
from joblib import Parallel, delayed
import joblib
import random

In [None]:
print(torch.__version__)
print("Cuda:", torch.cuda.is_available())
print("MPS:", torch.mps.is_available())
print("CPU:", torch.cpu.is_available())

In [None]:
from Game import Game2048Env, Direction

In [None]:
# Demo environment created with a fixed seed; `reset()` hands back the initial board.
# (`done` is initialised right before the play loop below, so it is not set here.)
game = Game2048Env(random_seed=1234)
state = game.reset()


def print_board(board):
    """Print a board row by row, followed by a separator line.

    Each cell is formatted to a minimum width of 4 and the cells of one row are
    separated by tab characters. A rule of 20 dashes is printed after the last row.

    Args:
        board: Iterable of rows, each row an iterable of cell values.
    """
    for row in board:
        cells = [format(value, "4") for value in row]
        print(*cells, sep="\t")
    print("-" * 20)


# Show the starting position.
print_board(state)

done = False

# Step the environment until it reports the game is over. Every step submits the
# same list of four directions (not a random move); how the environment chooses
# among them is defined in Game.py — presumably order of preference, TODO confirm.
while True:
    if done:
        break
    actions = [Direction.LEFT, Direction.UP, Direction.RIGHT, Direction.DOWN]
    state, reward, done, meta = game.step(actions)

# Show the final position.
print_board(state)

In [None]:
from SimpleNeuralNetwork import SimpleNeuralNetwork

In [None]:
from EvolutionaryOptimizer import EvolutionaryOptimizer

In [None]:
def save_network(network: SimpleNeuralNetwork, filename: str):
    """Persist a network's parameters to disk.

    Only the result of ``network.state_dict()`` is written (via ``torch.save``),
    not the network object itself.

    Args:
        network: Network whose ``state_dict()`` is saved.
        filename: Destination path handed to ``torch.save``.
    """
    weights = network.state_dict()
    torch.save(weights, filename)


def load_network(
    filename: str, hidden_layers: List[int], device=None
) -> SimpleNeuralNetwork:
    """Rebuild a network and load parameters previously written by ``save_network``.

    Args:
        filename: Path of the saved state dict.
        hidden_layers: Hidden layer sizes passed to ``SimpleNeuralNetwork``; they
            must describe the same architecture the state dict was saved from,
            otherwise ``load_state_dict`` will reject it.
        device: Device to map the tensors to and move the network onto. When
            omitted, a notebook-level ``DEVICE`` is used if one exists; otherwise
            CUDA is chosen when available and CPU if not.

    Returns:
        The network with the loaded parameters, moved to the chosen device.
    """
    # This notebook never defines `DEVICE`, so referencing it directly raised a
    # NameError on a fresh kernel. Look it up defensively and fall back to
    # auto-detection instead.
    if device is None:
        device = globals().get("DEVICE")
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    network = SimpleNeuralNetwork(hidden_layers=hidden_layers)
    network.load_state_dict(torch.load(filename, map_location=device))
    network.to(device)
    return network


def save_population(population: List[SimpleNeuralNetwork], filename: str):
    """Pickle an entire population into a single file.

    Args:
        population: The list of population members to serialise.
        filename: Destination path; opened in binary write mode, so an existing
            file is overwritten.
    """
    with open(filename, "wb") as checkpoint_file:
        pickle.dump(population, checkpoint_file)


def load_population(filename: str) -> List[SimpleNeuralNetwork]:
    """Read back a population written by ``save_population``.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file (expected to be the population list).
    """
    # SECURITY: unpickling can execute arbitrary code — only load checkpoint
    # files that this project produced itself.
    with open(filename, "rb") as checkpoint_file:
        return pickle.load(checkpoint_file)

In [None]:
from datetime import timedelta

# Run configuration. `mutation_rate` and `mutation_strength` are the starting
# values that the training loop decays as generations progress.
generations = 100000
games_per_player = 1
max_steps = 10000
hidden_layers = [128]
mutation_rate = 0.5
mutation_strength = 0.5

optimizer = EvolutionaryOptimizer(
    hidden_layers=hidden_layers,
    population_size=200,
    elite_size=100,
    new_members=0,
    mutation_rate=mutation_rate,
    mutation_strength=mutation_strength,
)

# Reference point for the per-generation ETA estimate.
loop_start_time = time.time()

# Create directory for saving networks
import os

# One sub-folder per architecture, named after the hidden layer sizes joined by "_".
folder = "networks/" + "_".join(map(str, hidden_layers))
os.makedirs(folder, exist_ok=True)
print(f"Saving networks to folder: {folder}")

# Per-generation history collected by the training loop.
best_score = 0
avg_scores, mutation_strengths, mutation_rates = [], [], []


def mutation_decay(x, mu0=0.25, k=3.0):
    """Exponentially decayed mutation parameter: ``mu0 * exp(-k * x)``.

    Args:
        x: Decay position; the training loop passes the fraction of generations
            already run (``gen / generations``).
        mu0: Value returned at ``x == 0``.
        k: Decay constant; larger values shrink the result faster.

    Returns:
        The decayed value (NumPy scalar, or array when ``x`` is an array).
    """
    decay_factor = np.exp(-k * x)
    return mu0 * decay_factor

# A single seed is drawn for the whole run and the same value is passed to every
# generation as `base_random_seed`. Move the draw inside the loop to use a fresh
# seed per generation instead.
random_seed = random.randint(0, 999999999)
for gen in range(generations):
    # Decay both mutation parameters exponentially with training progress,
    # starting from the configured `mutation_strength` / `mutation_rate`.
    progress = gen / generations
    optimizer.mutation_strength = mutation_decay(progress, k=3, mu0=mutation_strength)
    optimizer.mutation_rate = mutation_decay(progress, k=3, mu0=mutation_rate)

    (population, avg_score, best_score) = optimizer.run_generation(
        games_per_player, max_steps, base_random_seed=random_seed
    )
    avg_scores.append(avg_score)
    mutation_strengths.append(optimizer.mutation_strength)
    mutation_rates.append(optimizer.mutation_rate)

    # ETA = mean wall-clock time per finished generation x generations still to run.
    # `gen` is 0-based, so `gen + 1` generations are done and
    # `generations - (gen + 1)` remain (the previous `generations - gen + 1`
    # over-counted the remainder by two).
    completed = gen + 1
    elapsed_time = time.time() - loop_start_time
    average_time_per_iteration = elapsed_time / completed
    duration = str(
        timedelta(seconds=average_time_per_iteration * (generations - completed))
    )

    print(
        f"⏳ {duration} | Generation {gen + 1}/{generations} - Average Score: {avg_score:.2f} - Best Score: {best_score:.2f} "
    )
    print(f"🧟 str: {optimizer.mutation_strength} rate: {optimizer.mutation_rate}")

    # Checkpoint the whole population on every 10th loop index (gen 0, 10, 20, ...)
    # and after the final generation; the file name carries the 1-based generation.
    if (gen % 10 == 0) or (gen == generations - 1):
        print(f"Saving population at generation {gen + 1}")
        save_population(population, f"{folder}/population_gen_{(gen + 1):010}.pkl")

# Plot average scores over generations.
# The x-axis is sized from the recorded history rather than from `generations`,
# so the plot stays valid when fewer generations were recorded than configured.
plt.plot(range(1, len(avg_scores) + 1), avg_scores)
plt.xlabel("Generation")
plt.ylabel("Average Score")
plt.title("Evolution of Average Score over Generations")
plt.show()

# Plot the decayed mutation parameters. Both series share one axis, so each gets
# a legend entry to tell them apart.
plt.plot(
    range(1, len(mutation_strengths) + 1),
    mutation_strengths,
    label="Mutation strength",
)
plt.plot(range(1, len(mutation_rates) + 1), mutation_rates, label="Mutation rate")
plt.xlabel("Generation")
plt.ylabel("Mutation strength")
plt.title("Evolution of Mutation Strength over Generations")
plt.legend()
plt.show()

In [None]:
# Checkpoints are read from the per-architecture folder, built with the same
# expression the training cell uses when saving.
# The evaluation below accumulates into `average_scores`; the training history in
# `avg_scores` is deliberately no longer reset here, since nothing in this cell
# reads it and clearing it discarded the training curve.
folder = f"networks/{'_'.join(str(x) for x in hidden_layers)}"


def load_checkpoints(folder: str) -> List[str]:
    """List the population checkpoint files in ``folder``, oldest generation first.

    A file counts as a checkpoint when its name starts with ``population_gen_``
    and ends with ``.pkl``. The generation number is parsed from the third
    ``_``-separated field (up to the first ``.``) and used as the sort key.

    Args:
        folder: Directory to scan.

    Returns:
        Bare file names (without the folder prefix) sorted by generation number.

    Raises:
        ValueError: If a matching file name carries a non-integer generation field.
    """
    import os

    def generation_of(name):
        # "population_gen_0000000042.pkl" -> "0000000042.pkl" -> "0000000042" -> 42
        return int(name.split("_")[2].split(".")[0])

    checkpoint_names = []
    for entry in os.listdir(folder):
        if entry.startswith("population_gen_") and entry.endswith(".pkl"):
            checkpoint_names.append(entry)

    checkpoint_names.sort(key=generation_of)
    return checkpoint_names


# Keep only the newest checkpoint (the list comes back sorted by generation number).
checkpoints = load_checkpoints(folder)[-1:]
print(checkpoints)


# Evaluation plays more games per network than training does. Note that this
# rebinds the notebook-level `games_per_player` for every cell run afterwards.
games_per_player = 10
# random_seed = 42
average_scores = []

def eval_player_game(net, random_seed, games_per_player):
    """Play several games with one population member and return its mean score.

    Args:
        net: Population member; it is moved to the CPU and wrapped in a ``Player``.
        random_seed: Base seed; game ``i`` runs on seed ``random_seed + i``.
        games_per_player: Number of games to play.

    Returns:
        The summed step rewards of each game, averaged over all games.
    """
    player = Player(net.to("cpu"))
    total_score = 0
    for game_index in range(games_per_player):
        env = Game2048Env(random_seed=random_seed + game_index)
        score = 0
        done = False
        while not done:
            actions = player.next_move(env.board)
            _state, reward, done, _ = env.step(actions)
            score += reward
        total_score += score

    return total_score / games_per_player


# NOTE: `random_seed` is the value drawn by the training cell; it must already
# exist in the kernel before this loop runs.
for checkpoint_index, checkpoint in enumerate(checkpoints):
    # 1-based progress label, matching the other checkpoint-loading cell.
    print(f"Checkpoint: {checkpoint_index + 1}/{len(checkpoints)}")
    filename = f"{folder}/{checkpoint}"
    population = load_population(filename)

    # Evaluate every member in parallel, one job per member across all CPU cores.
    scores = Parallel(n_jobs=joblib.cpu_count())(
        delayed(eval_player_game)(net, random_seed, games_per_player)
        for net in population
    )

    average_scores.append(sum(scores) / len(population))


# Plot the mean evaluation score of each evaluated checkpoint.
# Markers are required here: when only one checkpoint is evaluated there is a
# single data point, which a marker-less line plot would not draw at all.
plt.plot(range(1, len(average_scores) + 1), average_scores, marker="o")
# The x values are checkpoint ordinals (1..N), not generation numbers.
plt.xlabel("Checkpoint")
plt.ylabel("Average Score")
plt.title("Evolution of Average Score over Generations")
plt.show()

In [None]:
# Load each selected checkpoint, list the age of every member, and remember the
# population (and its first member) from the last checkpoint processed.
selected_net = None
selected_population = None
for i, checkpoint in enumerate(checkpoints):
    print(f"Loading checkpoint {i + 1}/{len(checkpoints)}: {checkpoint}")
    population = load_population(f"{folder}/{checkpoint}")

    # Separate index name so the member loop no longer shadows the checkpoint index `i`.
    for member_index, net in enumerate(population):
        print(f"{member_index}: {net.age}")

    selected_net = population[0]
    selected_population = population


In [None]:
# Take the first member of the most recently loaded `population` and print its
# `network` attribute (presumably the underlying torch module — TODO confirm).
selected_net = population[0]

print(selected_net.network)

In [None]:
# Continue evolving from the population loaded from the checkpoint. The loop has
# no exit condition: interrupt the kernel to stop it.
# NOTE: this uses whatever `games_per_player`, `max_steps` and `random_seed`
# currently hold in the kernel; the optimizer keeps its current mutation settings.
generation = 0
optimizer.population = selected_population
resume_start_time = time.time()
while True:
    (population, avg_score, best_score) = optimizer.run_generation(
        games_per_player, max_steps, base_random_seed=random_seed
    )
    generation += 1
    # Show the wall-clock time spent in this loop. Previously this printed the
    # `duration` string left over from the training cell, which never changed here.
    duration = str(timedelta(seconds=time.time() - resume_start_time))
    print(
        f"⏳ {duration} | Generation {generation} - Average Score: {avg_score:.2f} - Best Score: {best_score:.2f} "
    )
    print(f"🧟 str: {optimizer.mutation_strength} rate: {optimizer.mutation_rate}")