In [13]:
from joblib import Parallel, delayed
from puzzle import GameGrid
from main import ExpectimaxOptimizer
from heuristic import Heuristic
import numpy as np

def objective_function(params, num_games=20):
    """
    Objective for CMA-ES: the negated mean score over a batch of 2048 games.

    A heuristic and an expectimax optimizer are built from ``params``, then
    ``num_games`` games are dispatched through joblib and their scores are
    averaged. The mean is negated because CMA-ES minimizes its objective.

    Args:
        params: List/array of heuristic parameters
            [monotonicity_weight, empty_weight], passed to ``Heuristic``.
        num_games: Number of games to run for each evaluation (default is 20).
            Must be at least 1.

    Returns:
        float: Negative mean score (minimizing it maximizes the mean score).

    Raises:
        ValueError: If ``num_games`` is smaller than 1.
    """
    # Validate before building anything: zero games used to end in a
    # ZeroDivisionError, and a negative count silently returned 0.0.
    if num_games < 1:
        raise ValueError(f"num_games must be at least 1, got {num_games}")

    heuristic_obj = Heuristic(params)
    optimizer = ExpectimaxOptimizer(heuristic_func=heuristic_obj)

    def run_single_game():
        """Run a single game and return its score."""
        game = GameGrid(auto_move=True, expectimax_func=optimizer.expectimax,
                        depth_limit=2, silent=True)
        try:
            # run_single_game() returns a pair; only its second item is used.
            _, score = game.run_single_game()
        finally:
            # Always release the game object, even when the game raised.
            game.destroy()
        return score

    # Run the games in parallel on the loky backend (n_jobs=-1).
    scores = Parallel(n_jobs=-1, backend='loky')(
        delayed(run_single_game)() for _ in range(num_games)
    )

    # Average over the scores actually collected, then negate for minimization.
    return -(sum(scores) / len(scores))

In [None]:
import cma

# Box constraints for the two parameters, given as [lower_bounds, upper_bounds].
bounds = [[0, 0], [10, 10]]

# Settings handed to cma.CMAEvolutionStrategy.
options = dict(
    bounds=bounds,
    maxiter=100,  # maximum number of iterations
    popsize=6,    # candidate solutions evaluated per iteration
    tolfun=100,   # function-value tolerance used as a stopping criterion
    verb_disp=1,  # console display interval, in iterations
)

In [8]:
# Start the search from [1, 1] with an initial step size of 2.5.
es = cma.CMAEvolutionStrategy(x0=[1, 1], sigma0=2.5, inopts=options)

# Optimize; kept as the last expression so the cell displays what it returns.
es.optimize(objective_function)

(3_w,6)-aCMA-ES (mu_w=2.0,w_1=63%) in dimension 2 (seed=974652, Mon Apr 28 20:40:27 2025)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1      6 -3.526800000000000e+04 1.0e+00 2.70e+00  3e+00  3e+00 8:46.1
    2     12 -3.603000000000000e+04 1.2e+00 2.42e+00  2e+00  2e+00 25:07.0
    3     18 -3.697200000000000e+04 1.2e+00 2.24e+00  2e+00  2e+00 35:55.7
    4     24 -3.536800000000000e+04 1.5e+00 2.25e+00  2e+00  3e+00 49:07.3
    5     30 -3.613600000000000e+04 2.0e+00 2.35e+00  2e+00  2e+00 60:05.6
    6     36 -3.673200000000000e+04 1.5e+00 2.69e+00  2e+00  3e+00 75:36.7
    7     42 -3.680800000000000e+04 1.5e+00 2.14e+00  2e+00  2e+00 91:56.1
    8     48 -3.805800000000000e+04 1.5e+00 1.91e+00  1e+00  2e+00 107:13.4
    9     54 -3.715800000000000e+04 1.5e+00 2.22e+00  2e+00  2e+00 118:19.0
   10     60 -3.818000000000000e+04 1.4e+00 2.98e+00  3e+00  3e+00 127:20.1
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
   11     66 -

<cma.evolution_strategy.CMAEvolutionStrategy at 0x13123632de0>

In [9]:
# Read the best solution recorded by the finished CMA-ES run.
cma_result = es.result
optimal_params = cma_result.xbest
# The objective returns the negated score, so flip the sign back.
# NOTE(review): fbest is the best single evaluation seen; the objective looks
# stochastic (random games), so this figure is probably optimistic - confirm
# against a fresh simulation.
optimal_score = -cma_result.fbest
print("Optimal parameters:", optimal_params)
print("Best average score:", optimal_score)

Optimal parameters: [8.54520654 8.88788641]
Best average score: 48742.0


In [10]:
# Print every tuned weight on its own line at full precision.
for weight in optimal_params:
    print(weight)

8.545206539422178
8.887886413852373


In [12]:
from puzzle import GameSimulator

# Re-evaluate the tuned weights over 100 games, with the same search depth (2)
# that objective_function uses.
heuristic_obj = Heuristic(optimal_params)
optimizer = ExpectimaxOptimizer(heuristic_func=heuristic_obj)
simulator = GameSimulator(
    num_games=100,
    expectimax_func=optimizer.expectimax,
    depth_limit=2,
)

# Kept as the last expression so the notebook displays the returned value.
simulator.run_simulation()

Running game 1/100
Game 1 completed. Score: 16520
Running game 2/100
Game 2 completed. Score: 60532
Running game 3/100
Game 3 completed. Score: 40284
Running game 4/100
Game 4 completed. Score: 26516
Running game 5/100
Game 5 completed. Score: 10268
Running game 6/100
Game 6 completed. Score: 15812
Running game 7/100
Game 7 completed. Score: 74896
Running game 8/100
Game 8 completed. Score: 36108
Running game 9/100
Game 9 completed. Score: 37068
Running game 10/100
Game 10 completed. Score: 53388
Running game 11/100
Game 11 completed. Score: 76884
Running game 12/100
Game 12 completed. Score: 47500
Running game 13/100
Game 13 completed. Score: 60544
Running game 14/100
Game 14 completed. Score: 27936
Running game 15/100
Game 15 completed. Score: 32892
Running game 16/100
Game 16 completed. Score: 58856
Running game 17/100
Game 17 completed. Score: 36860
Running game 18/100
Game 18 completed. Score: 61544
Running game 19/100
Game 19 completed. Score: 33336
Running game 20/100
Game 20 co

(72.0, 80124, 34908.76)