In [1]:
import sys
sys.path.append('../onitama/')

from trainer import RegularDataTrainer
from dl_players_v3 import CNNPlayer_v3

import numpy as np


2026-01-22 22:39:24.893309: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2026-01-22 22:39:24.894951: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-01-22 22:39:25.051009: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI AVX_VNNI_INT8 AVX_NE_CONVERT FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-01-22 22:39:27.169154: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computat

In [2]:


def action_to_one_hot(action, shape=(5, 5, 52)):
    """
    Convert one action, or a batch of actions, into one-hot tensor(s).

    The input is dispatched on its dimensionality rather than on its length,
    so a batch that happens to contain exactly three actions is still encoded
    as a batch, and a single action may be given as a list, tuple or array.

    Args:
        action: a single [col, ligne, action_id] triple, or a sequence of such
            triples (batch). Entries must be integers.
        shape: shape of one encoded action, indexed as [col, ligne, action_id].

    Returns:
        np.ndarray of dtype float32 with shape `shape` for a single action,
        or (batch, *shape) for a batch. An empty input yields an empty batch
        of shape (0, *shape).

    Raises:
        ValueError: if the input is neither a triple nor a sequence of
            triples, or if its entries are not integers.
        IndexError: if an index falls outside `shape`.
    """
    indices = np.asarray(action)

    # An empty sequence is an empty batch (np.asarray gives it a float dtype,
    # so it has to be handled before the integer check below).
    if indices.size == 0:
        return np.zeros((0, *shape), dtype=np.float32)

    if not np.issubdtype(indices.dtype, np.integer):
        raise ValueError("action entries must be integers")

    if indices.ndim == 1:
        # Single action
        if indices.shape[0] != 3:
            raise ValueError("a single action must be [col, ligne, action_id]")
        one_hot = np.zeros(shape, dtype=np.float32)
        one_hot[tuple(indices)] = 1.0
        return one_hot

    if indices.ndim != 2 or indices.shape[1] != 3:
        raise ValueError(
            "a batch of actions must be a sequence of [col, ligne, action_id] triples"
        )

    # Batch of actions: set one cell per sample with a single fancy-indexed
    # assignment instead of a Python loop.
    batch_size = indices.shape[0]
    one_hot_batch = np.zeros((batch_size, *shape), dtype=np.float32)
    one_hot_batch[
        np.arange(batch_size), indices[:, 0], indices[:, 1], indices[:, 2]
    ] = 1.0
    return one_hot_batch

def flat_index_to_action(flat_index, shape=(5, 5, 52)):
    """
    Convert a flat index into (col, ligne, move_id).

    This inverts the row-major flattening of a tensor indexed as
    [col, ligne, move_id]; with the default shape the valid flat indices
    are [0, 1299].

    Args:
        flat_index: integer position in the flattened tensor.
        shape: (n_cols, n_lignes, n_moves) of the unflattened tensor.

    Returns:
        tuple: (col, ligne, move_id). No range check is performed: an index
        outside the valid range is decoded arithmetically.
    """
    _, n_lignes, n_moves = shape
    remainder, move_id = divmod(flat_index, n_moves)
    col, ligne = divmod(remainder, n_lignes)
    return col, ligne, move_id


def decode_flat_policy(flat_policy, shape=(5, 5, 52)):
    """
    Decode a flattened policy vector into [col, ligne, move_id].

    Args:
        flat_policy: array-like of policy scores (one-hot or probabilities);
            with the default shape it holds 1300 entries.
        shape: (n_cols, n_lignes, n_moves) of the unflattened policy.

    Returns:
        action: [col, ligne, move_id] as plain Python ints, for the entry
        with the highest score (the position of the 1.0 when one-hot).
    """
    # 1. Locate the highest-scoring entry
    best_index = int(np.argmax(flat_policy))

    # 2. Decode it with the shared helper so both functions always agree
    col, ligne, move_id = flat_index_to_action(best_index, shape)

    return [int(col), int(ligne), int(move_id)]



def compute_topk_accuracy(model, states, policies, n_samples,
                          k_values=(1, 5, 10, 15), seed=None):
    """
    Sample n_samples from states/policies and compute the top-k accuracy.

    Args:
        model: player object exposing the underlying Keras model as `.model`;
            `model.model.predict(...)` must return a sequence whose first
            element holds the policy scores, one row per sample.
        states: np.array of states, indexable by an integer array along
            axis 0 (shape (N, 5, 5, 10) in this notebook).
        policies: np.array of shape (N, n_actions) - flattened one-hot
            policies.
        n_samples: number of samples to draw; capped at N.
        k_values: iterable of k values (each >= 1) for the top-k accuracy.
        seed: optional seed for a reproducible sample. When None, NumPy's
            global RNG is used, so `np.random.seed` still controls the draw.

    Returns:
        dict: {k: accuracy} for each k in k_values.

    Raises:
        ValueError: if there is nothing to sample or a k is smaller than 1.
    """
    n_total = len(states)
    n_samples = min(n_samples, n_total)
    if n_samples < 1:
        raise ValueError("compute_topk_accuracy needs at least one sample")

    k_values = list(k_values)
    if any(k < 1 for k in k_values):
        # k == 0 would otherwise select every column through the slice [-0:]
        raise ValueError("every k in k_values must be >= 1")

    # Random sampling without replacement; the same indices are used for
    # states and policies so the pairs stay aligned.
    if seed is None:
        indices = np.random.choice(n_total, size=n_samples, replace=False)
    else:
        indices = np.random.default_rng(seed).choice(
            n_total, size=n_samples, replace=False
        )
    sampled_states = states[indices]
    sampled_policies = policies[indices]

    # The model returns [policy, value]; only the policy head is scored.
    predictions = np.asarray(model.model.predict(sampled_states, verbose=0)[0])

    # Correct action of each sample = position of the 1 in its one-hot row.
    true_actions = np.argmax(sampled_policies, axis=1)

    # Sort once (ascending) and reuse the ordering for every k.
    order = np.argsort(predictions, axis=1)
    n_actions = order.shape[1]

    # Rank of the correct action counted from the best score (1 = top-1).
    matches = order == true_actions[:, None]
    found = matches.any(axis=1)  # False if the label is outside the model's outputs
    ranks = n_actions - np.argmax(matches, axis=1)

    accuracies = {}
    for k in k_values:
        hits = int(np.count_nonzero(found & (ranks <= k)))
        accuracies[k] = hits / n_samples

    return accuracies




In [3]:

# Load the training data

folder_data = "../data/"

states_files = [
    'training-data-heuristic-vs-laheuristic2-states.pkl',
    'training-data-heuristic-vs-laheuristic3-2-states.pkl',
    'training-data-heuristic-vs-laheuristic3-states.pkl',
    'training-data-laheuristic2-vs-laheuristic3-states.pkl',
    'training-data-random-vs-laheuristic3-states.pkl'
]

policy_files = [
    'training-data-heuristic-vs-laheuristic2-actions.pkl',
    'training-data-heuristic-vs-laheuristic3-2-actions.pkl',
    'training-data-heuristic-vs-laheuristic3-actions.pkl',
    'training-data-laheuristic2-vs-laheuristic3-actions.pkl',
    'training-data-random-vs-laheuristic3-actions.pkl'
]

# The two lists are consumed pairwise, so they must have the same length.
assert len(states_files) == len(policy_files), \
    "states_files and policy_files must list matching files"

states_raw = []
actions_raw = []

for states_file, policy_file in zip(states_files, policy_files):
    filename_states = folder_data + states_file
    filename_policies = folder_data + policy_file
    states_raw += RegularDataTrainer.getTrainedData(filepath=filename_states)
    actions_raw += RegularDataTrainer.getTrainedData(filepath=filename_policies)
    print(f"File {filename_states} loaded !")
    print(f"File {filename_policies} loaded !")

print("\n\nTotal :")
print(f"{len(states_raw)} states")
print(f"{len(actions_raw)} policies")

# Each state is paired with the action at the same position; a length
# mismatch would silently misalign every sample that follows it.
assert len(states_raw) == len(actions_raw), \
    "states and actions are not aligned"

states = np.array(states_raw)
print(states.shape)

# Loaded states are (N, 10, 5, 5) but the network expects (N, 5, 5, 10),
# so the channel axis is moved to the end.
states = np.transpose(states, (0, 2, 3, 1))
print(states.shape)  # now (N, 5, 5, 10)


# One-hot encode the actions (policies)
policies = action_to_one_hot(actions_raw)

# Flatten to match the model's policy output
policies = policies.reshape(-1, 5 * 5 * 52)  # (batch, 1300)


File ../data/training-data-heuristic-vs-laheuristic2-states.pkl loaded !
File ../data/training-data-heuristic-vs-laheuristic2-actions.pkl loaded !
File ../data/training-data-heuristic-vs-laheuristic3-2-states.pkl loaded !
File ../data/training-data-heuristic-vs-laheuristic3-2-actions.pkl loaded !
File ../data/training-data-heuristic-vs-laheuristic3-states.pkl loaded !
File ../data/training-data-heuristic-vs-laheuristic3-actions.pkl loaded !
File ../data/training-data-laheuristic2-vs-laheuristic3-states.pkl loaded !
File ../data/training-data-laheuristic2-vs-laheuristic3-actions.pkl loaded !
File ../data/training-data-random-vs-laheuristic3-states.pkl loaded !
File ../data/training-data-random-vs-laheuristic3-actions.pkl loaded !


Total :
226287 states
226287 policies
(226287, 10, 5, 5)
(226287, 5, 5, 10)


In [4]:


# Evaluation settings: fixed so the reported accuracies can be reproduced.
SEED = 42
N_EVAL_SAMPLES = 10000

network = CNNPlayer_v3()
network.load_weights("../saved-models/CNNPlayer-v3-weights.weights.h5")

# compute_topk_accuracy draws its evaluation subset through NumPy's global
# RNG, so seeding it here makes the sampled subset (and therefore the printed
# accuracies) identical across runs.
np.random.seed(SEED)
print(compute_topk_accuracy(network, states, policies, N_EVAL_SAMPLES))


2026-01-22 22:40:13.269933: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


{1: 0.6406, 5: 0.9333, 10: 0.985, 15: 0.9962}
