In [1]:
import sys
sys.path.append('../../src/')
from tegame import Tegame,is_empty_lists
from encode import generate_all_possible_features

import torch
from tqdm import tqdm
from copy import deepcopy

%load_ext autoreload
%autoreload 2

In [2]:
def append_log(playcard_logs, log, include_only_combo=False):
    """
    Store a private copy of one log entry in the running list of logs.

    Parameters
    ----------
    playcard_logs : list
        Accumulator of log entries; it is extended in place.

    log : dict
        One log entry produced during gameplay. Its 'is_combo' key is
        only read when include_only_combo is truthy.

    include_only_combo : bool, default=False
        When True, the entry is kept only if log['is_combo'] is truthy.
        When False, every entry is kept.

    Returns
    -------
    list
        The same playcard_logs object that was passed in.

    Notes
    -----
    The entry is deep-copied before it is stored, so mutating the
    original dictionary afterwards does not alter the stored record.
    """
    # Guard clause: filtering is active and this entry is not a combo.
    if include_only_combo and not log['is_combo']:
        return playcard_logs

    # Either filtering is off, or the entry is a combo: keep a copy.
    playcard_logs.append(deepcopy(log))
    return playcard_logs


def append_logs(playcard_logs, log, include_only_combo=False):
    """
    Append one or multiple log entries to the playcard log list.

    Parameters
    ----------
    playcard_logs : list
        The list that accumulates all log entries. It is extended in place.

    log : dict, list of dict, or None
        A single log entry, a list of log entries, or None when there is
        nothing to record.

    include_only_combo : bool, default=False
        Passed through to append_log() to control filtering.

    Returns
    -------
    list
        The playcard_logs accumulator. It is returned on every path,
        including when `log` is None, so that callers can safely write
        ``playcard_logs = append_logs(playcard_logs, ...)``.

    Notes
    -----
    This function normalizes the input so that both single logs and
    lists of logs are handled uniformly.
    """
    if log is None:
        # Nothing to add. Return the accumulator (not None) so the return
        # type is the same as on the normal path.
        return playcard_logs

    # Ensure we always iterate over a list
    entries = log if isinstance(log, list) else [log]

    for entry in entries:
        playcard_logs = append_log(
            playcard_logs,
            entry,
            include_only_combo=include_only_combo
        )

    return playcard_logs

In [3]:
# Number of games simulated in each pass of the data-generation loop.
n_games=20

> **Note:** The choice of threshold parameters here is not critical.  
> As shown in *statistical_analysis/variance_is_all_about_deck_shuffle_rather_than_thresholds.ipynb* and confirmed again in *statistical_analysis/different_thresholds_same_deck_shuffles_result_in_constant_rate.ipynb*, almost all of the variance in win rate comes from the deck shuffle, not from the thresholds.  
> For this reason, we simply fix the thresholds and focus on generating many independent games for the training set.


In [4]:
# Dataset: the log entries returned by play_card, accumulated over all games
# (restricted to combo entries during the combo-only passes).
playcard_logs = []

# The outer loop makes 4 passes of n_games games each (80 games in total
# with n_games=20):
#   - 1 pass  (20 games) where all moves are put in the training set,
#   - 3 passes (20x3=60 games) where only the combos are put in the training set.
# Combos here are sequences like: 1->21->11 (in ascending piles) and
# 100->81->91 (in descending piles).
#
# NOTE(review): no random seed is set in the visible cells, so the generated
# dataset presumably differs from run to run -- confirm how Tegame shuffles.

for include_only_combo in [False,True,True,True]:
    for i in tqdm(range(n_games)):
        # A fresh game object with fixed thresholds is built for every game.
        tegame_obj = Tegame(
            verb_lvl=0,
            thresh_nonmandatory=2,
            thresh_secondchoice=4,
            train_mode=True,
            players=2
        )
        
        tegame_obj.restart()
        
        
        # NOTE(review): the `break`s on game_over below only leave the player
        # loop; this while loop ends only once game_ongoing is False.
        # Presumably Tegame clears game_ongoing when the game is over -- confirm.
        while tegame_obj.game_ongoing:
            for active_player in range(tegame_obj.n_players):
        
                # Stop if all hands are empty
                if is_empty_lists(tegame_obj.hands):
                    tegame_obj.game_ongoing = False
                    break
        
                # Sort hand (as in original logic)
                tegame_obj.hands[active_player].sort()
                if tegame_obj.verb_lvl>=2: tegame_obj.print_stat_hands()
        
                # -------------------------
                # Mandatory moves
                # -------------------------
                # Exactly n_mandatory_moves play_card calls, unless the game
                # is flagged as over part-way through.
                for _ in range(tegame_obj.n_mandatory_moves):
                    logs = tegame_obj.play_card(active_player)
                    append_logs(playcard_logs, logs,include_only_combo=include_only_combo)
        
                    if tegame_obj.game_over: break
        
                if tegame_obj.game_over: break
        
                # Cards to redraw later: starts at the mandatory count and is
                # incremented for every optional card played below.
                n_played = tegame_obj.n_mandatory_moves
        
                # -------------------------
                # Non-mandatory moves
                # -------------------------
                # Keep offering optional moves until the hand is empty or a
                # call leaves the hand unchanged.
                while not tegame_obj.hands[active_player]==[]:
                    
                    # Snapshot used to detect whether play_card changed the hand.
                    hand_old = tegame_obj.hands[active_player].copy()
        
                    logs = tegame_obj.play_card(active_player, mandatory_move=False)
        
                    # Appended before the no-op check, so the log of a call
                    # that plays nothing is recorded too.
                    append_logs(playcard_logs, logs,include_only_combo=include_only_combo)
        
                    # Stop if no card was played
                    if tegame_obj.hands[active_player] == hand_old: break
        
                    n_played += 1
        
                # -------------------------
                # Draw cards
                # -------------------------
                # One draw_one call per card counted in n_played.
                for _ in range(n_played):
                    tegame_obj.draw_one(active_player)
        
                if tegame_obj.verb_lvl>=2: tegame_obj.print_stat_piles()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 49.32it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 117.28it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 118.70it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 128.21it/s]


In [5]:
# Training-set inspection: how many logged entries are mandatory moves?
mandatory_move_count = 0
for log in playcard_logs:
    # Positional unpacking: relies on the insertion order of the log dict.
    hand, piles, mandatory_move, played, piles_to_avoid, is_combo = log.values()
    # Adds 1 when the flag is truthy, 0 otherwise.
    mandatory_move_count += 1 if mandatory_move else 0
print(f'mandatory_moves: {mandatory_move_count}/{len(playcard_logs)}')

mandatory_moves: 1779/3108


In [6]:
# We examine the training set: how many combos?
# The flag is read by key instead of unpacking log.values() positionally:
# positional unpacking depends on the dict's insertion order and raises as
# soon as a log entry gains or loses a field.
n_combo = sum(1 for log in playcard_logs if log['is_combo'])
print(f'n_combo: {n_combo}/{len(playcard_logs)}')

n_combo: 726/3108


In [7]:
# Convert every logged game state into one training group.
#
# Each log entry is unpacked (by position) into:
#   - the player's hand
#   - the current piles
#   - whether the move is mandatory
#   - the move actually played, as a (card index, pile index) pair
#   - the piles to avoid (strategic constraint)
#   - a sixth value that is not needed here
#
# For each entry we produce a pair:
#
#   X_group : float32 tensor built from generate_all_possible_features(),
#             i.e. the candidate (card, pile) actions of that turn encoded
#             as feature vectors.
#
#   y_group : integer index, within X_group, of the action that was
#             actually chosen.
#
# Why one group per turn?
# -----------------------
# The number of candidate actions changes from turn to turn, so the turns
# cannot be stacked into one fixed-width dataset. Each turn is instead its
# own classification problem ("given these candidates, which one was
# chosen?"): the model is given X_group and must put the highest
# probability on row y_group. This lets the network learn the agent's
# policy directly from logged gameplay.
#
# NOTE(review): the label formula below assumes that the feature rows are
# ordered card-major (card index outer, pile index inner) and that the
# NOOP action is the last row -- confirm against generate_all_possible_features.

groups = []

for log in playcard_logs:
    hand, piles, mandatory_move, played, piles_to_avoid, _ = log.values()

    # Features for every candidate action of this turn.
    X_group = torch.tensor(
        generate_all_possible_features(hand, piles, mandatory_move, piles_to_avoid),
        dtype=torch.float32,
    )

    # The label is derived here, outside the feature generator.
    played_card_idx, played_pile_idx = played

    if played_card_idx is not None:
        # A card was played: flatten (card, pile) into a single row index.
        y_group = played_card_idx * len(piles) + played_pile_idx
    else:
        # Nothing was played: NOOP is the final row.
        y_group = X_group.shape[0] - 1

    groups.append((X_group, y_group))

In [8]:
# Save the training set (the list of (X_group, y_group) tuples) for
# subsequent training. The path is relative to the current working directory.
torch.save(groups, "training_set.pt")