In [1]:
# Imports
import ast
import copy
import os
import pandas as pd
import pickle
import random
import yaml

In [None]:
def color_strategy(val):
    """Map a strategy letter to the CSS rule used to shade its table cell.

    "S" yields a gold background and "H" a white one. Any other value
    yields an empty string, i.e. no extra styling.
    """
    shades = (("S", "gold"), ("H", "white"))
    for letter, shade in shades:
        if val == letter:
            return f"background-color: {shade}"
    return ""

In [3]:
directory = "../Agent-Pro/dicts/vanilla_run"

# Load every pickled dictionary found directly inside `directory`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point this at files produced by a trusted run.
dicts = []
pickle_names = [name for name in os.listdir(directory) if name.endswith('.pkl')]
for name in pickle_names:
    with open(os.path.join(directory, name), 'rb') as handle:
        dicts.append(pickle.load(handle))

# Merge the loaded dictionaries: results[key][subkey] ends up as the
# concatenation of that entry's lists across all files.
results = {}
for loaded in dicts:
    for key, inner in loaded.items():
        merged_inner = results.setdefault(key, {})
        for subkey, sublist in inner.items():
            merged_inner.setdefault(subkey, []).extend(sublist)

In [4]:
# Convert lists into letters
# Snapshot `results` before its lists are replaced by letters below.
# Note: if this cell is executed again, the snapshot captures the
# already-converted table rather than the raw lists.
results_copy = copy.deepcopy(results)
for actions_by_column in results.values():
    for column, votes in actions_by_column.items():
        # Only non-empty lists are converted; everything else is left alone.
        # This keeps the cell safe to re-run: entries that are already
        # 'H'/'S' strings used to reach sum() and raise TypeError.
        if isinstance(votes, list) and votes:
            # Mean rounded to 0 -> 'H', otherwise 'S'. round() rounds halves
            # to the nearest even integer, so a mean of exactly 0.5 gives 'H'.
            actions_by_column[column] = 'H' if round(sum(votes) / len(votes)) == 0 else 'S'

In [5]:
# Visualize results
# Outer keys of `results` become the row index and inner keys the columns
# (orient='index').
# NOTE(review): presumably rows are player totals and columns are dealer
# cards -- confirm against the code that produced the pickles.
df = pd.DataFrame.from_dict(results, orient='index')
# Apply color_strategy to every cell to obtain its CSS background rule.
styled_df = df.style.map(color_strategy)
# Bare last expression so the notebook renders the styled table.
styled_df

Unnamed: 0,2,3,4,5,6,7,8,9,10,A
17,S,S,S,S,S,S,H,H,H,H
16,H,H,H,S,S,H,H,H,H,H
15,H,H,H,H,S,H,H,H,H,H
14,H,H,H,H,H,H,H,H,H,H
13,H,H,H,H,H,H,H,H,H,H
12,H,H,H,H,S,H,H,H,H,H
11,H,H,H,H,H,H,H,H,H,H
10,H,H,H,H,H,H,H,H,H,H
9,H,H,H,H,H,H,H,H,H,H
8,H,H,H,H,H,H,H,H,H,H


In [None]:
def _parse_bracketed_cards(text, label):
    """Return the list written after ``"<label> ["`` in ``text``, or None.

    None is returned when the label is missing or when the bracket contents
    cannot be parsed as a Python list literal.
    """
    marker = label + " ["
    if marker not in text:
        return None
    inner = text.split(marker)[1].split("]")[0]
    try:
        cards = ast.literal_eval("[" + inner + "]")
    except (ValueError, TypeError, SyntaxError):
        # Malformed log text (e.g. unquoted card names): treat as "not found"
        # so the caller can skip the file instead of crashing.
        return None
    return cards if isinstance(cards, list) else None


def get_game_state_from_yaml(data):
    """
    Extract the player's cards and the dealer's visible card from YAML data.

    Only the last entry of ``data`` is inspected, and only its "final cards"
    string, which is expected to contain ``player [...]`` and ``dealer [...]``
    segments. The player's cards are therefore the hand as logged at the end
    of the game.

    Args:
        data: Parsed YAML content; anything other than a non-empty list whose
            last element is a dict with a string "final cards" value yields
            ``(None, None)``.

    Returns:
        tuple: ``(player_cards, dealer_visible_card)``. ``player_cards`` is the
        parsed list or None. ``dealer_visible_card`` is the dealer's second
        listed card, or None when the dealer segment is missing, unparsable
        or holds fewer than two cards.
    """
    if not (isinstance(data, list) and data):
        return None, None

    last_entry = data[-1]
    if not (isinstance(last_entry, dict) and "final cards" in last_entry):
        return None, None

    final_cards_str = last_entry["final cards"]
    if not isinstance(final_cards_str, str):
        return None, None

    player_cards = _parse_bracketed_cards(final_cards_str, "player")

    dealer_visible_card = None
    dealer_all_cards = _parse_bracketed_cards(final_cards_str, "dealer")
    # The second card is the visible one, so at least two cards are required;
    # a shorter list previously raised IndexError.
    if dealer_all_cards and len(dealer_all_cards) > 1:
        dealer_visible_card = dealer_all_cards[1]

    return player_cards, dealer_visible_card

In [None]:
def calculate_value(hand, max=21):
    """Return the blackjack total of ``hand``.

    Each card is read by its second character: digits count at face value,
    'T'/'J'/'Q'/'K' count 10 and 'A' counts 11. While the total exceeds
    ``max``, aces are downgraded from 11 to 1 one at a time. Cards that are
    empty or shorter than two characters are reported with a warning and
    contribute nothing.
    """
    ten_ranks = ('T', 'J', 'Q', 'K')

    # Count the aces up front; each may later give back 10 points.
    soft_aces = len([c for c in hand if c and len(c) > 1 and c[1] == 'A'])

    score = 0
    for card in hand:
        if card and len(card) >= 2:
            symbol = card[1]
            if symbol.isdigit():
                score += int(symbol)
            elif symbol in ten_ranks:
                score += 10
            elif symbol == 'A':
                score += 11  # provisional; may be reduced below
        else:
            print(f"Warning: Invalid card '{card}' in hand {hand}")

    # Turn aces from 11 into 1 until the hand fits or no aces remain.
    while score > max and soft_aces > 0:
        score -= 10
        soft_aces -= 1
    return score

In [None]:
def simulate_one_dealer_hand(player_total, dealer_visible_card, base_available_deck, max=21, threshold=17):
    """
    Play out a single dealer hand against a player standing on player_total.

    A shuffled copy of base_available_deck supplies the dealer's hidden card
    and every further draw; the dealer keeps drawing while below threshold
    and while cards remain.
    Returns: "win", "lose" or "draw" from the player's point of view.
    """

    # Work on a private shuffled copy so the caller's deck is left untouched.
    shoe = list(base_available_deck)
    random.shuffle(shoe)

    # Visible card first, then the hidden card taken from the top of the shoe.
    dealer_hand = [dealer_visible_card, shoe.pop(0)]
    dealer_total = calculate_value(dealer_hand, max)

    while dealer_total < threshold and shoe:
        dealer_hand.append(shoe.pop(0))
        dealer_total = calculate_value(dealer_hand, max)

    # A busted player loses regardless of what the dealer ended up with.
    if player_total > max:
        return "lose"

    if dealer_total > max or player_total > dealer_total:
        return "win"
    return "draw" if player_total == dealer_total else "lose"

In [None]:
def calculate_winrate_with_simulations(directory, simulate=False, threshold=17, max=21, num_simulations=10000):
    """
    Calculates the win rate from Blackjack game logs stored in YAML files,
    optionally augmenting the data with Monte Carlo simulations.

    Every '.yaml' file in 'directory' is processed in sorted filename order
    (os.listdir itself returns names in arbitrary order). For each file:
    1. The original outcome is read from the "final results" value of the
       last YAML entry. "win." scores 1 point, "draw."/"push." 0.5 and
       "lose." 0; any other value is reported and not counted.
    2. If 'simulate' is True:
        a. The player's cards and the dealer's visible card are obtained
           from 'get_game_state_from_yaml'.
        b. If both are present and the player's total is positive,
           'num_simulations' dealer hands are simulated.
        c. In each simulation the player stands on that total and the dealer
           draws from the full 52-card deck minus the player's cards and the
           dealer's visible card, via 'simulate_one_dealer_hand'.
        d. Simulation points (1 win, 0.5 draw, 0 loss) and the number of
           simulated games are added to the overall statistics.

    Finally it prints the total number of games counted (original +
    simulations), the win rate as a percentage, and a 95% margin of error
    computed with the normal-approximation formula for a proportion,
    z * sqrt(p * (1 - p) / n). Nothing is returned.

    Args:
        directory (str): The path to the directory containing YAML log files.
        simulate (bool, optional): If True, enables Monte Carlo simulations
                                   for each log where a usable game state
                                   can be extracted. Defaults to False.
        threshold (int, optional): The dealer draws while below this total.
                                   Defaults to 17.
        max (int, optional): The maximum hand value before busting.
                             Defaults to 21.
        num_simulations (int, optional): Simulated dealer hands per file.
                                         Defaults to 10000.

    Raises:
        ValueError: If 'num_simulations' is negative.
    """

    if num_simulations < 0:
        raise ValueError("num_simulations must be non-negative")

    # Points awarded for each recognised original outcome string.
    outcome_points = {"win.": 1.0, "lose.": 0.0, "draw.": 0.5, "push.": 0.5}

    z_confidence = 1.96  # For 95% confidence interval

    # Define the full deck once
    suits = ['H', 'D', 'C', 'S']
    ranks = ['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A']
    full_deck = [s + r for s in suits for r in ranks]

    overall_won_points = 0.0
    overall_total_games_counted = 0

    # Sorted so the files are visited in the same order on every run.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".yaml"):
            continue

        filepath = os.path.join(directory, filename)
        print(f"\nProcessing file: {filename}")

        with open(filepath, "r") as file:
            yaml_data = yaml.safe_load(file)

        # Process the original game result from the log file
        has_final_results = (
            isinstance(yaml_data, list)
            and yaml_data
            and isinstance(yaml_data[-1], dict)
            and "final results" in yaml_data[-1]
        )
        if has_final_results:
            outcome_str = str(yaml_data[-1]["final results"]).strip().lower()
            if outcome_str in outcome_points:
                overall_won_points += outcome_points[outcome_str]
                overall_total_games_counted += 1
                print(f"  Original game outcome: {outcome_str} -> Counted.")
            else:
                print(f"  Warning: Unknown original outcome '{outcome_str}'. Original game not counted.")
        else:
            print("  Warning: 'final results' not found or in unexpected format. Original game not counted.")

        if not simulate:
            continue

        # Extract game state for simulation
        player_cards, dealer_visible_card = get_game_state_from_yaml(yaml_data)
        if not (player_cards and dealer_visible_card):
            print("  Warning: Could not extract necessary game state. Skipping simulations for this file.")
            continue

        player_initial_total = calculate_value(player_cards, max)
        print(f"  Extracted for simulation: Player cards {player_cards} (Total: {player_initial_total}), Dealer visible: {dealer_visible_card}")

        if player_initial_total <= 0:
            print(f"  Note: Player's initial total ({player_initial_total}) is not valid for simulation. Skipping simulations for this file.")
            continue

        # Cards already on the table cannot be drawn by the simulated dealer.
        known_cards = player_cards + [dealer_visible_card]
        invalid_card = next((card for card in known_cards if card not in full_deck), None)
        if invalid_card is not None:
            print(f"  Error: Invalid card '{invalid_card}' in exclusions. Skipping simulations.")
            continue

        sim_exclusions = set(known_cards)
        sim_available_deck = [card for card in full_deck if card not in sim_exclusions]

        sim_wins_points = 0.0
        print(f"  Running {num_simulations} simulations with player standing on {player_initial_total}...")
        for _ in range(num_simulations):
            sim_result = simulate_one_dealer_hand(player_initial_total,
                                                  dealer_visible_card,
                                                  sim_available_deck,
                                                  max=max,
                                                  threshold=threshold)
            if sim_result == "win":
                sim_wins_points += 1.0
            elif sim_result == "draw":
                sim_wins_points += 0.5
            # No points for "lose"

        overall_won_points += sim_wins_points
        overall_total_games_counted += num_simulations
        print(f"  Simulations added: {sim_wins_points} points from {num_simulations} games.")

    final_winrate_percentage = 0
    margin_of_error_percentage = 0.0
    if overall_total_games_counted > 0:
        p_hat = overall_won_points / overall_total_games_counted
        final_winrate_percentage = p_hat * 100
        # Margin of error for a proportion
        variance_p_hat = (p_hat * (1 - p_hat)) / overall_total_games_counted
        margin_of_error_percentage = (z_confidence * (variance_p_hat ** 0.5)) * 100

    print("\n--- Overall Results ---")
    print(f"Total games included (original + simulations): {overall_total_games_counted}")
    print(f"Overall Win Rate: {final_winrate_percentage:.2f}%")
    print(f"Margin of Error (95% CI): +/- {margin_of_error_percentage:.2f}%")

In [None]:
# Calculate winrate
# Seed Python's global RNG first: the simulations shuffle the deck with the
# `random` module, so without a fixed seed every run reports different numbers.
SIMULATION_SEED = 42
random.seed(SIMULATION_SEED)

directory_path = "../Agent-Pro/data/vanilla_run"
calculate_winrate_with_simulations(directory_path, simulate=True, threshold=17, max=21);