In [None]:
# Imports
import pandas as pd
import rlcard

from rlcard.agents import RandomAgent

In [3]:
# Helper functions
def blackjack_value(hand):
    """Return the blackjack score of a hand of card strings.

    Each card's rank is read as everything after its first character
    (presumably the first character is the suit, e.g. ``'SA'`` or ``'HT'`` —
    confirm against the card encoding in use).

    Scoring:
      * an ace counts 11, downgraded to 1 (minus 10) one ace at a time
        while the hand would otherwise exceed 21;
      * ``J``, ``Q``, ``K``, ``T`` and ``10`` count 10;
      * digits 2 through 9 count at face value;
      * any other rank contributes nothing.

    Args:
        hand: iterable of card strings.

    Returns:
        int: the hand's score after ace adjustment (0 for an empty hand).
    """
    ten_valued = {'J', 'Q', 'K', 'T', '10'}
    score = 0
    soft_aces = 0  # aces currently counted as 11

    for card in hand:
        symbol = card[1:]

        if symbol == 'A':
            score += 11
            soft_aces += 1
            continue

        if symbol in ten_valued:
            score += 10
            continue

        if symbol.isdigit() and 2 <= int(symbol) <= 9:
            score += int(symbol)

    # Turn aces from 11 into 1 until the hand fits under 21 or none are left.
    while soft_aces > 0 and score > 21:
        score -= 10
        soft_aces -= 1

    return score

def color_strategy(val):
    """Map a strategy cell to a CSS ``background-color`` declaration.

    Args:
        val: the cell value; ``"S"`` (stand) and ``"H"`` (hit) are recognised.

    Returns:
        str: a CSS declaration for recognised actions, or an empty string
        (no styling) for anything else.
    """
    palette = (
        ("S", "background-color: gold"),   # Stand color
        ("H", "background-color: white"),  # Hit color
    )
    for action, css in palette:
        if val == action:
            return css
    return ""  # No styling by default

In [4]:
# Results table
# results[player_total][dealer_card] is a list that collects the actions
# recorded for that situation. Rows run from 27 down to 8; columns are the
# dealer cards 2..10 followed by 'A'. Every cell gets its own empty list.
results = {
    player_total: {dealer_card: [] for dealer_card in [*range(2, 11), 'A']}
    for player_total in range(27, 7, -1)
}


In [5]:
# Init the env
env = rlcard.make("blackjack", config={"seed": 196})

# Report the environment's basic dimensions, one "label value" pair per line.
for label, value in (
    ("Number of actions:", env.num_actions),
    ("Number of players:", env.num_players),
    ("Shape of state:", env.state_shape),
    ("Shape of action:", env.action_shape),
):
    print(label, value)

Number of actions: 2
Number of players: 1
Shape of state: [[2]]
Shape of action: [None]


In [6]:
# Init the agent
# Build a RandomAgent sized to the environment's action count (presumably it
# picks actions uniformly at random — confirm in the rlcard docs), then
# register it as the environment's single agent.
agent = RandomAgent(num_actions=env.num_actions)
env.set_agents([agent])

In [None]:
# Playing the game and recording results
NUM_GAMES = 5000  # number of simulated games

for _ in range(NUM_GAMES):
    # Reuse the seeded environment (with its agent already attached) instead
    # of building a fresh, unseeded one per game: that discarded the seed and
    # repeated the construction cost on every iteration. env.run() plays one
    # complete game per call.
    trajectories, payoffs = env.run(is_training=False)

    # The player's trajectory alternates state, action, state, action, ...;
    # the final entry is dropped below so only (state, action) pairs are read.
    player_trajectory = trajectories[0]

    last_hand_value = 0
    for step, situation in enumerate(player_trajectory[:-1]):
        if step % 2 == 0:  # State
            last_hand_value = int(situation['obs'][0])
            dealer_value = int(situation['obs'][1])
            # Any dealer value above 10 is recorded under the 'A' column
            # (presumably an ace scored as 11 — confirm against rlcard's scoring).
            dealer_value = dealer_value if dealer_value <= 10 else 'A'
        else:  # Action
            action = int(situation)
            # Only record hand totals that have a row in the results table.
            if last_hand_value in results:
                results[last_hand_value][dealer_value].append(action)

# Show the last game's trajectory as a quick sanity check of the data layout.
trajectories[0]

In [8]:
# Convert lists into letters
# Each cell's list of recorded actions is replaced in place by one letter:
# 'H' when the mean action rounds to 0, 'S' otherwise. Python's round() uses
# round-half-to-even, so a mean of exactly 0.5 rounds to 0 and becomes 'H'.
for hand_value, by_dealer in results.items():
    for dealer_card, actions in by_dealer.items():
        if isinstance(actions, str):
            # Already converted by an earlier run of this cell; skipping keeps
            # the cell safe to re-run (sum() over a letter would raise).
            continue
        if not actions:
            # No observations for this situation: leave the cell blank rather
            # than dividing by zero. A blank cell is rendered without styling.
            by_dealer[dealer_card] = ''
            continue
        mean_action = sum(actions) / len(actions)
        by_dealer[dealer_card] = 'H' if round(mean_action) == 0 else 'S'

In [None]:
# Visualize results
# Outer keys of `results` become the row index, inner keys the columns.
df = pd.DataFrame.from_dict(data=results, orient="index")

# Apply the per-cell CSS rule, then display the styled table as the cell output.
styled_df = df.style.map(func=color_strategy)
styled_df