In [85]:
# Imports
import pandas as pd
import rlcard

from rlcard.agents import RandomAgent

In [86]:
# Helper functions
def blackjack_value(hand):
    """Compute the blackjack score of a hand of card codes.

    Each card is a string whose first character is skipped and whose
    remainder is read as the rank (e.g. ``'C8'`` -> ``'8'``, ``'ST'`` -> ``'T'``,
    ``'HA'`` -> ``'A'``). Aces are first counted as 11 and then demoted to 1,
    one at a time, for as long as the hand would otherwise exceed 21.
    Ranks that are not recognised contribute nothing to the total.

    Args:
        hand: iterable of card code strings.

    Returns:
        int: the hand's total.
    """
    ten_valued = {'J', 'Q', 'K', 'T', '10'}
    score = 0
    soft_aces = 0

    for code in hand:
        face = code[1:]  # drop the leading suit character

        if face == 'A':
            score, soft_aces = score + 11, soft_aces + 1
        elif face in ten_valued:
            score += 10
        elif face.isdigit():
            pip = int(face)
            if 2 <= pip <= 9:
                score += pip

    # Demote aces from 11 to 1, one per pass, only while the hand is bust
    for _ in range(soft_aces):
        if score <= 21:
            break
        score -= 10

    return score

def color_strategy(val):
    """Map an action letter to the CSS used to shade its table cell.

    Args:
        val: cell value; ``"S"`` (stand) and ``"H"`` (hit) are recognised.

    Returns:
        str: a ``background-color`` declaration for a recognised letter,
        or an empty string (no styling) for anything else.
    """
    palette = (
        ("S", "background-color: gold"),   # Stand color
        ("H", "background-color: white"),  # Hit color
    )
    for letter, css in palette:
        if val == letter:
            return css
    return ""  # No styling by default

In [87]:
# Results table
# results[player_total][dealer_value] -> list of actions recorded for that situation.
# Rows run from 17 down to 8; columns are the dealer values 2..10 followed by 'A'.
# Every cell gets its own fresh list, so appending to one never affects another.
results = {
    player_total: {dealer_value: [] for dealer_value in [*range(2, 11), 'A']}
    for player_total in range(17, 7, -1)
}

In [88]:
# Init the env
env = rlcard.make('blackjack')

# Report the environment's basic dimensions, one labelled line each
for label, value in (
    ("Number of actions:", env.num_actions),
    ("Number of players:", env.num_players),
    ("Shape of state:", env.state_shape),
    ("Shape of action:", env.action_shape),
):
    print(label, value)

Number of actions: 2
Number of players: 1
Shape of state: [[2]]
Shape of action: [None]


In [89]:
# Init the agent
# Build a RandomAgent sized to the environment's action count and register it
# as the environment's only agent (env.num_players printed 1 above).
agent = RandomAgent(num_actions=env.num_actions)
env.set_agents([agent])

In [90]:
# Playing the game and recording results
NUM_EPISODES = 5000  # number of independent hands to simulate

# NOTE(review): what gets recorded below is the action the registered agent
# chose, not how the hand turned out (`payoffs` is never used). If that agent
# picks uniformly at random, a majority vote over these actions is noise rather
# than a strategy -- consider aggregating payoffs per (situation, action).
for _ in range(NUM_EPISODES):
    # rlcard's Env.run() begins every episode with its own reset, so no
    # explicit env.reset() is needed beforehand.
    trajectories, payoffs = env.run(is_training=False)

    # The player's trajectory alternates state, action, state, ..., final state.
    # Drop the final state and walk the remaining (state, action) pairs.
    steps = trajectories[0][:-1]
    for state, action in zip(steps[0::2], steps[1::2]):
        hand_value = int(state['obs'][0])
        dealer_value = int(state['obs'][1])
        # Anything above 10 is filed under the 'A' column
        dealer_value = dealer_value if dealer_value <= 10 else 'A'
        # Only totals covered by the results table (8..17) are recorded
        if 8 <= hand_value <= 17:
            results[hand_value][dealer_value].append(int(action))
# Show the last episode's trajectory as a sanity check of the data layout
trajectories[0]

[{'obs': array([16,  4]),
  'legal_actions': OrderedDict([(0, None), (1, None)]),
  'raw_obs': {'actions': ('hit', 'stand'),
   'player0 hand': ['C8', 'S8'],
   'dealer hand': ['H4'],
   'state': (['C8', 'S8'], ['H4'])},
  'raw_legal_actions': ['hit', 'stand'],
  'action_record': [(0, 'stand')]},
 np.int64(1),
 {'obs': array([16, 19]),
  'legal_actions': OrderedDict([(0, None), (1, None)]),
  'raw_obs': {'actions': ('hit', 'stand'),
   'player0 hand': ['C8', 'S8'],
   'dealer hand': ['ST', 'H4', 'H5'],
   'state': (['C8', 'S8'], ['ST', 'H4', 'H5'])},
  'raw_legal_actions': ['hit', 'stand'],
  'action_record': [(0, 'stand')]}]

In [91]:
# Convert lists into letters
# Collapse each cell's list of recorded actions (0 = hit, 1 = stand) into a
# single letter, in place.
for hand_value, by_dealer in results.items():
    for dealer_value, actions in by_dealer.items():
        if isinstance(actions, str):
            # Already converted (this cell was re-run): leave the letter as is
            continue
        if not actions:
            # Situation never observed: leave the cell blank instead of
            # dividing by zero
            by_dealer[dealer_value] = ''
            continue
        # 'S' only when strictly more than half of the actions were 1;
        # an exact tie resolves to 'H'
        by_dealer[dealer_value] = 'S' if 2 * sum(actions) > len(actions) else 'H'

In [92]:
# Visualize results
# One row per top-level key of `results` (orient='index'); the inner dict keys become the columns
df = pd.DataFrame.from_dict(results, orient='index')
# Styler.map calls color_strategy on each cell value to get that cell's CSS
styled_df = df.style.map(color_strategy)
# Bare expression so the notebook renders the styled table as the cell output
styled_df

Unnamed: 0,2,3,4,5,6,7,8,9,10,A
17,S,S,S,S,H,S,H,S,H,H
16,S,S,S,S,H,H,S,H,H,H
15,H,H,S,H,S,S,S,S,S,H
14,S,S,H,S,S,S,H,S,S,H
13,H,S,S,H,H,S,S,H,S,H
12,H,H,S,S,S,S,H,S,S,H
11,H,H,H,S,S,H,S,S,H,S
10,S,S,S,H,H,H,S,H,S,H
9,H,H,S,H,H,H,H,S,H,H
8,S,H,S,H,S,H,H,S,H,H
