In [105]:
# Imports
import copy
import re
from datetime import datetime

import pandas as pd
import rlcard
import rlcard.envs
import yaml
from openai import OpenAI
from rlcard.agents import RandomAgent
from transformers import AutoTokenizer, AutoModelForCausalLM

In [86]:
# Helper functions
def extract_choice(text):
    """Return the last blackjack action named in an LLM reply.

    The reply is searched case-insensitively for the whole words "hit" and
    "stand" (common inflections such as "hits", "hitting", "stands" and
    "standing" also count). Matching whole words keeps unrelated words that
    merely contain those letters -- "understand", "standard", "white" --
    from being mistaken for a decision.

    Args:
        text: The model's reply. May be empty or None.

    Returns:
        "hit" or "stand" for whichever action is mentioned last, or None when
        the reply is empty or mentions neither action.
    """
    if not text:
        return None

    # The capture group holds only the base action word, so inflected forms
    # still map to "hit" / "stand".
    mentions = re.findall(r"\b(hit|stand)(?:s|ting|ing)?\b", text.lower())
    return mentions[-1] if mentions else None

def to_lower(str):
    """Return a lowercase copy of the given string."""
    return str.lower()

def card2string(cardList):
    """Render a list of card codes as one comma-separated, readable string.

    The codes are joined with commas, then every uppercase suit letter is
    expanded ('C' -> 'Club ', 'S' -> 'Spade ', 'H' -> 'Heart ',
    'D' -> 'Diamond ') and every 'T' becomes '10'.
    """
    expansions = str.maketrans({
        'C': 'Club ',
        'S': 'Spade ',
        'H': 'Heart ',
        'D': 'Diamond ',
        'T': '10',
    })
    return ','.join(cardList).translate(expansions)

def blackjack_value(hand):
    """Compute the blackjack total of a hand.

    The rank of each card is everything after its first character. Aces
    count 11, 'J'/'Q'/'K'/'T'/'10' count 10 and the digits 2-9 count their
    face value; any other rank adds nothing. While the total is above 21,
    aces are downgraded from 11 to 1 one at a time.
    """
    points = 0
    soft_aces = 0

    for card in hand:
        rank = card[1:]
        if rank == 'A':
            soft_aces += 1
            points += 11
            continue
        if rank in {'J', 'Q', 'K', 'T', '10'}:
            points += 10
            continue
        if rank.isdigit() and 2 <= int(rank) <= 9:
            points += int(rank)

    # Each downgraded ace is worth 1 instead of 11.
    while soft_aces > 0 and points > 21:
        soft_aces -= 1
        points -= 10

    return points

def color_strategy(val):
    """Map a strategy letter to the CSS background used for its table cell.

    "S" (stand) is gold, "H" (hit) is white; anything else gets no styling.
    """
    palette = (
        ("S", "background-color: gold"),
        ("H", "background-color: white"),
    )
    for letter, css in palette:
        if val == letter:
            return css
    return ""

In [88]:
# DeepSeek R1 model
class DEEPSEEKR1:
    """Chat-completion client configured from ``../config.yaml``.

    The config file supplies the API key (``Keys.DEEPSEEKR1PAID``), the model
    id (``IDs.DEEPSEEKR1PAID``) and the provider base URL
    (``Providers.DEEPSEEK``).
    """

    def __init__(self) -> None:
        with open("../config.yaml", "r") as config_file:
            settings = yaml.safe_load(config_file)

        api_key = settings["Keys"]["DEEPSEEKR1PAID"]
        self.model = settings["IDs"]["DEEPSEEKR1PAID"]
        self.client = OpenAI(
            base_url=settings["Providers"]["DEEPSEEK"],
            api_key=api_key,
        )

    def response(self, mes):
        """Send the chat messages ``mes`` and return the first choice's text."""
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=mes,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content

In [None]:
# Blackjack Agent
# Prompt style used by LlmAgent.step: 'Vanilla', 'Radical', 'ReAct',
# 'ReFlexion' or 'agentpro'.
game_style = 'agentpro'

# Timestamped base name (no extension) of the YAML log for the current game.
now = datetime.now()
formatted = now.strftime("%Y-%m-%d %H:%M:%S")
storage_name = f'../Agent-Pro/my_data/DeepSeek R1/{formatted}'
response = ''

class LlmAgent(RandomAgent):
    """Blackjack agent whose actions are chosen by the DeepSeek R1 model.

    The prompt style is selected by the module-level ``game_style`` and every
    conversation is appended to ``storage_name + '.yaml'``; both globals are
    read each time ``step`` runs.
    """

    # Shared DEEPSEEKR1 client, created on first use so that the config file is
    # read and the API client built once rather than on every decision.
    _llm = None

    def __init__(self, num_actions):
        super().__init__(num_actions)

    @staticmethod
    def step(state):
        """Ask the LLM for an action in the given state.

        Args:
            state: Dict with a ``'raw_obs'`` entry holding the ``'dealer hand'``
                and ``'player0 hand'`` card lists.

        Returns:
            0 for "hit", 1 for "stand", or -1 when neither action can be found
            in the model's reply.

        Raises:
            ValueError: If ``game_style`` is not one of the supported styles.
        """
        if LlmAgent._llm is None:
            LlmAgent._llm = DEEPSEEKR1()
        llm = LlmAgent._llm

        deal_card = state['raw_obs']['dealer hand']
        hand_card = state['raw_obs']['player0 hand']
        p = []
        begin_info = "You are a player in blackjack. Please beat the dealer and win the game.\n"
        game_rule = "Game Rule:\n1. Please try to get your card total to as close to 21 as possible, without going over, and still having a higher total than the dealer.\n2. If anyone's point total exceeds 21, he or she loses the game. \n3. You can only choose one of the following two actions: {\"Stand\", \"Hit\"}. If you choose to Stand, you will stop taking cards and wait for the dealer to finish. If you choose to Hit, you can continue to take a card, but there is also the risk of losing the game over 21 points. \n4. After all players have completed their hands, the dealer reveals their hidden card. Dealers must hit until their cards total 17 or higher.\n"
        game_info = "The dealer's current card is {" + card2string(deal_card) + "}. The dealer has another hidden card. You don't know what it is. Your current cards are {" + card2string(hand_card) + "}. "

        if game_style == 'Vanilla':
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please output your action in following format: ###My action is {your action}, without any other text."
            p.append({"role": "user", "content": game_info})

        elif game_style == 'Radical':
            begin_info = "You are an aggressive player of blackjack who likes to take risks to earn high returns. Please beat the dealer and win the game."
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please output your action in following format: ###My action is {your action}, without any other text."
            p.append({"role": "user", "content": game_info})

        elif game_style == 'ReAct':
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please first think and reason about the current hand and then generate your action as follows: ###My thought is {Your Thought}. My action is {your action}."
            p.append({"role": "user", "content": game_info})

        elif game_style == 'ReFlexion':
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please first think and reason about the current hand and then generate your action as follows: ###My thought is {Your Thought}. My action is {your action}."
            p.append({"role": "user", "content": game_info})
            # First pass: get a draft answer, then ask the model to review it.
            # The shared request below produces the refined answer.
            llm_res = llm.response(p)
            p.append({"role": "assistant", "content": llm_res})
            reflexion_info = "Please carefully check the response you just output, and then refine your answer . The final output is also in following format: ###My thought is {Your Thought}. My action is {your action}."
            p.append({"role": "user", "content": reflexion_info})

        elif game_style == 'agentpro':
            begin_info = "You are an aggressive player of blackjack who likes to take risks to earn high returns. Please beat the dealer and win the game."
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please read the behavoiral guideline and world modeling carefully . Then you should analyze your own cards and your strategies in Self-belief and then analyze the dealer cards in World-belief. Lastly, please select your action from {\"Stand\",\"Hit\"}.### Output Format: Self-Belief is {Belief about youself}. World-Belief is {Belief about the dealer}. My action is {Your action}. Please output in the given format."
            p.append({"role": "user", "content": game_info})

        else:
            # Fail before the request instead of sending an empty message list.
            raise ValueError(f"Unsupported game_style: {game_style!r}")

        llm_res = llm.response(p)
        p.append({"role": "assistant", "content": llm_res})

        # Append the whole conversation to the current game's log file.
        filename = storage_name + '.yaml'
        with open(filename, "a") as yaml_file:
            yaml.dump(p, yaml_file, default_flow_style=False, allow_unicode=True)

        # Parse the reply once and translate it to an action id.
        # NOTE(review): -1 is returned for an unparseable reply, which is outside
        # the two declared actions -- confirm how the environment treats it.
        return {"hit": 0, "stand": 1}.get(extract_choice(llm_res), -1)

In [90]:
# Results table
# Rows are player hand values from 17 down to 8; columns are the dealer's
# value 2..10 and 'A'. Every cell starts as its own empty list of actions.
results = {
    hand_value: {dealer_card: [] for dealer_card in [*range(2, 11), 'A']}
    for hand_value in range(17, 7, -1)
}

In [91]:
# Seeds found for each opening situation, keyed by
# (player hand value, dealer value).
found_seeds = {}

In [92]:
# Random agent with two actions; it is only used to drive the environment
# during the seed search below, not for evaluation.
agent = RandomAgent(num_actions=2)

In [93]:
# Searching for seeds to launch evaluation
for i in range(100000):
    env = rlcard.make("blackjack", config={"seed": i})
    env.set_agents([agent])
    trajectories, payoffs = env.run(is_training=False)

    # Only the opening state of each game is inspected.
    for situation in trajectories[0][:1]:
        last_hand_value = int(situation['obs'][0])
        if not 8 <= last_hand_value <= 17:
            continue
        dealer_value = int(situation['obs'][1])
        # Keep at most 10 seeds per (player hand value, dealer value) pair.
        seeds_for_situation = found_seeds.setdefault((last_hand_value, dealer_value), [])
        if len(seeds_for_situation) < 10:
            seeds_for_situation.append(i)

In [94]:
# DeepSeek agent.
# `env` here is the last environment left over from the seed-search loop; it
# is only used to read the number of actions.
llm_agent = LlmAgent(num_actions=env.num_actions)

In [95]:
def play_game(env):
    """Play one game on ``env``, record the agent's decisions and log the outcome.

    Side effects:
        * Every action taken while the player's hand value was between 8 and
          17 is appended to the module-level
          ``results[hand value][dealer value]``; dealer values above 10 are
          filed under ``'A'``.
        * The final cards and the result ("win." / "draw." / "lose.") are
          appended to ``storage_name + '.yaml'``.

    Args:
        env: Environment exposing ``run(is_training=False)`` and
            ``get_payoffs()``.

    Returns:
        The value of ``env.get_payoffs()``.

    Raises:
        RuntimeError: If the environment returns an empty trajectory.
    """
    trajectories, payoffs = env.run(is_training=False)
    player_trajectory = trajectories[0]
    if len(player_trajectory) == 0:
        raise RuntimeError("env.run() returned an empty trajectory for player 0")

    final_raw_obs = player_trajectory[-1]['raw_obs']

    # The trajectory alternates state, action, state, ...; the final state is
    # excluded because no action follows it.
    last_hand_value = 0
    dealer_value = None
    for i, situation in enumerate(player_trajectory[:-1]):
        if i % 2 == 0:  # State
            last_hand_value = int(situation['obs'][0])
            dealer_value = int(situation['obs'][1])
            dealer_value = dealer_value if dealer_value <= 10 else 'A'
        else:  # Action
            action = int(situation)
            if 8 <= last_hand_value <= 17:
                results[last_hand_value][dealer_value].append(action)

    res_str = ('dealer {}, '.format(final_raw_obs['state'][1]) +
               'player {}, '.format(final_raw_obs['state'][0]))
    if payoffs[0] == 1:
        final_res = "win."
    elif payoffs[0] == 0:
        final_res = "draw."
    elif payoffs[0] == -1:
        final_res = "lose."
    else:
        # Unexpected payoff: still log it rather than fail on an unbound name.
        final_res = "unknown payoff {}.".format(payoffs[0])

    p = [{"final cards": res_str, "final results": final_res}]
    filename = storage_name + '.yaml'
    with open(filename, "a") as yaml_file:
        yaml.dump(p, yaml_file, default_flow_style=False, allow_unicode=True)
    return env.get_payoffs()

In [None]:
# Iterate over each possible situation and store agent's performance
i = 1
for key in sorted(found_seeds.keys()):
    for seed in found_seeds[key][:1]:
        # Fresh timestamp per game so each game is logged to its own YAML file
        # (LlmAgent.step and play_game both read the module-level storage_name).
        now = datetime.now()
        formatted = now.strftime("%Y-%m-%d %H:%M:%S")
        storage_name = '../Agent-Pro/my_data/DeepSeek R1/' + formatted
        env = rlcard.make("blackjack", config={"seed": seed})
        env.set_agents([llm_agent])
        print(f"Current game: {i}, situation: {key}, seed: {seed}")
        # Actually play the game: without this call nothing is recorded in
        # `results` and no log file is written.
        play_game(env)
        i += 1

Current game: 1, situation: (8, 6), seed: 450
Current game: 2, situation: (8, 7), seed: 133
Current game: 3, situation: (8, 8), seed: 717
Current game: 4, situation: (8, 9), seed: 232
Current game: 5, situation: (8, 10), seed: 13
Current game: 6, situation: (8, 11), seed: 1171
Current game: 7, situation: (9, 2), seed: 304
Current game: 8, situation: (9, 3), seed: 920
Current game: 9, situation: (9, 4), seed: 26
Current game: 10, situation: (9, 5), seed: 334
Current game: 11, situation: (9, 6), seed: 307
Current game: 12, situation: (9, 7), seed: 308
Current game: 13, situation: (9, 8), seed: 551
Current game: 14, situation: (9, 9), seed: 395
Current game: 15, situation: (9, 10), seed: 69
Current game: 16, situation: (9, 11), seed: 354
Current game: 17, situation: (10, 2), seed: 411
Current game: 18, situation: (10, 3), seed: 414
Current game: 19, situation: (10, 4), seed: 9
Current game: 20, situation: (10, 5), seed: 27
Current game: 21, situation: (10, 6), seed: 28
Current game: 22, s

In [106]:
# Convert lists into letters
# Keep a copy of the table as it was before conversion.
results_copy = copy.deepcopy(results)
for row in results.values():
    for key, actions in row.items():
        if isinstance(actions, str):
            # Already converted (the cell was re-run); leave the letter as is.
            continue
        if not actions:
            # No decision was recorded for this situation; avoid dividing by zero.
            row[key] = '-'
            continue
        # An average action that rounds to 0 means hit, anything else means stand.
        row[key] = 'H' if round(sum(actions) / len(actions)) == 0 else 'S'

In [107]:
# Visualize results
# orient='index' turns the outer keys of `results` into rows and the inner
# keys into columns.
df = pd.DataFrame.from_dict(results, orient='index')
# Colour every cell according to its strategy letter.
styled_df = df.style.map(color_strategy)
styled_df

Unnamed: 0,2,3,4,5,6,7,8,9,10,A
17,H,S,S,S,S,S,H,H,H,H
16,H,S,H,S,H,H,H,H,H,H
15,H,H,H,H,H,H,H,H,H,H
14,H,H,H,H,H,H,H,H,H,H
13,H,H,H,H,H,H,H,H,H,H
12,H,H,H,H,H,H,H,H,H,H
11,H,H,H,H,H,H,H,H,H,H
10,H,H,H,H,H,H,H,H,H,H
9,H,H,H,H,H,H,H,H,H,H
8,H,H,H,H,H,H,H,H,H,H


In [None]:
# Report winrate

import os
def calculate_winrate(directory):
    """Compute the win rate over the YAML game logs in ``directory``.

    A file counts as a game when it parses to a non-empty list whose last
    entry is a mapping with a ``"final results"`` key; the game is a win when
    that value equals "win." (ignoring case and surrounding whitespace).
    Empty files and logs without a final result are skipped.

    Args:
        directory: Path of the directory holding ``.yaml`` / ``.yml`` logs.

    Returns:
        Tuple ``(won, total_games, winrate)`` where ``winrate`` is a
        percentage, or 0 when no game was found.
    """
    won = 0
    total_games = 0

    for filename in os.listdir(directory):
        if not filename.endswith((".yaml", ".yml")):
            continue

        filepath = os.path.join(directory, filename)
        with open(filepath, "r") as file:
            data = yaml.safe_load(file)

        # An empty file parses to None and an unfinished game has no result
        # record; neither should abort the whole report.
        if not isinstance(data, list) or not data:
            continue
        last_entry = data[-1]
        if not isinstance(last_entry, dict) or "final results" not in last_entry:
            continue

        total_games += 1
        if str(last_entry["final results"]).strip().lower() == "win.":
            won += 1

    winrate = (won / total_games * 100) if total_games > 0 else 0
    return won, total_games, winrate

# Directory whose YAML game logs are scanned.
# NOTE(review): the games played in this notebook are logged under
# '../Agent-Pro/my_data/DeepSeek R1/', but this points at 'WSB' -- confirm
# this is the intended directory.
directory_path = "../Agent-Pro/my_data/WSB"
wins, total, winrate = calculate_winrate(directory_path)

# Summary: number of wins, number of finished games, and win percentage.
print(f"Total Wins: {wins}")
print(f"Total Games: {total}")
print(f"Win Rate: {winrate:.2f}%")

Total Wins: 41
Total Games: 109
Win Rate: 37.61%


# Optimal strategy

<div>
    <img src="../optimal_strategy.png" width="500"/>
</div>