In [5]:
# Imports
import pickle
import random
import re
from datetime import datetime

import rlcard
import rlcard.envs
import yaml
from openai import OpenAI
from rlcard.agents import RandomAgent

In [6]:
# Notebook number
# Run identifier; it is embedded in the file names of the pickled result
# snapshots written by the game loop (``../dicts/id_{id}_data_{i}.pkl``).
# NOTE(review): this name shadows the builtin ``id()`` for the rest of the notebook.
id = 12

In [7]:
# Helper functions
def extract_choice(text):
    """Return the last blackjack action named in ``text``.

    The search is case-insensitive and only counts "hit" / "stand" when they
    appear as standalone words, i.e. not directly preceded or followed by
    another letter. A plain substring search would also fire on words such as
    "understand", "standard" or "white", which can flip the parsed decision
    when the model adds a remark after its action.

    Args:
        text: Raw model response. ``None`` or an empty string is accepted.

    Returns:
        ``"hit"`` or ``"stand"`` (whichever is mentioned last), or ``None``
        when neither word occurs or ``text`` is empty/``None``.
    """
    if not text:
        # The API may hand back no content at all; treat that as "no decision".
        return None

    # Letter-based lookarounds (rather than \b) so that markdown such as
    # "_Hit_" or "**Stand**" still counts as a standalone word.
    mentions = re.findall(
        r"(?<![A-Za-z])(hit|stand)(?![A-Za-z])", text, flags=re.IGNORECASE
    )
    if not mentions:
        return None
    return mentions[-1].lower()

def to_lower(str):
    """Return the lowercase form of ``str`` by calling its ``lower()`` method."""
    # NOTE: the parameter name shadows the builtin ``str`` inside this function.
    return str.lower()

def card2string(cardList):
    """Render a list of card codes as one comma-separated, readable string.

    The codes are joined with commas and then the letters C, S, H, D are
    expanded to suit names and T to "10". The replacements are applied to the
    whole joined string, one after another, in the order listed below.

    Args:
        cardList: Iterable of card code strings (e.g. ``'SA'``, ``'HT'``).

    Returns:
        The expanded string, e.g. ``'Spade A,Heart 10'``.
    """
    expansions = (
        ('C', 'Club '),
        ('S', 'Spade '),
        ('H', 'Heart '),
        ('D', 'Diamond '),
        ('T', '10'),
    )
    text = ','.join(cardList)
    for code, label in expansions:
        text = text.replace(code, label)
    return text

def blackjack_value(hand):
    """Compute the blackjack total of a hand.

    Everything after the first character of each card is treated as its rank.
    Aces start at 11 and are demoted to 1 one at a time while the hand is
    over 21. Ranks that are not recognised contribute nothing.

    Args:
        hand: Iterable of card code strings such as ``'SA'`` or ``'H10'``.

    Returns:
        The integer hand total.
    """
    ten_valued = {'J', 'Q', 'K', 'T', '10'}
    points = 0
    soft_aces = 0

    for rank in (card[1:] for card in hand):
        if rank == 'A':
            soft_aces += 1
            points += 11
        elif rank in ten_valued:
            points += 10
        elif rank.isdigit() and 2 <= int(rank) <= 9:
            points += int(rank)

    # Count an ace as 1 instead of 11 for as long as the hand is bust.
    for _ in range(soft_aces):
        if points <= 21:
            break
        points -= 10

    return points

def color_strategy(val):
    """Map a strategy-table cell to a CSS background rule.

    Returns gold for ``"S"`` (stand), white for ``"H"`` (hit) and an empty
    string (no styling) for anything else.
    """
    palette = (
        ("S", "background-color: gold"),    # Stand color
        ("H", "background-color: white"),   # Hit color
    )
    for action, css in palette:
        if val == action:
            return css
    return ""  # No styling by default

In [8]:
# DeepSeek R1 model
class DEEPSEEKR1:
    """Chat-completion client whose settings are read from ``../config.yaml``.

    The config file is expected to provide ``Keys.DEEPSEEKR1PAID`` (API key),
    ``IDs.DEEPSEEKR1PAID`` (model id) and ``Providers.DEEPSEEK`` (base URL).
    """

    def __init__(self) -> None:
        """Load the YAML config and build the ``OpenAI`` client from it."""
        with open("../config.yaml", "r") as config_file:
            settings = yaml.safe_load(config_file)

        secret = settings["Keys"]["DEEPSEEKR1PAID"]
        self.model = settings["IDs"]["DEEPSEEKR1PAID"]
        self.client = OpenAI(
            api_key=secret,
            base_url=settings["Providers"]["DEEPSEEK"],
        )

    def response(self, mes):
        """Send the chat messages ``mes`` and return the first choice's content.

        Args:
            mes: List of ``{"role": ..., "content": ...}`` message dicts.

        Returns:
            ``message.content`` of the first returned choice.
        """
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=mes,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content

In [9]:
# Blackjack Agent
# Prompting variant; LlmAgent.step compares it against 'Vanilla', 'Radical',
# 'ReAct', 'ReFlexion' and 'agentpro' to decide which prompt to build.
game_style = "agentpro"

# Timestamped base path (without extension) of the YAML conversation log.
# The game loop further down assigns a new storage_name for every game.
now = datetime.now()
formatted = f"{now:%Y-%m-%d %H:%M:%S}"
storage_name = f"../Agent-Pro/my_data/DeepSeek R1/{formatted}"

# NOTE(review): not read by any code visible in this notebook — confirm before removing.
response = ""

class LlmAgent(RandomAgent):
    """Blackjack agent that asks the DeepSeek R1 client for every decision.

    The prompt variant is selected by the module-level ``game_style`` and each
    conversation is appended to ``<storage_name>.yaml``.
    """

    def __init__(self, num_actions):
        """Initialise the base agent and create the LLM client.

        Args:
            num_actions: Forwarded unchanged to ``RandomAgent.__init__``.
        """
        super().__init__(num_actions)
        self.llm = DEEPSEEKR1()

    # @staticmethod
    def step(self, state):
        """Query the LLM for the current state and return the chosen action id.

        Args:
            state: Environment state; ``state['raw_obs']`` must contain the
                ``'dealer hand'`` and ``'player0 hand'`` card lists.

        Returns:
            ``0`` when the response is parsed as "hit", ``1`` for "stand" and
            ``-1`` when no action could be extracted (including an empty or
            missing response).
            NOTE(review): ``-1`` is not one of the two action ids; confirm how
            the environment and the results table are meant to treat it.
        """
        deal_card = state['raw_obs']['dealer hand']
        hand_card = state['raw_obs']['player0 hand']
        p = []
        begin_info = "You are a player in blackjack. Please beat the dealer and win the game.\n"
        game_rule = "Game Rule:\n1. Please try to get your card total to as close to 21 as possible, without going over, and still having a higher total than the dealer.\n2. If anyone's point total exceeds 21, he or she loses the game. \n3. You can only choose one of the following two actions: {\"Stand\", \"Hit\"}. If you choose to Stand, you will stop taking cards and wait for the dealer to finish. If you choose to Hit, you can continue to take a card, but there is also the risk of losing the game over 21 points. \n4. After all players have completed their hands, the dealer reveals their hidden card. Dealers must hit until their cards total 17 or higher.\n"
        game_info = "The dealer's current card is {" + card2string(deal_card) + "}. The dealer has another hidden card. You don't know what it is. Your current cards are {" + card2string(hand_card) + "}. "

        # Exactly one of the branches below fires, depending on game_style.
        if game_style == 'Vanilla':
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please output your action in following format: ###My action is {your action}, without any other text."
            p.append({"role": "user", "content": game_info})

        if game_style == 'Radical':
            begin_info = "You are an aggressive player of blackjack who likes to take risks to earn high returns. Please beat the dealer and win the game."
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please output your action in following format: ###My action is {your action}, without any other text."
            p.append({"role": "user", "content": game_info})

        if game_style == 'ReAct':
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please first think and reason about the current hand and then generate your action as follows: ###My thought is {Your Thought}. My action is {your action}."
            p.append({"role": "user", "content": game_info})

        if game_style == 'ReFlexion':
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please first think and reason about the current hand and then generate your action as follows: ###My thought is {Your Thought}. My action is {your action}."
            p.append({"role": "user", "content": game_info})
            # First pass: get a draft answer, then ask the model to review it.
            llm_res = self.llm.response(p)
            p.append({"role": "assistant", "content": llm_res})
            reflexion_info = "Please carefully check the response you just output, and then refine your answer . The final output is also in following format: ###My thought is {Your Thought}. My action is {your action}."
            p.append({"role": "user", "content": reflexion_info})

        if game_style == 'agentpro':
            # Single user message: instructions, rules and situation combined.
            begin_info = "I will describe the situation. You have to reason through this in 3-5 steps, then stop. The description begins now. You are an aggressive player of blackjack who likes to take risks to earn high returns. Please beat the dealer and win the game."
            game_info += "Please read the behavoiral guideline and world modeling carefully. Then you should analyze your own cards and your strategies in Self-belief and then analyze the dealer cards in World-belief. Lastly, please select your action from {\"Stand\",\"Hit\"}.### Output Format: Self-Belief is {Belief about youself}. World-Belief is {Belief about the dealer}. My action is {Your action}. Please output in the given format. Do not write anything else."
            p.append({"role": "user", "content": begin_info + game_rule + game_info})

        llm_res = self.llm.response(p)
        p.append({"role": "assistant", "content": llm_res})

        # The dump keeps non-ASCII characters (allow_unicode=True), so the file
        # is opened as UTF-8 explicitly instead of relying on the platform
        # default encoding, which may be unable to encode the model's output.
        filename = storage_name + '.yaml'
        with open(filename, "a", encoding="utf-8") as yaml_file:
            yaml.dump(p, yaml_file, default_flow_style=False, allow_unicode=True)

        # Parse the reply once; a missing/empty reply counts as "no decision"
        # rather than being passed to the parser.
        decision = extract_choice(llm_res) if llm_res else None
        if decision == "hit":
            return 0
        if decision == "stand":
            return 1
        return -1

In [10]:
# Results table
# results[player_total][dealer_card] is the list of actions recorded by
# play_game for that situation. Player totals run from 17 down to 8; dealer
# cards are 2-10 plus 'A'. Every cell starts out as its own empty list.
results = {
    player_total: {dealer_card: [] for dealer_card in [*range(2, 11), 'A']}
    for player_total in range(17, 7, -1)
}

In [11]:
# DeepSeek agent
# One agent instance reused for every game. Constructing it creates a
# DEEPSEEKR1 client, which reads ../config.yaml.
# num_actions=2: LlmAgent.step returns 0 for "hit" and 1 for "stand".
llm_agent = LlmAgent(num_actions=2)

In [12]:
def play_game(env):
    """Play one game, record the agent's decisions and log the outcome.

    Runs ``env`` with ``is_training=False``, appends every action taken while
    the player's total was between 8 and 17 to the module-level ``results``
    table, and appends a summary of the final hands and the result to
    ``<storage_name>.yaml``.

    Args:
        env: Environment exposing ``run(is_training=...)`` and
            ``get_payoffs()``. Player 0's trajectory is read as alternating
            state / action entries followed by a final state.

    Returns:
        Whatever ``env.get_payoffs()`` returns.

    Raises:
        ValueError: If player 0's trajectory is empty (previously this path
            failed later with a ``NameError``).
    """
    trajectories, payoffs = env.run(is_training=False)
    player_trajectory = trajectories[0]
    if len(player_trajectory) == 0:
        raise ValueError("play_game: env.run() returned an empty trajectory for player 0")

    # The last trajectory entry is the final state; its raw observation holds
    # the hands that go into the log line.
    final_raw_obs = player_trajectory[-1]['raw_obs']

    last_hand_value = 0
    for i, situation in enumerate(player_trajectory[:-1]):
        if i % 2 == 0:  # State
            last_hand_value = int(situation['obs'][0])
            dealer_value = int(situation['obs'][1])
            # Dealer values above 10 are filed under the 'A' column.
            dealer_value = dealer_value if dealer_value <= 10 else 'A'
        else:  # Action
            action = int(situation)
            # Only player totals covered by the results table are recorded.
            if 8 <= last_hand_value <= 17:
                results[last_hand_value][dealer_value].append(action)

    res_str = ('dealer {}, '.format(final_raw_obs['state'][1]) +
               'player {}, '.format(final_raw_obs['state'][0]))
    if payoffs[0] == 1:
        final_res = "win."
    elif payoffs[0] == 0:
        final_res = "draw."
    elif payoffs[0] == -1:
        final_res = "lose."
    else:
        # Any other payoff used to leave final_res unbound and crash below.
        final_res = "unknown (payoff {}).".format(payoffs[0])

    p = [{"final cards": res_str, "final results": final_res}]
    filename = storage_name + '.yaml'
    # UTF-8 explicitly, because the dump is allowed to emit non-ASCII text.
    with open(filename, "a", encoding="utf-8") as yaml_file:
        yaml.dump(p, yaml_file, default_flow_style=False, allow_unicode=True)
    return env.get_payoffs()

In [13]:
# Random games
for i in range(1, 301):
    # A fresh timestamp per game: LlmAgent.step and play_game both append to
    # "<storage_name>.yaml", so every game gets its own log file.
    now = datetime.now()
    formatted = f"{now:%Y-%m-%d %H:%M:%S.%f}"
    storage_name = f"../Agent-Pro/my_data/WSB_run2/{formatted}"

    # New single-player environment with a randomly drawn seed for each game.
    env = rlcard.make(
        'blackjack',
        config={'game_num_players': 1, "seed": random.randint(0, 10**10)},
    )
    env.set_agents([llm_agent])
    play_game(env)
    print(f"Current game: {i}")

    # Snapshot the accumulated results table every 50 games.
    if not i % 50:
        with open(f"../dicts/id_{id}_data_{i}.pkl", "wb") as f:
            pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL)

Current game: 1
Current game: 2
Current game: 3
Current game: 4
Current game: 5
Current game: 6
Current game: 7
Current game: 8
Current game: 9
Current game: 10
Current game: 11
Current game: 12
Current game: 13
Current game: 14
Current game: 15
Current game: 16
Current game: 17
Current game: 18
Current game: 19
Current game: 20
Current game: 21
Current game: 22
Current game: 23
Current game: 24
Current game: 25
Current game: 26
Current game: 27
Current game: 28
Current game: 29
Current game: 30
Current game: 31
Current game: 32
Current game: 33
Current game: 34
Current game: 35
Current game: 36
Current game: 37
Current game: 38
Current game: 39
Current game: 40
Current game: 41
Current game: 42
Current game: 43
Current game: 44
Current game: 45
Current game: 46
Current game: 47
Current game: 48
Current game: 49
Current game: 50
Current game: 51
Current game: 52
Current game: 53
Current game: 54
Current game: 55
Current game: 56
Current game: 57
Current game: 58
Current game: 59
Curren