In [42]:
# Imports
import json
import random
import re
from datetime import datetime

import rlcard
import rlcard.envs
import yaml
from openai import OpenAI
from rlcard.agents import RandomAgent

In [None]:
# Helper functions
def extract_choice(text):
    """Return the last blackjack action named in an LLM reply.

    The reply is scanned case-insensitively for the stand-alone words
    "hit" and "stand"; whichever occurs last wins.  Only whole words count,
    so "understand", "standard" or "white" are not mistaken for an action.

    Args:
        text: The raw model reply.  Anything that is not a string (for
            example ``None`` when the API returned no content) is treated
            as "no decision".

    Returns:
        "hit", "stand", or ``None`` when neither word appears.
    """
    if not isinstance(text, str):
        return None

    # Letters on either side would make the hit part of a longer word
    # ("understand"), so reject those; punctuation, braces, digits and
    # markdown markers around the word are all fine.
    mentions = re.findall(r"(?<![a-z])(hit|stand)(?![a-z])", text.lower())
    return mentions[-1] if mentions else None

def to_lower(str):
    """Return a lowercase copy of ``str``.

    The parameter shadows the builtin ``str`` inside this function only; the
    name is kept so existing keyword calls keep working.
    """
    return str.lower()

def card2string(cardList):
    """Render a list of card codes as one comma-separated, readable string.

    The codes are joined with "," and then every upper-case suit letter is
    expanded to its name (followed by a space) and "T" is spelled "10",
    e.g. ``['S7', 'HT']`` becomes ``'Spade 7,Heart 10'``.
    """
    # Substitutions run over the whole joined text in this exact order.
    # None of the inserted words contains an upper-case letter that a later
    # substitution looks for, so earlier expansions are never re-expanded.
    substitutions = (
        ('C', 'Club '),
        ('S', 'Spade '),
        ('H', 'Heart '),
        ('D', 'Diamond '),
        ('T', '10'),
    )
    readable = ','.join(cardList)
    for code, label in substitutions:
        readable = readable.replace(code, label)
    return readable

def blackjack_value(hand):
    """Compute the blackjack total of a hand of card codes.

    Each card's first character is skipped and the remainder is read as the
    rank.  Aces count 11, 'J'/'Q'/'K'/'T'/'10' count 10, digit ranks 2-9
    count their face value, and any other rank contributes nothing.  While
    the total exceeds 21, aces are demoted from 11 to 1 one at a time.
    """
    ten_point_ranks = {'J', 'Q', 'K', 'T', '10'}
    score = 0
    aces_at_eleven = 0

    for card in hand:
        rank = card[1:]
        if rank == 'A':
            aces_at_eleven += 1
            score += 11
            continue
        if rank in ten_point_ranks:
            score += 10
            continue
        if rank.isdigit() and 2 <= int(rank) <= 9:
            score += int(rank)

    # Re-value aces as 1 only for as long as the hand would otherwise bust.
    while aces_at_eleven > 0 and score > 21:
        aces_at_eleven -= 1
        score -= 10

    return score

In [33]:
# DeepSeek R1 model
class DEEPSEEKR1:
    """Minimal chat-completion client for the DeepSeek R1 model.

    Connection details are read from a YAML file that must provide
    ``Keys.DEEPSEEKR1`` (API key), ``IDs.DEEPSEEKR1`` (model identifier) and
    ``Providers.OPENROUTER`` (base URL handed to the OpenAI client).
    """

    # Location used when no explicit path is given.  Kept identical to the
    # previously hard-coded value so existing ``DEEPSEEKR1()`` calls behave
    # the same; pass ``config_path`` to run on another machine.
    DEFAULT_CONFIG_PATH = "/Users/fitter0happier/Desktop/Coding/NLP/21Mind/config.yaml"

    def __init__(self, config_path=None) -> None:
        """Load the configuration and build the API client.

        Args:
            config_path: Path of the YAML configuration file.  ``None``
                (the default) falls back to ``DEFAULT_CONFIG_PATH``.

        Raises:
            OSError: If the configuration file cannot be opened.
            KeyError: If one of the expected configuration keys is missing.
        """
        if config_path is None:
            config_path = self.DEFAULT_CONFIG_PATH

        with open(config_path, "r") as file:
            config = yaml.safe_load(file)

        self.model = config["IDs"]["DEEPSEEKR1"]
        self.client = OpenAI(
            base_url=config["Providers"]["OPENROUTER"],
            api_key=config["Keys"]["DEEPSEEKR1"],
        )

    def response(self, mes):
        """Send the chat messages ``mes`` and return the first choice's text.

        Args:
            mes: List of ``{"role": ..., "content": ...}`` message dicts.

        Returns:
            The ``content`` of the first returned choice's message.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=mes)

        return response.choices[0].message.content

In [34]:
# Blackjack Agent
# Prompting strategy read by LlmAgent.step; the values it checks for are
# 'Vanilla', 'Radical', 'ReAct', 'ReFlexion' and 'agentpro'.
game_style = 'agentpro'

# Transcript path prefix: a per-run timestamp under the DeepSeek R1 data
# folder ('.yaml' is appended where the file is opened).
now = datetime.now()
formatted = format(now, "%Y-%m-%d %H:%M:%S")
storage_name = '../Agent-Pro/my_data/DeepSeek R1/' + formatted

class LlmAgent(RandomAgent):
    """Blackjack agent whose every decision is delegated to DeepSeek R1.

    The prompt layout is selected by the module-level ``game_style`` and each
    exchange is appended to the YAML transcript ``storage_name + '.yaml'``.
    """

    def __init__(self, num_actions):
        super().__init__(num_actions)

    @staticmethod
    def step(state):
        """Ask the LLM for an action given the current game state.

        Args:
            state: State dict whose ``raw_obs`` holds the card-code lists
                ``'dealer hand'`` and ``'player0 hand'``.

        Returns:
            0 for "hit", 1 for "stand", or -1 when no action could be parsed
            from the reply.
            NOTE(review): how the environment treats -1 is not visible here -
            confirm it is handled as intended.

        Raises:
            ValueError: If ``game_style`` is not one of the supported styles
                (previously this silently sent an empty message list).
        """
        deal_card = state['raw_obs']['dealer hand']
        hand_card = state['raw_obs']['player0 hand']
        llm = DEEPSEEKR1()
        p = []
        begin_info = "You are a player in blackjack. Please beat the dealer and win the game.\n"
        game_rule = "Game Rule:\n1. Please try to get your card total to as close to 21 as possible, without going over, and still having a higher total than the dealer.\n2. If anyone's point total exceeds 21, he or she loses the game. \n3. You can only choose one of the following two actions: {\"Stand\", \"Hit\"}. If you choose to Stand, you will stop taking cards and wait for the dealer to finish. If you choose to Hit, you can continue to take a card, but there is also the risk of losing the game over 21 points. \n4. After all players have completed their hands, the dealer reveals their hidden card. Dealers must hit until their cards total 17 or higher.\n"
        game_info = "The dealer's current card is {" + card2string(deal_card) + "}. The dealer has another hidden card. You don't know what it is. Your current cards are {" + card2string(hand_card) + "}. "

        # Exactly one style applies, so the branches are mutually exclusive.
        if game_style == 'Vanilla':
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please output your action in following format: ###My action is {your action}, without any other text."
            p.append({"role": "user", "content": game_info})

        elif game_style == 'Radical':
            begin_info = "You are an aggressive player of blackjack who likes to take risks to earn high returns. Please beat the dealer and win the game."
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please output your action in following format: ###My action is {your action}, without any other text."
            p.append({"role": "user", "content": game_info})

        elif game_style == 'ReAct':
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please first think and reason about the current hand and then generate your action as follows: ###My thought is {Your Thought}. My action is {your action}."
            p.append({"role": "user", "content": game_info})

        elif game_style == 'ReFlexion':
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please first think and reason about the current hand and then generate your action as follows: ###My thought is {Your Thought}. My action is {your action}."
            p.append({"role": "user", "content": game_info})
            # First pass: get a draft answer, then ask the model to review it.
            llm_res = llm.response(p)
            p.append({"role": "assistant", "content": llm_res})
            reflexion_info = "Please carefully check the response you just output, and then refine your answer . The final output is also in following format: ###My thought is {Your Thought}. My action is {your action}."
            p.append({"role": "user", "content": reflexion_info})

        elif game_style == 'agentpro':
            begin_info = "You are an aggressive player of blackjack who likes to take risks to earn high returns. Please beat the dealer and win the game."
            p.append({"role": "system", "content": begin_info + game_rule})
            game_info += "Please read the behavoiral guideline and world modeling carefully . Then you should analyze your own cards and your strategies in Self-belief and then analyze the dealer cards in World-belief. Lastly, please select your action from {\"Stand\",\"Hit\"}.### Output Format: Self-Belief is {Belief about youself}. World-Belief is {Belief about the dealer}. My action is {Your action}. Please output in the given format."
            p.append({"role": "user", "content": game_info})

        else:
            raise ValueError(f"Unsupported game_style: {game_style!r}")

        llm_res = llm.response(p)
        p.append({"role": "assistant", "content": llm_res})

        # Append the full exchange to this run's transcript.  UTF-8 is set
        # explicitly because allow_unicode=True writes non-ASCII characters
        # verbatim, which can fail under a non-UTF-8 default encoding.
        filename = storage_name + '.yaml'
        with open(filename, "a", encoding="utf-8") as yaml_file:
            yaml.dump(p, yaml_file, default_flow_style=False, allow_unicode=True)

        # Parse the reply once and map it to the numeric action id.
        decision = extract_choice(llm_res)
        if decision == "hit":
            return 0
        if decision == "stand":
            return 1
        return -1

In [35]:
# Environment
# Single-player blackjack table.  A new random seed is drawn every time this
# cell runs, so the seed differs from one execution to the next.
num_players = 1
env = rlcard.make(
    'blackjack',
    config={'game_num_players': num_players, "seed": random.randint(0, 10**10)}
)

# Seat the LLM-backed agent as the environment's only agent.
llm_agent = LlmAgent(num_actions=env.num_actions)
env.set_agents([llm_agent])

In [40]:
def play_game(env):
    """Play one game and append its outcome to the run's YAML transcript.

    The final hands are taken from the last entry of the first player's
    trajectory and written, together with a win/draw/lose label derived from
    the first payoff, to ``storage_name + '.yaml'``.

    Args:
        env: Environment exposing ``run(is_training=False)`` (returning
            ``(trajectories, payoffs)``) and ``get_payoffs()``.

    Returns:
        The value of ``env.get_payoffs()`` after the game.

    Raises:
        ValueError: If the first player's trajectory is empty, so there is
            no final state to report (previously an UnboundLocalError).
    """
    trajectories, payoffs = env.run(is_training=False)
    print(trajectories)

    if len(trajectories[0]) == 0:
        raise ValueError("env.run() returned an empty trajectory; no final state to record")

    # 'state' in the final raw observation is (player cards, dealer cards).
    final_raw_obs = trajectories[0][-1]['raw_obs']
    res_str = ('dealer {}, '.format(final_raw_obs['state'][1]) +
               'player {}, '.format(final_raw_obs['state'][0]))

    if payoffs[0] == 1:
        final_res = "win."
    elif payoffs[0] == 0:
        final_res = "draw."
    elif payoffs[0] == -1:
        final_res = "lose."
    else:
        # Unexpected payoff: still record it rather than failing on an
        # unbound name.
        final_res = "unknown (payoff {}).".format(payoffs[0])

    p = [{"final cards": res_str, "final results": final_res}]
    filename = storage_name + '.yaml'
    # UTF-8 is explicit because allow_unicode=True writes non-ASCII verbatim.
    with open(filename, "a", encoding="utf-8") as yaml_file:
        yaml.dump(p, yaml_file, default_flow_style=False, allow_unicode=True)
    return env.get_payoffs()

In [41]:
# Re-stamp the transcript prefix so this run logs to its own YAML file
# (storage_name is read at call time by LlmAgent.step and play_game).
now = datetime.now()
formatted = now.strftime("%Y-%m-%d %H:%M:%S")
storage_name = '../Agent-Pro/my_data/DeepSeek R1/' + formatted
# Play one game; the returned payoffs are shown as the cell output.
play_game(env)

[[{'obs': array([12,  9]), 'legal_actions': OrderedDict([(0, None), (1, None)]), 'raw_obs': {'actions': ('hit', 'stand'), 'player0 hand': ['S7', 'C5'], 'dealer hand': ['H9'], 'state': (['S7', 'C5'], ['H9'])}, 'raw_legal_actions': ['hit', 'stand'], 'action_record': [(0, 'hit'), (0, 'stand')]}, 0, {'obs': array([17,  9]), 'legal_actions': OrderedDict([(0, None), (1, None)]), 'raw_obs': {'player0 hand': ['S7', 'C5', 'S5'], 'dealer hand': ['H9'], 'actions': ('hit', 'stand'), 'state': (['S7', 'C5', 'S5'], ['H9'])}, 'raw_legal_actions': ['hit', 'stand'], 'action_record': [(0, 'hit'), (0, 'stand')]}, 1, {'obs': array([17, 18]), 'legal_actions': OrderedDict([(0, None), (1, None)]), 'raw_obs': {'actions': ('hit', 'stand'), 'player0 hand': ['S7', 'C5', 'S5'], 'dealer hand': ['H2', 'H9', 'C7'], 'state': (['S7', 'C5', 'S5'], ['H2', 'H9', 'C7'])}, 'raw_legal_actions': ['hit', 'stand'], 'action_record': [(0, 'hit'), (0, 'stand')]}]]


array([-1])