<a href="https://colab.research.google.com/github/frank731/blackjack-ai/blob/main/BlackjackAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Requirements:

In [None]:
!pip install stable_baselines3
!pip install gymnasium

Blackjack Environment

In [None]:
import random
from gymnasium import Env
from gymnasium import spaces
import numpy as np

# Number of 52-card decks combined into the deck that BlackjackEnv deals from;
# it also sizes the "played_cards" part of the observation.
DECK_COUNT = 1

class BlackjackEnv(Env):
    """Hit-or-stand blackjack played hand after hand from one shuffled deck.

    An episode starts with a freshly shuffled deck built from ``DECK_COUNT``
    52-card decks and consists of consecutive hands. It terminates when a hand
    finishes and fewer than ``MIN_CARDS_FOR_NEW_HAND`` cards remain, so
    ``played_cards`` in the observation lists the cards dealt since the last
    shuffle.

    Cards are integers 1-13: 1 is an ace (worth 11 or 1), 11-13 are worth 10.
    Rewards are +1 for a won hand, -1 for a lost hand and 0 otherwise (ties and
    hits that do not bust).
    """

    # A new hand is only dealt while at least this many cards remain; otherwise
    # the episode terminates.
    MIN_CARDS_FOR_NEW_HAND = 10
    # Highest total a hand can show: a hard 21 that draws a ten-valued card.
    MAX_PLAYER_SCORE = 31
    # Number of card slots exposed for the player's hand in the observation.
    HAND_SLOTS = 7

    def __init__(self, render_mode="console"):
        """Build the action/observation spaces; call ``reset`` before stepping.

        Args:
            render_mode: ``"console"`` makes ``render`` print both hands; any
                other value makes ``render`` a no-op.
        """
        super().__init__()
        # Sketch of a possible betting extension (not implemented):
        # self.action_space = spaces.Dict({
        #     "move": spaces.Discrete(3), # 0 stand, 1 hit, 2 double down
        #     "bet_amount": spaces.Discrete(cur_funds) # Betting amount
        # })
        self.action_space = spaces.Discrete(2)  # 0 stand, 1 hit
        self.observation_space = spaces.Dict({
            # Player total. A terminal observation may show a busted hand, so
            # the range has to extend past 21.
            "player_score": spaces.Discrete(self.MAX_PLAYER_SCORE + 1),
            # Player's cards in deal order, 0 is an empty slot.
            "player_hand": spaces.MultiDiscrete([14] * self.HAND_SLOTS),
            # Dealer's first card.
            "dealer_card": spaces.Discrete(14),
            # 1 once the player has been dealt an ace in the current hand.
            "player_ace": spaces.Discrete(2),
            # Cards dealt since the last shuffle, 0 is an empty slot.
            "played_cards": spaces.MultiDiscrete([14] * (13 * 4 * DECK_COUNT)),
            #"cur_funds": spaces.Discrete(1e9) # Current funds
        })
        #self.initial_funds = cur_funds
        #self.cur_funds = cur_funds
        #self.bet_amount = None
        self.deck = None
        self.player_hand = None
        self.dealer_hand = None
        self.played_cards = None
        self.player_ace = None
        # Not part of the observation and never updated while dealing; kept
        # only so existing attribute access keeps working.
        self.dealer_ace = None
        self.render_mode = render_mode

    def reset(self, seed=None, options=None):
        """Shuffle a fresh deck and deal the first hand.

        Args:
            seed: forwarded to ``Env.reset`` to seed ``self.np_random``, which
                ``create_deck`` uses for shuffling.
            options: accepted for API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        #self.cur_funds = self.initial_funds
        #self.bet_amount = 0
        self.played_cards = []
        self.deck = self.create_deck()
        self.reset_game()
        return self._get_obs(), {}

    def step(self, action):
        """Apply one player decision.

        Args:
            action: truthy to hit, falsy to stand.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. When a hand
            ends and enough cards remain, the observation already describes the
            next hand. When ``terminated`` is True no new hand was dealt and
            the observation still shows the finished hand; call ``reset``
            before stepping again. ``truncated`` is always False.
        """
        reward = 0
        terminated = False
        if action:
            self.add_card(True)
            if self.calculate_hand_value(self.player_hand) > 21:
                reward = -1
                terminated = self.reset_game()
        else:
            # The dealer draws until reaching at least 17.
            while self.calculate_hand_value(self.dealer_hand) < 17:
                self.add_card(False)
            player_value = self.calculate_hand_value(self.player_hand)
            dealer_value = self.calculate_hand_value(self.dealer_hand)
            if player_value > 21:
                reward = -1
            elif dealer_value > 21:
                reward = 1
            elif player_value > dealer_value:
                reward = 1
            elif dealer_value > player_value:
                reward = -1
            terminated = self.reset_game()
        return self._get_obs(), reward, terminated, False, {}

    def render(self):
        """Print both hands and their scores when ``render_mode`` is "console"."""
        if self.render_mode == "console":
            print("Player: ")
            self.print_hand(self.player_hand)
            print("Dealer: ")
            self.print_hand(self.dealer_hand)

    def close(self):
        """Delegate to ``Env.close``."""
        return super().close()

    def _get_obs(self):
        """Build the observation dict; its keys match ``observation_space``."""
        total_cards = 13 * 4 * DECK_COUNT
        return {
            "player_score": self.calculate_hand_value(self.player_hand),
            # Pad with zeros, then cut to the fixed slot count.
            "player_hand": np.array(
                self.player_hand + [0] * self.HAND_SLOTS
            )[:self.HAND_SLOTS],
            "dealer_card": self.dealer_hand[0],
            "player_ace": int(self.player_ace),
            "played_cards": np.array(
                self.played_cards + [0] * total_cards
            )[:total_cards],
        }

    def reset_game(self):
        """Deal the next hand, or report that too few cards are left.

        Returns:
            True when fewer than ``MIN_CARDS_FOR_NEW_HAND`` cards remain (no
            hand is dealt and the current hands are left untouched), False
            after dealing two cards each to player and dealer.
        """
        if len(self.deck) < self.MIN_CARDS_FOR_NEW_HAND:
            return True
        self.player_hand = []
        self.dealer_hand = []
        self.player_ace = False
        self.dealer_ace = False
        self.add_card(True, 2)
        self.add_card(False, 2)
        return False

    def add_card(self, player, count=1):
        """Deal ``count`` cards from the deck.

        Args:
            player: True to deal to the player, False to deal to the dealer.
            count: number of cards to deal.
        """
        for _ in range(count):
            if not self.deck:
                # Safety net for a hand that outlasts the deck. Reshuffling
                # earlier than this would keep reset_game from ever seeing a
                # low deck, so the episode could never terminate.
                self.deck = self.create_deck()
                self.played_cards = []
            card = self.deck.pop()
            self.played_cards.append(card)
            if player:
                self.player_hand.append(card)
                if card == 1:
                    self.player_ace = True
            else:
                self.dealer_hand.append(card)

    def create_deck(self):
        """Return a shuffled list of ``DECK_COUNT`` full decks (cards 1-13)."""
        deck = list(range(1, 14)) * 4 * DECK_COUNT
        # Shuffle with the environment's own generator so reset(seed=...)
        # makes the deal order reproducible.
        self.np_random.shuffle(deck)
        return deck

    def get_card_value(self, card):
        """Return a card's value: 10 for cards above 10, 11 for an ace, else its rank."""
        if card > 10:
            return 10
        elif card == 1:
            return 11
        else:
            return card

    def calculate_hand_value(self, hand):
        """Return the hand total, counting aces as 1 instead of 11 while over 21."""
        value = sum(self.get_card_value(card) for card in hand)
        # Each ace can be downgraded from 11 to 1 once.
        num_aces = sum(1 for card in hand if card == 1)
        while value > 21 and num_aces > 0:
            value -= 10
            num_aces -= 1
        return value

    def print_hand(self, hand):
        """Print the raw card list followed by the hand's current score."""
        print(hand)
        print("Current score: {}".format(self.calculate_hand_value(hand)))


Testing Environment

In [None]:
import random
from gymnasium import Env
from gymnasium import spaces
import numpy as np

# Number of 52-card decks in play; repeats the value assigned in the
# environment cell.
DECK_COUNT = 1

class BlackjackTestEnv(BlackjackEnv):
    """BlackjackEnv variant that prints every move and outcome to stdout."""

    def step(self, action):
        """Play one decision with console output.

        Args:
            action: truthy to hit, falsy to stand.

        Returns:
            ``(observation, reward, terminated, truncated, info)``; the fourth
            element is always False and ``info`` is an empty dict.
        """
        if action:
            return self._hit_with_output()
        return self._stand_with_output()

    def _hit_with_output(self):
        """Show the hands, deal the player one card and settle a bust."""
        print("Hit")
        self.render()
        self.add_card(True)
        if self.calculate_hand_value(self.player_hand) <= 21:
            return self._get_obs(), 0, False, False, {}
        print("Busted, lose")
        episode_over = self.reset_game()
        return self._get_obs(), -1, episode_over, False, {}

    def _stand_with_output(self):
        """Let the dealer draw to at least 17, showing each draw, then settle."""
        print("Stand")
        while self.calculate_hand_value(self.dealer_hand) < 17:
            self.add_card(False)
            self.render()

        player_total = self.calculate_hand_value(self.player_hand)
        dealer_total = self.calculate_hand_value(self.dealer_hand)
        if player_total > 21:
            reward, message = -1, "Busted, lose"
        elif dealer_total > 21:
            reward, message = 1, "Dealer busted, win"
        elif player_total > dealer_total:
            reward, message = 1, "Win"
        elif dealer_total > player_total:
            reward, message = -1, "Lose"
        else:
            # Equal totals: nothing is printed and the reward stays 0.
            reward, message = 0, None
        if message is not None:
            print(message)

        episode_over = self.reset_game()
        return self._get_obs(), reward, episode_over, False, {}

    def render(self):
        """Print the player's and dealer's hands when render_mode is "console"."""
        if self.render_mode != "console":
            return
        for label, hand in (
            ("Player: ", self.player_hand),
            ("Dealer: ", self.dealer_hand),
        ):
            print(label)
            self.print_hand(hand)

Training

In [None]:
from stable_baselines3.dqn.dqn import DQN
#from blackjack import BlackjackEnv
from stable_baselines3 import A2C
from stable_baselines3.common.env_checker import check_env

# Train a DQN agent on the blackjack environment and save it to disk.
env = BlackjackEnv()
#env.reset()
#env.step(1)
#print(env._get_obs())
# The environment's observation space is a Dict, hence "MultiInputPolicy".
model = DQN("MultiInputPolicy", env, verbose=1)
# total_timesteps is documented as an int, so pass 100_000 rather than the
# float 1e5.
model.learn(total_timesteps=100_000, progress_bar=True)
model.save("DQN_blackjack")

#vec_env = model.get_env()
#obs = vec_env.reset()
#for i in range(1000):
#    action, _state = model.predict(obs, deterministic=True)
#    obs, reward, done, info = vec_env.step(action)
#    vec_env.render()
#check_env(env, warn=True)

Testing

In [None]:
from stable_baselines3 import A2C
from stable_baselines3.dqn.dqn import DQN
from stable_baselines3.common.evaluation import evaluate_policy
# Load the saved DQN agent onto the verbose test environment and evaluate it
# for a single episode.
test_env = BlackjackTestEnv()
model = DQN.load("DQN_blackjack", env=test_env)
vec_env = model.get_env()  # the environment as wrapped by the model
mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=1)
print(mean_reward, std_reward)
#env = BlackjackEnv()
#vec_env = model.get_env()
#obs = vec_env.reset()
#for i in range(1000):
#    action, _state = model.predict(obs, deterministic=True)
#    obs, reward, done, info = vec_env.step(action)
#check_env(env, warn=True)

#print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")