# Blackjack with AI player

In [6]:
import random

In [7]:
RANKS = ['2','3','4','5','6','7','8','9','10','J','Q','K','A']

# Card class keeps:
    # rank - The rank of the card [2,3,4,5,6,7,8,9,10,J,Q,K,A]
    # value - The point value associated with the rank
    # isAce - True if the card is an Ace
class Card(object):
    
    def __init__(self, rank = None):
        self.rank = rank
        
        if not rank: # null card
            self.value = 0
            self.isAce = False
            return
        elif rank == 'A':
            self.value = 11
            self.isAce = True
        elif rank in ['J','Q','K']:
            self.value = 10
            self.isAce = False
        else:
            self.value = int(rank) # 2-10
            self.isAce = False
            
    def __str__(self):
        return self.rank
    
    def __eq__(self, card):
        return self.value == card.value
    
    def __repr__(self):
        return self.rank


# Deck class keeps:
    # cards - list of cards remaining in the deck
class Deck(object):
    
    def __init__(self, nDecks = 1):
        self.cards = []
        self.buildDeck(nDecks)
        self.prepareDeck(nDecks)
    
    def buildDeck(self, nDecks):
        for i in range(nDecks):
            for j in range(4): # 4 of each card per deck
                for rank in RANKS:
                    self.cards.append(Card(rank))
    
    def prepareDeck(self, nDecks):
        for i in range(7*nDecks):
            self.shuffle()
            
    def shuffle(self):
        write_index = 0 # to write,
        while write_index < len(self.cards):
            read_index = random.randint(write_index, len(self.cards)-1)
            self.cards[read_index], self.cards[write_index] = self.cards[write_index], self.cards[read_index]
            write_index += 1
        
    def draw(self):
        card = self.cards.pop(0)
        return card
    
    def __len__(self):
        return len(self.cards)
    
    def __str__(self):
        return str([str(card) for card in self.cards])
    
    
# Hand class keeps
    # hand - cards in the hand
    # bet - the bet placed on the hand
class Hand(object):
    
    def __init__(self, card = None, bet = 50):
        self.bet = bet
        self.cards = []
        self.split = False
        if card:
            self.cards.append(card)
        
    def addCard(self, card):
        self.cards.append(card)     
            
    def removeCard(self):
        return self.cards.pop()
    
    def isBust(self):
        if self.value > 21:
            return True
        return False
    
    def isBlackjack(self):
        if len(self.cards) == 2:
            if self.value == 21:
                return True
        return False
    
    def setSplit(self):
        self.split = True
    
    def unSplit(self):
        self.split = False
        
    def calculateValue(self, prop = 'value'):
        value = sum(card.value for card in self.cards)
        aces = sum(card.isAce for card in self.cards)  
        # Aces are initially given a value of 11, they are ammended to 1 if the total valaue is > 21
        while (value > 21) and aces: 
            value -= 10
            aces -= 1
        if (prop == 'aces'):
            return aces
        return value
        
    @property
    def value(self):
        return self.calculateValue()
    
    @property
    def soft(self):
        if self.calculateValue('aces'):
            return True
        return False
              
    def __len__(self):
        return len(self.cards)
    
    def __str__(self):
        return str([str(card) for card in self.cards])
        
    def __repr__(self):
        return str(self.cards)
    
    def  __hash__(self):
        return self.value
    
    def __eq__(self, other):
        if len(self.cards) != len(other.cards):
            return False
        
        for card in self.cards:
            if card not in other.cards:
                return False
        return True

#### Testing Card, Deck, and Hand

## Player and Dealer

In [8]:
# Player class keeps:
    # money
    # hands
class Player:
    
    def __init__(self, money = 5000, bet = 50):
        self.money = money
        self.betAmount = bet
        self.bets = []
        self.hands = [Hand()]
        
    def makeBet(self, handNumber):
        if handNumber >= len(self.bets): # new hand gets a new bet
            self.bets.append(self.betAmount)
        else: # double the bet
            self.bets[handNumber] += self.betAmount
        # remove the bet amount from players money
        self.money -= self.betAmount
            
    def resetBets(self):
        self.bets = []
        
    # True if player has enough money to make a bet, False otherwise
    def hasMoney(self):
        return self.money >= self.betAmount
        
    def hit(self, handNumber, card):
        self.hands[handNumber].addCard(card)

    def doubleDown(self, handNumber, card):
        self.hit(handNumber, card)
        self.makeBet(handNumber)

    def split(self, handNumber, cards):
        newHand = Hand()
        # remove one card from the hand being split and add to newHand
        newHand.addCard(self.hands[handNumber].removeCard())
        # add a second card to the old hand
        self.hands[handNumber].addCard(cards[0])  
        # add a second card to the new hand
        newHand.addCard(cards[1])
        newHand.setSplit()
        # append newHand to players Hands and add bet for the hand
        self.hands.append(newHand)
        self.makeBet(len(self.hands)) # make a new bet for the new hand
        
        
    def showHands(self):
        print("Player has %d hands: \n" % len(self.hands))
        for hand in self.hands:
            print(hand)
        print() 
        
    def __str__(self):
        return str(self.hands)

# Dealer class keeps:
    # deck
    # hand
    # isBust
class Dealer:
    
    def __init__(self, deck, Player):
        self.deck = deck
        self.hand = Hand()
        self.player = Player
    
    def deal(self):
        for i in range(2): # deal each player, including the dealer, 2 cards
            self.hand.addCard(self.deck.draw()) # Deal the dealer a card
            self.player.hands[0].addCard(self.deck.draw()) # Deal the player a card   
        self.player.makeBet(0) # player makes a bet on hand
       
    def play(self):
        while self.hand.value <= 17: # hits until they have 17
            # if soft 17, dealer must hit
            if self.hand.value == 17:
                if self.hand.soft:
                    self.hit()
                else:
                    break
            # less than 17, must hit
            else:
                self.hit()
                      
    def hit(self):
        self.hand.addCard(self.deck.draw())
            
    def stand(self):
        return self.hand
    
    def showHands(self):
        print('Dealers Hand:')
        print(self.hand)

#### Test Player and Dealer

## Valid Moves and Make

In [9]:
# returns a list of valid moves for a hand
def validMoves(player, handNumber):
    moves = ['hit','stand']
    # check that hand has 2 cards and the player has money to make a bet
    if len(player.hands[handNumber].cards) == 2:
        if player.hasMoney(): 
            moves.append('doubleDown')
            if player.hands[handNumber].cards[0].rank == player.hands[handNumber].cards[1].rank: # compare ranks same
                moves.append('split')  
    return moves
    
def makeMove(game, handNumber, move):
    if move == 'hit':
        game.player.hit(handNumber, game.dealer.deck.draw())
        return False
    elif move == 'split':
        game.player.split(handNumber, [game.dealer.deck.draw(), game.dealer.deck.draw()])
        return False
    elif move == 'doubleDown':
        game.player.doubleDown(handNumber, game.dealer.deck.draw())
        return True
    else:
        return True



#### Testing ValidMoves and MakeMove

## Blackjack Game

In [10]:
class Blackjack:
    
    def __init__(self, player, deckSize=6):
        self.player = player
        self.deck = Deck(deckSize)
        self.deckSize = deckSize
        self.dealer = Dealer(self.deck, player) # Remember, dealer holds the deck and deals, not the game
        self.dealer.deal()
        
    def newHand(self):
        self.player.hands = [Hand()]
        self.player.resetBets()
        self.dealer.hand = Hand()
        self.dealer.deal()
        
    def newDeck(self):
        self.dealer.deck = Deck(self.deckSize)
       
    # hand is player's hand
    def gameStatus(self, hand):
        # player bust
        if hand.isBust():
            return -1
        # dealer bust
        if self.dealer.hand.isBust():
            return 1
        # draw
        if hand.value == self.dealer.hand.value:
            return 0
        # player win
        if hand.value > self.dealer.hand.value:
            return 1
        # dealer win
        if hand.value < self.dealer.hand.value:
            return -1
        
        

#### Test Blackjack

## TrainQ

In [11]:
import random
import numpy as np
import copy
import operator

# determines if a greedy move should be taken
def epsilonGreedy (epsilon, Q, player, handNumber, dealerCard, validMovesF):
    validMoves = validMovesF(player, handNumber)
    
    if np.random.uniform() < epsilon: # Random choice
        return random.choice(validMoves)
    else: # Greedy choice
        # Greedy Move
        Qs = np.array([Q.get(stateTuple(player.hands[handNumber], dealerCard, m), -1000) for m in validMoves])
        return validMoves[ np.argmax(Qs) ]

def sortHand(hand):
    sortedHand = [card.rank for card in hand.cards]
    sortedHand.sort()
    return sortedHand
    
    
def stateTuple(hand, dealerCard, move):
    sortedHand = sortHand(hand)
    return (hand.value, dealerCard.value, move)

def updateQ(Q, movesMade, value, learningRate):
    #print(movesMade)
    for i, state in enumerate(movesMade):
        # If new move update Q table with 0
        if state not in Q:
            Q[state] = 0
        elif i < len(movesMade)-1:
            # Update the move with learning rate
            Q[state] += learningRate*value
        else:
            Q[state] += value
    return Q

In [12]:

def trainQ(nRepetitions, learningRate, epsilonDecayRate, validMovesF, makeMoveF):
    epsilon = 1.0
    splits = 0
    
    Q = {}
    outcomes = np.zeros(nRepetitions)
    epsilons = np.zeros(nRepetitions)
    
    for gameNum in range(nRepetitions):
        if gameNum%100 == 0:
            print("Game #%d" %(gameNum))
        epsilon *= epsilonDecayRate  # decay epsilon to move away from random choices
        epsilons[gameNum] = epsilon
        
        # create a game
        player = Player()
        blackjack = Blackjack(player)
        
        done = False
        
        # play some blackjack
        while len(blackjack.deck) >  52: # deck is 6 decks, last deck is a buffer deck
            if blackjack.player.hasMoney(): # make sure player has the funds to play
                blackjack.newHand() # deal a hand
                handNumber = 0
                handsToRemove = []
                movesMade = []
            
                # For each players hand
                while handNumber < len(blackjack.player.hands):
                    done = False
                    
                    handMoves = [] # holds the moves made on this hand
                    if blackjack.player.hands[handNumber].split == True:
                        splitHand = Hand(blackjack.player.hands[handNumber].cards[0])
                        splitHand.addCard(blackjack.player.hands[handNumber].cards[0])
                        handMoves.append(stateTuple(splitHand, blackjack.dealer.hand.cards[1], 'split'))
                        blackjack.player.hands[handNumber].unSplit()

                    # Check for a blackjack
                    if blackjack.player.hands[handNumber].isBlackjack():
                        blackjack.player.money += .5*blackjack.player.bets[handNumber]
                        handsToRemove.append(handNumber)
                        done = True
                    
                    
                    step = 0
                    
                    # Player plays each hand until they stand or bust
                    while not done:
                        step += 1
                        
                        # Determine a move for the players hand
                        move = epsilonGreedy(epsilon, Q, blackjack.player, handNumber, 
                                             blackjack.dealer.hand.cards[1], validMovesF)
                        #if (move == 'split'):
                            #print('split')
                        
                        newGame = copy.deepcopy(blackjack)
                        done = makeMoveF(newGame, handNumber, move)
                            
                        # if new move, add to Q
                        if stateTuple(blackjack.player.hands[handNumber], blackjack.dealer.hand.cards[1], move) not in Q:
                            if (move == 'split'):
                                splits += 1
                            Q[stateTuple(blackjack.player.hands[handNumber], blackjack.dealer.hand.cards[1], move)] = 0                        
                        
                        # if move results in 21, set Q value = to 0
                        if newGame.player.hands[handNumber].value == 21:
                            done = True
                        # if move results in bust, update Q with reinforcement
                        elif newGame.player.hands[handNumber].isBust():
                            done = True
        
                        # store a tuple (hand, dealercard, move) for later, player can only 'see' one of dealers cards
                        handMoves.append(stateTuple(blackjack.player.hands[handNumber], 
                                                    blackjack.dealer.hand.cards[1], move))
                        
                        blackjack = newGame
                            
                    handNumber += 1 # next hand
                    movesMade.append(handMoves)
                
                numHands = len(handsToRemove)
                for i in range(numHands):
                    if i != 0:
                        handsToRemove[i] -= 1*i
                    del blackjack.player.hands[handsToRemove[i]]
                    del blackjack.player.bets[handsToRemove[i]]
                
                # After Player plays all his hands dealer plays
                blackjack.dealer.play()
                #print('moves: ' + str(movesMade))
                
                # After Dealer plays, check all players hands and payout on wins
                for hand,nothing in enumerate (blackjack.player.hands):
                    handStatus = blackjack.gameStatus(blackjack.player.hands[hand])
                    # update Q table
                    Q = updateQ(Q, movesMade[hand], handStatus, learningRate)
                    if handStatus == 0: # tie (push)
                        blackjack.player.money += blackjack.player.bets[hand] # player gets their money back
                    if handStatus == 1: # win
                        blackjack.player.money += 2*blackjack.player.bets[hand] # player gets their money plus winnings
                        
            else:
                break # player doesn't have the funds, game over 
        # update outcome
        outcomes[gameNum] = blackjack.player.money
    print('splits: ' + str(splits))
    return Q, outcomes, epsilons
        
    
    # test trainQ
    # sort hands before tuple
    # update Q - only last one
 
    
def testQ(Q, maxSteps , validMovesF, makeMoveF):
    
    player = Player(50000)
    blackjack = Blackjack(player,2)
    
    numberWins, numberBlackJacks, totalHands, numberPosWins, numberNegWins, numberZeroWins, numberLoses, numbers = playBlackjack(Q, blackjack, maxSteps, validMovesF, makeMoveF)
        
    return (str("Player Money: %d" %(blackjack.player.money)), 
            str("Player Winnings: %d" %(blackjack.player.money - 50000)), 
            str("Number of hands that were Blackjacks: %d" %(numberBlackJacks)),
            str("Win Percentage: %d, totalHands: %d" %(numberWins/totalHands*100, totalHands)),
            str("Number of Wins with Pos Reinforcement: %d" %(numberPosWins)),
            str("Number of Wins with Neg Reinforcement: %d" %(numberNegWins)),
            str("Number of Wins with 0 Reinforcement: %d" %(numberZeroWins)),
            str("Number of Pos, Neg, and Zero States: %s" %(numbers)),
            str("Number of loses: %d" %(numberLoses)))
        
def playBlackjack(Q, blackjack, numberHands, validMovesF, makeMoveF):
    numberWins = totalHands = numberPos = numberNeg = numberZero = numberPosWins = numberNegWins = 0 
    numberLoses = numberZeroWins = numberBlackJacks = 0
    states = []
    for i in range(numberHands):
        if not blackjack.player.hasMoney():
            break

        if len(blackjack.dealer.deck) < 52:
            blackjack.newDeck()
            
        blackjack.newHand() # deal a hand
        handNumber = 0
        handsToRemove = []
        while handNumber < len(blackjack.player.hands):
            done = False
            totalHands += 1
            
            if blackjack.player.hands[handNumber].isBlackjack():
                blackjack.player.money += 2*blackjack.player.bets[handNumber] + .5*blackjack.player.bets[handNumber]
                handsToRemove.append(handNumber)
                numberBlackJacks += 1
                numberWins += 1
                done = True
            
            while not done: 
                validMoves = validMovesF(blackjack.player, handNumber)
                Qs = np.array([Q.get(stateTuple(blackjack.player.hands[handNumber],
                                                blackjack.dealer.hand.cards[1], m), -1000) for m in validMoves])
                move = validMoves[ np.argmax(Qs) ] # choose move
                done = makeMoveF(blackjack, handNumber, move)
                
                if Qs[np.argmax(Qs)] > 0:
                    numberPos += 1
                elif Qs[np.argmax(Qs)] == 0:
                    numberZero += 1
                else:
                    numberNeg += 1
                
                if blackjack.player.hands[handNumber].value == 21:
                    done = True
                if blackjack.player.hands[handNumber].isBust():
                    done = True
                
                if done:
                    states.append(Qs[np.argmax(Qs)])
                   
            handNumber += 1
          
        
        numHands = len(handsToRemove)
        for i in range(numHands):
            if i != 0:
                handsToRemove[i] -= 1*i
            del blackjack.player.hands[handsToRemove[i]]
            del blackjack.player.bets[handsToRemove[i]]
                
        blackjack.dealer.play()
        
        # total hands
        for hand,nothing in enumerate (blackjack.player.hands):
            handStatus = blackjack.gameStatus(blackjack.player.hands[hand])
            if handStatus == 0: # tie (push)
                blackjack.player.money += blackjack.player.bets[hand] # player gets their money backts their money back
            if handStatus == 1: # win
                if states[hand] > 0:
                    numberPosWins += 1
                if states[hand] < 0:
                    numberNegWins += 1
                if states[hand] == 0:
                    numberZeroWins += 1
                numberWins += 1
                blackjack.player.money += 2*blackjack.player.bets[hand] # player gets their money plus winnings
            else:
                numberLoses += 1
    return numberWins, numberBlackJacks, totalHands, numberPosWins, numberNegWins, numberZeroWins, numberLoses, [numberPos, -numberNeg, numberZero]

## Testing

In [13]:

Q,outcomes,_ = trainQ(1000, 0.2, 0.99998, validMoves, makeMove)

print(outcomes)

Game #0
Game #100
Game #200
Game #300
Game #400
Game #500
Game #600
Game #700
Game #800
Game #900
splits: 90
[ 3825.  2875.  4000.  3750.  3525.  3550.  4275.  2875.  3700.  4100.
  3550.  3525.  3450.  3925.  3600.  3900.  3850.  3550.  3975.  3175.
  4075.  3025.  3375.  4300.  3950.  3500.  4000.  3825.  3625.  3350.
  3350.  4375.  3150.  4325.  4275.  3250.  3200.  3825.  3525.  4050.
  4550.  3700.  3675.  4250.  4050.  3300.  4125.  3525.  3500.  3625.
  3950.  3425.  3950.  4225.  3550.  5175.  3600.  4000.  3600.  4050.
  3475.  2750.  3725.  3975.  4125.  3725.  3850.  4050.  3900.  4500.
  3375.  3575.  3250.  4100.  3825.  4225.  3725.  4450.  3875.  3300.
  4025.  3450.  4025.  3925.  3725.  4000.  4250.  3850.  3950.  3925.
  4050.  3250.  3925.  3750.  3525.  3700.  4225.  4300.  2975.  3625.
  3900.  3625.  3625.  3675.  4425.  3325.  3200.  2950.  3600.  3675.
  3125.  3625.  3650.  3775.  4075.  3175.  3925.  3300.  4700.  3600.
  3525.  4025.  2900.  3600.  4075.  32

In [14]:
print(Q)

{(20, 10, 'hit'): -555.7999999999998, (20, 2, 'doubleDown'): -76, (16, 9, 'hit'): -74.40000000000003, (13, 10, 'stand'): -307, (6, 7, 'stand'): -4, (14, 10, 'doubleDown'): -184, (19, 9, 'doubleDown'): -41, (12, 3, 'stand'): -30, (18, 6, 'doubleDown'): -31, (12, 3, 'hit'): -42.400000000000006, (15, 3, 'stand'): -47, (13, 4, 'doubleDown'): -25, (11, 8, 'doubleDown'): 20, (20, 5, 'doubleDown'): -65, (20, 10, 'stand'): 317, (18, 4, 'stand'): 27, (11, 6, 'doubleDown'): 3, (4, 7, 'hit'): 0.2, (15, 7, 'hit'): -51.20000000000002, (19, 7, 'stand'): 91, (13, 10, 'hit'): -181.79999999999973, (19, 10, 'hit'): -334.79999999999967, (11, 10, 'stand'): -117, (8, 6, 'stand'): -19, (4, 11, 'split'): -2.4, (7, 11, 'doubleDown'): -18, (8, 11, 'doubleDown'): -16, (12, 7, 'hit'): -35.79999999999999, (14, 4, 'doubleDown'): -40, (7, 11, 'hit'): -6.600000000000003, (17, 11, 'stand'): -77, (16, 4, 'hit'): -42.80000000000001, (7, 2, 'hit'): -1.4, (15, 2, 'hit'): -62.20000000000007, (19, 2, 'hit'): -78.2000000000

In [39]:
gameStats = testQ(Q, 2000 , validMoves, makeMove)

for stat in gameStats:
    print(stat)

Player Money: 35925
Player Winnings: -14075
Number of hands that were Blackjacks: 101
Win Percentage: 43, totalHands: 2067
Number of Wins with Pos Reinforcement: 778
Number of Wins with Neg Reinforcement: 17
Number of Wins with 0 Reinforcement: 0
Number of Pos, Neg, and Zero States: [696, -1836, 9]
Number of loses: 1171


In [32]:
def handQs(Q, card1, card2, dealercard):
    player = Player()
    player.hands[0].addCard(Card(card1))
    player.hands[0].addCard(Card(card2))
    dealerCard = Card(dealercard)

    valid = validMoves(player, 0)
    Qs = np.array([Q.get(stateTuple(player.hands[0], dealerCard, m), -1000) for m in valid])
    
    return ([card1, card2], dealercard, valid[np.argmax(Qs)])

In [33]:
print('Hand Value is 17+, should always stand: ')
print(handQs(Q, 'K', 'Q', '10'))
print(handQs(Q, 'K', '9', '8'))
print(handQs(Q, '9', '9', '7'))
print(handQs(Q, '8', 'Q', 'Q'))
print(handQs(Q, '8', '9', '6'))
print()

print('Hand Value is 13-16, hit if dealer card is 7 or higher: ')
print(handQs(Q, '10', '3', '7'))
print(handQs(Q, '10', '4', '9')) 
print(handQs(Q, '10', '5', '10')) 
print(handQs(Q, '10', '6', 'A')) 
print()

print('Hand Value is 13-16, stand if dealer card is 6 or lower: ')
print(handQs(Q, '10', '3', '6'))
print(handQs(Q, '10', '4', '5')) 
print(handQs(Q, '10', '5', '3')) 
print(handQs(Q, '10', '6', '2')) 
print()

print('Hand Value is 11, double down unless dealer has an A, then hit: ')
print(handQs(Q, '9', '2', 'A'))
print(handQs(Q, '9', '2', '5')) 
print(handQs(Q, '9', '2', '3')) 
print(handQs(Q, '9', '2', '10')) 
print(handQs(Q, '9', '2', '9')) 
print()

print('Hand Value is 10, double down unless dealer has a 10-A, then hit: ')
print(handQs(Q, '8', '2', 'A'))
print(handQs(Q, '8', '2', 'K')) 
print(handQs(Q, '8', '2', '3')) 
print(handQs(Q, '8', '2', '5')) 
print(handQs(Q, '8', '2', '9')) 
print()

print('Hand Value is 9, double down if dealer has 3-6, otherwise hit: ')
print(handQs(Q, '7', '2', 'A'))
print(handQs(Q, '7', '2', 'K')) 
print(handQs(Q, '7', '2', '9')) 
print(handQs(Q, '7', '2', '3')) 
print(handQs(Q, '7', '2', '5')) 
print()

print('Hand Value is 5-8, always hit')
print(handQs(Q, '5', '2', 'A'))
print(handQs(Q, '6', '2', 'K')) 
print(handQs(Q, '4', '2', '9')) 
print(handQs(Q, '3', '2', '3')) 
print(handQs(Q, '3', '2', '5')) 
print()


Hand Value is 17+, should always stand: 
(['K', 'Q'], '10', 'stand')
(['K', '9'], '8', 'stand')
(['9', '9'], '7', 'stand')
(['8', 'Q'], 'Q', 'stand')
(['8', '9'], '6', 'stand')

Hand Value is 13-16, hit if dealer card is 7 or higher: 
(['10', '3'], '7', 'doubleDown')
(['10', '4'], '9', 'doubleDown')
(['10', '5'], '10', 'doubleDown')
(['10', '6'], 'A', 'doubleDown')

Hand Value is 13-16, stand if dealer card is 6 or lower: 
(['10', '3'], '6', 'stand')
(['10', '4'], '5', 'stand')
(['10', '5'], '3', 'doubleDown')
(['10', '6'], '2', 'stand')

Hand Value is 11, double down unless dealer has an A, then hit: 
(['9', '2'], 'A', 'hit')
(['9', '2'], '5', 'hit')
(['9', '2'], '3', 'hit')
(['9', '2'], '10', 'hit')
(['9', '2'], '9', 'doubleDown')

Hand Value is 10, double down unless dealer has a 10-A, then hit: 
(['8', '2'], 'A', 'hit')
(['8', '2'], 'K', 'hit')
(['8', '2'], '3', 'doubleDown')
(['8', '2'], '5', 'doubleDown')
(['8', '2'], '9', 'doubleDown')

Hand Value is 9, double down if dealer has

Based  on the above results, we have determined that our AI has trouble differintiating between when it should hit and when it should double down. This is unsupprising since doubling down is in essence a hit. It wasn't clear that this phenomenon would occur until after extensive testing. We think this partially explains why our AI is still losing money when it plays after it has learned. One possible fix would be to include implementation in the training to update the double down Q table value for the state if the game wins by hitting only once instead of updating the hit moves Q table value. This might help it to determine that it should double down in states where it wins by hitting only once, which would result in greater winnings for those hands. It could also go the other way, causing it to double down and loose more often and eat away its bankroll more quickly. We found that the element of chance in blackjack is really difficult to overcome with an AI.

In [37]:
print('Always Split Aces and 8s')
print(handQs(Q, 'A', 'A', 'A'))
print(handQs(Q, 'A', 'A', 'K')) 
print(handQs(Q, 'A', 'A', '5')) 
print(handQs(Q, '8', '8', 'A')) 
print(handQs(Q, '8', '8', '10')) 
print(handQs(Q, '8', '8', '6')) 
print()

print('Never Split 10s, Always stand: ')
print(handQs(Q, '10', '10', 'A'))
print(handQs(Q, '10', '10', 'K')) 
print(handQs(Q, '10', '10', '9')) 
print(handQs(Q, '10', '10', '3'))  
print()

print('Split these Hands: ')
print(handQs(Q, '2', '2', '7'))
print(handQs(Q, '3', '3', '2')) 
print(handQs(Q, '4', '4', '6')) 
print(handQs(Q, '6', '6', '5')) 
print(handQs(Q, '7', '7', '3')) 
print(handQs(Q, '9', '9', '6')) 
print(handQs(Q, '9', '9', '4')) 
print()

print('Hit on these Hands: ')
print(handQs(Q, '2', '2', '8'))
print(handQs(Q, '3', '3', '9')) 
print(handQs(Q, '4', '4', 'A'))
print(handQs(Q, '5', '5', 'K')) 
print(handQs(Q, '6', '6', '8')) 
print(handQs(Q, '7', '7', '10')) 
print()

print('Double Down on these hands: ')
print(handQs(Q, '5', '5', '7'))
print(handQs(Q, '5', '5', '5')) 
print()
 
print('Stand on these hands: ')
print(handQs(Q, '10', '10', '7'))
print(handQs(Q, '10', '10', '5')) 
print(handQs(Q, '10', '10', '10'))
print(handQs(Q, '10', '10', 'A')) 
print(handQs(Q, '9', '9', '7')) 
print(handQs(Q, '9', '9', 'A')) 

Always Split Aces and 8s
(['A', 'A'], 'A', 'split')
(['A', 'A'], 'K', 'split')
(['A', 'A'], '5', 'split')
(['8', '8'], 'A', 'split')
(['8', '8'], '10', 'split')
(['8', '8'], '6', 'split')

Never Split 10s, Always stand: 
(['10', '10'], 'A', 'stand')
(['10', '10'], 'K', 'stand')
(['10', '10'], '9', 'stand')
(['10', '10'], '3', 'stand')

Split these Hands: 
(['2', '2'], '7', 'hit')
(['3', '3'], '2', 'hit')
(['4', '4'], '6', 'doubleDown')
(['6', '6'], '5', 'split')
(['7', '7'], '3', 'split')
(['9', '9'], '6', 'stand')
(['9', '9'], '4', 'stand')

Hit on these Hands: 
(['2', '2'], '8', 'hit')
(['3', '3'], '9', 'split')
(['4', '4'], 'A', 'split')
(['5', '5'], 'K', 'split')
(['6', '6'], '8', 'split')
(['7', '7'], '10', 'split')

Double Down on these hands: 
(['5', '5'], '7', 'doubleDown')
(['5', '5'], '5', 'doubleDown')

Stand on these hands: 
(['10', '10'], '7', 'stand')
(['10', '10'], '5', 'stand')
(['10', '10'], '10', 'stand')
(['10', '10'], 'A', 'stand')
(['9', '9'], '7', 'stand')
(['9', 

Our AI has a fairly high success rate when it comes to splitting on the correct hands. For the majority of hands it splits when appropriate and really only struggles to come to the right decision on low pairs and a pair of nines. We actually had quite a bit of trouble with splitting because of the nature of the action. Since splitting a hand results in two new hands, the results of moves on both those hands affect the value of the state that was split in the Q table. This makes training the Q table for splitting extreamely challenging, and we encountered a lot of frustration trying to get this right. As it stands, we are very happy with the number of states the AI determines to splt correctly, due to the complex nature of the action.

In [35]:
# doesnt know anything about how its bets affect wins and losses, so it has trouble between double down and hit. If we did it over we might change some logic.

Blackjack is a game of chance that can be beaten if played by a group of experienced players that know precicely what they are doing. As far as we understand it, blackjack strategy relies on a few things. The players always assume the facedown card of the dealer has a value of 10 and they play accordingly. This is not something we allowed our AI to assume because we wanted to test how well it could learn blackjack strategy with no prior "knoledge" of the games strategy. Had we implemented this feature and allowed it to shoose its moves based on this supposed knowledge it may have performed better in the long run.

Overall, we are very happy with our AI's ability to learn the strategy of blackjack. While it did not get the move choices correct for all states when compared to known blackjack strategy it does choose the coorect move about 70 percent of the time. We can think of two ways to improve our AI in the future. The first is to update double downs value as well as hits when the player wins from a game state that it only hit once. This would likely resolve many of the issues it had trying to determine the difference between when it should hit and double down. The second, is to allow it to play with the assumption that the hidden dealer card is a 10. We could also allow it to assume the next card on the deck is likely a 10. This wouldlead it to play more along  known strategies and likely resolve many of the issues it had with determining whether to split a pair of cards.

Of course, Blackjack has such a huge element of chance that if we really wanted it to win all the time we would have written an AI to count cards and vary its bets based on the remaining contents of the deck.