# Learning value-function in Blackjack

In [3]:
using StatsBase

In [25]:
deck = collect(1:13)
deck[end-2:end] .= 10;
println(deck)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10]


* **Stick**: Player stops
* **Hit**: Request an additional card

In [349]:
"""
Given a pair of cards, check whether any
of the cards has an ace and its usable. If it's
usable, we turn its value into an 11
"""
function turn_usable_ace(card1, card2)
    has_usable_ace = false
    if card1 == 1 && 11 + card2 <= 21
        card1 = 11
        has_usable_ace = true
    elseif card2 == 1 && card1 + 11 <= 21
        card2 = 11
        has_usable_ace = true
    end
    
    return (card1, card2), has_usable_ace
end


"""
**Fixed strategy**  
The dealer sticks on any sum of 17 or greater, and hits otherwise.
"""
function dealer_strategy(value_cards)
    while value_cards < 17
        value_cards = value_cards + sample(deck)
    end
    
    return value_cards
end


"""
A player makes a decision based on his initial cards
and one card from the dealer (the one he can see).
"""
function player_strategy(value_cards, dealer_one_card, policy)    
    
    value_cards = policy(value_cards, dealer_one_card)
    
    return value_cards
end


"""
Policy in which a player sticks only if his sum
is 20 or 21.
"""
function policy(value_cards, dealer_one_card)
    while value_cards < 20
        value_cards = value_cards + sample(deck)
    end
    return value_cards
end

policy

In [418]:
function blackjack(player_value_cards, dealer_cards)
    reward = 0
    
    # is the dealer allowed to use a 'usable' ace?
    # dealer_value_cards = sum(turn_usable_ace(dealer_cards...)[1])
    dealer_value_cards = sum(dealer_cards)
    
    
    if player_value_cards == 21 && dealer_value_cards != 21
        reward = 1
        return reward
    end
    
    
    player_value_cards = player_strategy(player_value_cards, dealer_cards[1], policy)
    
    if player_value_cards > 21
        reward = -1
    else
        dealer_value_cards = dealer_strategy(dealer_value_cards)
        if dealer_value_cards > 21
            reward = 1
        else
            reward = player_value_cards > dealer_value_cards ? 1 : 0
        end
    end
    
    return reward, (player_value_cards, dealer_value_cards)
end

blackjack (generic function with 1 method)

In [419]:
dealer = sample(deck, 2)
player = sample(deck, 2)

player, has_usable_ace = turn_usable_ace(player...)
player_value_cards = max(12, sum(player))

13

In [428]:
blackjack(player_value_cards, dealer)

(-1, (25, 13))

In [453]:
value_matrix = zeros(2, 11, 11)
count_matrix = zeros(2, 11, 11)

for _ in 1:100
    dealer = sample(deck, 2)
    player = sample(deck, 2)
    
    dealer_shown_card = dealer[1]

    player, has_usable_ace = turn_usable_ace(player...)
    has_usable_ace = has_usable_ace * 1 + 1
    player_value_cards = max(12, sum(player))
        
    reward, final_cards = blackjack(player_value_cards, dealer)
    
    println(has_usable_ace, " ", dealer_shown_card, " ", player_value_cards - 11)
    value_matrix[has_usable_ace, dealer_shown_card, player_value_cards - 11] += reward * 1.0
end

2 2 2
2 3 7
1 10 2
1 8 5
1 1 7
1 10 1


LoadError: BoundsError: attempt to access Int64 at index [2]

In [444]:
value_matrix[2, :, :]

11×11 Matrix{Float64}:
 0.0  0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  -1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0

In [433]:
value_matrix[1, 2, 2]

0.0