# BlackJack

## Game rule
- only dealer and single player
- card sum to 21 -> win
- ace as 1 or 11
- face as 10


## State (200개)
- player's cards (12-21)
 - 11까지는 무조건 hit를 할 것이므로 고려대상이 아님
- dealer's showing cards (1-10)
- ace is available (2)

## Action
- hit / stick / bust

## Reward
- rewards of +1, -1, and 0 are given for winning, losing, drawing



# 아래 코드는 다음 공개 코드를 가져온 것임 
[https://github.com/vinayh/rl-sutton-barto/blob/master/ch5.1-ex5.1-blackjack.py](https://github.com/vinayh/rl-sutton-barto/blob/master/ch5.1-ex5.1-blackjack.py)

In [9]:
# Player cards are output to 10s, with dealer showing card to 1s and ace at 101-200
import random

################################################
# Constant declarations and classes
################################################

num_states = 200 # Combinations of 10 sums, 10 dealer cards, and whether ace is usable
hands_to_play = 200000
returns = [0] * (num_states + 1) # Creating array with num_states elements
visits = [0] * (num_states + 1)
stateValues = [0] * (num_states + 1)

class Hand:
    cards = 0
    value = 0
    ace = False
    usableAce = False

    def __init__(self, deck):
        self.cards = deck.twoCards()

    def draw(self, deck):
        self.cards.append(deck.oneCard())

    # Value of hand
    def value(self):
        sum = 0
        for card in self.cards:
            value = getVal(card)
            if value is 1: # In case of ace
                self.ace = True
            sum += value
        if sum <= 11:
            sum += value # For ace
            self.usableAce = True
        return sum

    def stateToPlay(self, showing):
        index = calcStateIndex(self, showing)
        return (index, self.value(), showing, self.usableAce())

    def showing(self):
        return self.cards[0]

    def toDraw(self):
        if self.value() is 20 or 21:
            return False
        else:
            return True

class Deck:
    cards = range(1, 53)

    def __init__(self):
        self.create()

    def create(self):
        self.cards = range(1, 53)
        random.shuffle(self.cards)

    def oneCard(self):
        toReturn = self.cards[0]
        if len(self.cards) <= 1:
            self.create()
        else:
            self.cards = self.cards[1:]
        return toReturn

    def twoCards(self):
        toReturn = self.cards[0:2]
        if len(self.cards) <= 2:
            self.create()
        else:
            self.cards = self.cards[2:]
        return toReturn

################################################
# Functions
################################################
def calcReward(playerValue, dealerValue):
    # If one or both players went bust or tied
    if playerValue > 21:
        return -1
    if dealerValue > 21:
        return 1
    if playerValue == dealerValue:
        return 0

    # If neither player went bust
    if (playerValue > dealerValue):
        return 1
    else:
        return -1

def calcStateIndex(player, showing):
    ace = 0
    value = player.value()
    if player.usableAce:
        ace = 100

    showing = min((showing % 13) + 1, 10)

    #print 'Player value is ', player.value()
    return 10 * (value - 12) + (showing - 1) + ace;

def getVal(card):
    value = (card % 13) + 1 # Get number on card
    value = min(value, 10) # Face cards = 10
    return value



In [10]:
def main():
    for _ in range(hands_to_play):
        statesSeen = [0]

        # Makes a deck, player hand, and dealer hand
        deck = Deck()
        player = Hand(deck)
        dealer = Hand(deck)
        
        # Copies dealer's top card to 'showing', adds to seen states
        showing = dealer.showing()

        if player.value() >= 12 and player.value() <= 21:
            statesSeen[0] = calcStateIndex(player, showing) # Add int to statesSeen
        #statesSeen[0] = player.stateToPlay(showing)

        # Hit from deck, add appropriate state to seen states
        while player.toDraw():
            player.draw(deck)
            statesSeen.append(calcStateIndex(player, showing))

        # Dealer draws if value is < 17
        while dealer.value() < 17:
            dealer.draw(deck)
            
        # Reward from game is calculated from player values
        reward = calcReward(player.value(), dealer.value())

        # For each state that was seen, increment visits and add reward to returns
        for state in statesSeen:
            #if (player.value() >= 12) and (player.value() <= 21):
                returns[state] += reward
                visits[state] = visits[state] + 1

    for index, visit in enumerate(visits):
        #stateValues[index] = float(reward) / visits[index]
        if visit is 0:
            print 0
            continue

        print float(returns[index]) / visit

################################################
# Runtime code
################################################

main()

-0.409909326425
-0.321115537849
-0.274871039057
-0.224278312361
-0.128684399712
-0.0826086956522
-0.44301994302
-0.503865073788
-0.499274310595
-0.544709654435
-0.622848200313
-0.368571428571
-0.320788530466
-0.194765204003
-0.181957186544
-0.0934656741108
-0.451540195342
-0.50593824228
-0.517628205128
-0.538046619149
-0.592105263158
-0.359497645212
-0.324301439458
-0.149056603774
-0.145884270579
-0.162029459902
-0.404255319149
-0.49293433084
-0.51186440678
-0.533724340176
-0.65306122449
-0.385474860335
-0.296779808529
-0.257805530776
-0.150826446281
-0.0729927007299
-0.405156537753
-0.440860215054
-0.496441281139
-0.549966009517
-0.656504065041
-0.29582929195
-0.300685602351
-0.160697887971
-0.154362416107
-0.149769585253
-0.429980276134
-0.48524923703
-0.429679922405
-0.537090432503
-0.502032520325
-0.178533475027
-0.0643564356436
-0.0705128205128
0.0122324159021
-0.0130522088353
-0.0755395683453
-0.309989701339
-0.354104846686
-0.402718550107
-0.183371298405
0.110436893204
0.2472594