# BlackJack

## Game rule
- only dealer and single player
- card sum to 21 -> win
- ace as 1 or 11
- face as 10


## State (200개)
- player's cards (12-21)
 - 11까지는 무조건 hit를 할 것이므로 고려대상이 아님
- dealer's showing cards (1-10)
- ace is available (2)

## Action
- hit / stick / bust

## Reward
- rewards of +1, -1, and 0 are given for winning, losing, drawing



# 아래 코드는 다음 공개 코드를 가져온 것임 
[https://github.com/vinayh/rl-sutton-barto/blob/master/ch5.1-ex5.1-blackjack.py](https://github.com/vinayh/rl-sutton-barto/blob/master/ch5.1-ex5.1-blackjack.py)

In [8]:
# Player cards are output to 10s, with dealer showing card to 1s and ace at 101-200
import random

################################################
# Constant declarations and classes
################################################

num_states = 200 # Combinations of 10 sums, 10 dealer cards, and whether ace is usable
hands_to_play = 200000
returns = [0] * (num_states + 1) # Creating array with num_states elements
visits = [0] * (num_states + 1)
stateValues = [0] * (num_states + 1)

class Hand:
    cards = 0
    value = 0
    ace = False
    usableAce = False

    def __init__(self, deck):
        self.cards = deck.twoCards()

    def draw(self, deck):
        self.cards.append(deck.oneCard())

    # Value of hand
    def value(self):
        sum = 0
        for card in self.cards:
            value = getVal(card)
            if value is 1: # In case of ace
                self.ace = True
            sum += value
        if sum <= 11:
            sum += value # For ace
            self.usableAce = True
        return sum

    def stateToPlay(self, showing):
        index = calcStateIndex(self, showing)
        return (index, self.value(), showing, self.usableAce())

    def showing(self):
        return self.cards[0]

    def toDraw(self):
        if self.value() is 20 or 21:
            return False
        else:
            return True

class Deck:
    cards = range(1, 53)

    def __init__(self):
        self.create()

    def create(self):
        self.cards = range(1, 53)
        random.shuffle(self.cards)

    def oneCard(self):
        toReturn = self.cards[0]
        if len(self.cards) <= 1:
            self.create()
        else:
            self.cards = self.cards[1:]
        return toReturn

    def twoCards(self):
        toReturn = self.cards[0:2]
        if len(self.cards) <= 2:
            self.create()
        else:
            self.cards = self.cards[2:]
        return toReturn

################################################
# Functions
################################################
def calcReward(playerValue, dealerValue):
    # If one or both players went bust or tied
    if playerValue > 21:
        return -1
    if dealerValue > 21:
        return 1
    if playerValue == dealerValue:
        return 0

    # If neither player went bust
    if (playerValue > dealerValue):
        return 1
    else:
        return -1

def calcStateIndex(player, showing):
    ace = 0
    value = player.value()
    if player.usableAce:
        ace = 100

    showing = min((showing % 13) + 1, 10)

    #print 'Player value is ', player.value()
    return 10 * (value - 12) + (showing - 1) + ace;

def getVal(card):
    value = (card % 13) + 1 # Get number on card
    value = min(value, 10) # Face cards = 10
    return value



In [7]:
def main():
    for _ in range(hands_to_play):
        statesSeen = [0]

        # Makes a deck, player hand, and dealer hand
        deck = Deck()
        player = Hand(deck)
        dealer = Hand(deck)
        
        # Copies dealer's top card to 'showing', adds to seen states
        showing = dealer.showing()

        if player.value() >= 12 and player.value() <= 21:
            statesSeen[0] = calcStateIndex(player, showing) # Add int to statesSeen
        #statesSeen[0] = player.stateToPlay(showing)

        # Hit from deck, add appropriate state to seen states
        while player.toDraw():
            player.draw(deck)
            statesSeen.append(calcStateIndex(player, showing))

        # Dealer draws if value is < 17
        while dealer.value() < 17:
            dealer.draw(deck)
            
        # Reward from game is calculated from player values
        reward = calcReward(player.value(), dealer.value())

        # For each state that was seen, increment visits and add reward to returns
        for state in statesSeen:
            #if (player.value() >= 12) and (player.value() <= 21):
                returns[state] += reward
                visits[state] = visits[state] + 1

    for index, visit in enumerate(visits):
        #stateValues[index] = float(reward) / visits[index]
        if visit is 0:
            print 0
            continue

        print float(returns[index]) / visit

################################################
# Runtime code
################################################

main()

-0.415993222106
-0.34335443038
-0.326300984529
-0.208841463415
-0.159441587068
-0.0841514726508
-0.461052631579
-0.539503386005
-0.485584218513
-0.535254667392
-0.608895705521
-0.271755725191
-0.279929577465
-0.221374045802
-0.176105508146
-0.115805946792
-0.412480974125
-0.489006823351
-0.466141732283
-0.545066045066
-0.630452022205
-0.291494632535
-0.258592471358
-0.170642201835
-0.11811023622
-0.111811023622
-0.47619047619
-0.475898334794
-0.495246326707
-0.557782515991
-0.623548922056
-0.297609233306
-0.303664921466
-0.184549356223
-0.170212765957
-0.0830258302583
-0.451086956522
-0.48544600939
-0.530909090909
-0.516797054763
-0.58211856171
-0.352007469655
-0.258382642998
-0.261450381679
-0.0879676440849
-0.0945157526254
-0.4036598493
-0.508704061896
-0.496168582375
-0.530238393977
-0.440425531915
-0.225929456625
-0.118025751073
-0.0268595041322
0.0175801447777
-0.0322255790534
-0.115942028986
-0.329100529101
-0.393051031488
-0.409163558871
-0.2037470726
0.111111111111
0.2165898617