In [3]:
import numpy as np

In [4]:
from enum import Enum
import random 

class Color(Enum):
    HEART = 1
    DIAMOND = 2
    SPADE = 3
    CLUB = 4


class Action(Enum):
    HIT = 1
    STICK = 2

class Value(Enum):
    ACE = 1
    TWO = 2
    THREE = 3
    FOUR = 4
    FIVE = 5
    SIX = 6 
    SEVEN = 7
    EIGHT = 8
    NINE = 9
    TEN = 10
    JACK = 11
    QUEEN = 12
    KING = 13


class Card:
    def __init__(self, color, value):
        self.color = color
        self.value = value

    def __str__(self):
        return f"{self.value.name} of {self.color.name}"


class Deck:
    def __init__(self):
        self.deck = [
            Card(color,value) for color in Color for value in Value
        ]

    def deal_a_card(self):
        return random.choice(self.deck)



def blackjack_value(card):
    return min(card.value.value, 10)


def is_ace_usable(hand):
    for card in hand: 
        if card.value == Value.ACE:
            return sum([blackjack_value(card) for card in hand]) <= 11
    return False


def get_hand_value(hand):
    factual_value = sum([blackjack_value(card) for card in hand]) 
    if is_ace_usable(hand):
        return factual_value + 10
    return factual_value


class Blackjack:
    def __init__(self):
        self.players_hand = []
        self.dealers_hand = []
        self.deck = Deck()
        
    def initialize(self):
        self.players_hand = []
        self.players_hand.append(self.deck.deal_a_card())
        self.players_hand.append(self.deck.deal_a_card())

        self.dealers = []
        self.dealers_hand.append(self.deck.deal_a_card())
        self.dealers_hand.append(self.deck.deal_a_card())

    def get_current_state(self):
        return get_hand_value(self.players_hand), is_ace_usable(self.players_hand),blackjack_value(self.dealers_hand[0])


    def simulate_episode(self,policy):
        self.initialize()

        
        # players turn
        players_value, usable_ace, dealers_card = self.get_current_state()
        #action = policy(players_value, usable_ace, dealers_card )

        while players_value < 12:
            self.players_hand.append(self.deck.deal_a_card())
            players_value, usable_ace, dealers_card = self.get_current_state()
            
        action = policy(players_value, usable_ace, dealers_card)

        
        while action == Action.HIT and players_value < 21:
            yield (players_value, usable_ace, dealers_card),Action.HIT,0
            self.players_hand.append(self.deck.deal_a_card())
            players_value, usable_ace, dealers_card = self.get_current_state()
            action = policy(players_value, usable_ace, dealers_card)


        #print(players_value)

            
        if players_value > 21:
            yield (players_value, usable_ace, dealers_card),Action.HIT,-1

        else:
            # dealer's turn
            dealers_value = get_hand_value(self.dealers_hand)
                
            while dealers_value < 17:
                self.dealers_hand.append(self.deck.deal_a_card())
                dealers_value = get_hand_value(self.dealers_hand)
    
            #print(dealers_value)
            if dealers_value > 21:
                yield (players_value, usable_ace, dealers_card),Action.STICK,1
            elif dealers_value == players_value:
                yield (players_value, usable_ace, dealers_card),Action.STICK,0 
            elif dealers_value < players_value:
                yield (players_value, usable_ace, dealers_card),Action.STICK,1
            elif dealers_value > players_value:
                yield (players_value, usable_ace, dealers_card),Action.STICK,-1


def optimistic_policy(value, ace_usable, dealers_card):
    return Action.STICK if value >= 20 else Action.HIT


            

In [5]:
blackjack = Blackjack()

blackjack.simulate_episode(optimistic_policy)

for ret in blackjack.simulate_episode(optimistic_policy):
    print(ret)

((12, False, 9), <Action.HIT: 1>, 0)
((22, False, 9), <Action.HIT: 1>, -1)


In [18]:
states = {(value,usable_ace,dealer_card) for value in range(12,22) for usable_ace in [True,False] for dealer_card in range(1,11)}


def first_visit_MC(policy,simulator,discount,states, number_of_episodes = 100000):
    V = {state: 0 for state in states}
    
    n_visits = {state: 0 for state in states}

    for _ in range(number_of_episodes):
        G = 0
        full_sequence = list(simulator.simulate_episode(policy))
        states_sequence = [element[0] for element in full_sequence]
        reward_sequence = [element[2] for element in full_sequence]

        first_visit_G = {}
        for t in range(len(full_sequence)-2,-1,-1):
            G = discount*G + reward_sequence[t+1]
            state = states_sequence[t]
            first_visit_G[state] = G
            
        for state,G in first_visit_G.items():
            n_visits[state] += 1
            n = n_visits[state]
            V[state] = (n-1)/n * V[state] + 1/n * G
        
    return V

In [19]:
V = first_visit_MC(optimistic_policy,blackjack,1,states,number_of_episodes=10000)

In [20]:
V

{(16, False, 1): 0,
 (13, True, 2): 0,
 (18, True, 6): 0,
 (20, True, 7): 0,
 (19, False, 9): -0.6600411240575729,
 (15, True, 5): 0,
 (17, True, 7): 0,
 (17, False, 9): -0.6229852838121935,
 (12, False, 7): 0,
 (14, True, 3): 0,
 (18, False, 7): 0,
 (16, True, 5): 0,
 (21, False, 5): 0,
 (15, False, 6): 0,
 (13, True, 4): 0,
 (18, True, 8): 0,
 (19, False, 2): 0,
 (20, True, 9): 0,
 (17, False, 2): 0,
 (15, True, 7): 0,
 (17, True, 9): -0.3440860215053763,
 (12, False, 9): -0.4680851063829788,
 (14, True, 5): 0,
 (18, False, 9): -0.6524216524216515,
 (16, True, 7): 0,
 (21, False, 7): 0,
 (15, False, 8): 0,
 (13, True, 6): 0,
 (13, False, 8): 0,
 (18, True, 1): 0,
 (18, True, 10): 0,
 (20, True, 2): 0,
 (12, True, 4): 0,
 (19, False, 4): 0,
 (17, True, 2): 0,
 (17, False, 4): 0,
 (12, False, 2): 0,
 (18, False, 2): 0,
 (14, True, 7): 0,
 (16, True, 9): -0.32544378698224835,
 (15, False, 1): 0,
 (21, False, 9): 0,
 (13, False, 1): 0,
 (15, False, 10): 0,
 (13, True, 8): 0,
 (13, False,

In [9]:
deck.deal_a_card().value

NameError: name 'deck' is not defined