<a href="https://colab.research.google.com/github/basselkassem/easy21/blob/master/environment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libs

In [0]:
import numpy as np
import matplotlib.pyplot as plt

# Card Class

In [0]:
class Card:
    """A playing card described by a face value and a color."""

    def __init__(self, value, color):
        self.color = color
        self.value = value

    def display(self):
        """Print the card's color and face value on a single line."""
        print('color:', self.color, 'value:', self.value)

    def get_value(self):
        """Return the signed value of the card.

        Cards whose color is 'red' count negatively; any other color
        returns the face value unchanged.
        """
        return -self.value if self.color == 'red' else self.value

# CardDeck Class

In [0]:
class CardDeck:
    """Deck that is sampled with replacement: every draw is independent.

    Face values are drawn uniformly from MIN_VALUE..MAX_VALUE inclusive
    (1..10, as in the Easy21 rules). A general draw is red with probability
    RED_PROBABILITY and black otherwise.
    """

    MIN_VALUE = 1
    MAX_VALUE = 10
    RED_PROBABILITY = 1 / 3

    def __init__(self):
        pass

    @staticmethod
    def _draw_value():
        """Return a uniformly random face value in MIN_VALUE..MAX_VALUE."""
        # np.random.randint excludes its upper bound, hence the + 1;
        # without it the highest face value could never be drawn.
        return np.random.randint(CardDeck.MIN_VALUE, CardDeck.MAX_VALUE + 1)

    @staticmethod
    def draw():
        """Draw a card of random color and random face value.

        Returns:
            Card: red with probability RED_PROBABILITY, otherwise black.
        """
        # The color is sampled before the value, so the order of calls into
        # the random generator is: random() first, then randint().
        is_red = np.random.random() < CardDeck.RED_PROBABILITY
        color = 'red' if is_red else 'black'
        value = CardDeck._draw_value()
        return Card(value, color)

    @staticmethod
    def draw_black_card():
        """Draw a black card with a random face value.

        Returns:
            Card: always colored 'black'.
        """
        return Card(CardDeck._draw_value(), 'black')

# Player Class

In [0]:
class Player:
    """A participant holding a list of cards, a running score and a bust flag."""

    def __init__(self):
        self.cards = []
        self.score = 0
        self.busted = False

    def stick(self):
        """Take no further cards; this is intentionally a no-op."""
        pass

    def hit(self, score=None):
        """Draw one card from CardDeck, keep it, and update the score.

        Args:
            score: The score the new card's signed value is added to.
                When omitted, the player's current ``self.score`` is used.
        """
        if score is None:
            score = self.score
        new_card = CardDeck.draw()
        self.add_card(new_card)
        self.score = score + new_card.get_value()

    def compute_score(self):
        """Return the sum of the signed values of all held cards.

        This recomputes the total from ``self.cards``; it does not read or
        modify ``self.score``.
        """
        return np.sum([card.get_value() for card in self.cards])

    def is_busted(self):
        """Update and return the bust flag.

        Returns:
            bool: True when ``self.score`` is below 1 or above 21.
        """
        self.busted = bool(self.score < 1 or self.score > 21)
        return self.busted

    def add_card(self, card):
        """Append ``card`` to the hand; a ``None`` card is reported and ignored."""
        if card is None:
            print('Can not add null card to player')
            return
        self.cards.append(card)

    def display_status(self):
        """Print the held card values, the score and the refreshed bust flag."""
        card_values = [card.get_value() for card in self.cards]
        print(card_values, ':', self.score)
        # Refresh the flag so the printed value reflects the current score.
        self.is_busted()
        print('Busted:', self.busted)

# State Class

In [0]:
class State:
    """Snapshot pairing the dealer's first card with the player's score."""

    def __init__(self, dealer_first_card, player_score):
        self.player_score = player_score
        self.dealer_first_card = dealer_first_card

    def is_terminal(self):
        """Return True when the player's score is below 1 or above 21."""
        return bool(self.player_score < 1 or self.player_score > 21)

    def display(self):
        """Print the dealer's first card followed by the player's score."""
        print('Dealer first card:')
        self.dealer_first_card.display()
        print('Player Score:', self.player_score)


# Action Class

In [0]:
class Action:
    """A move identified by an integer index: 0 is 'hit', 1 is 'stick'."""

    def __init__(self, index):
        self.index = index
        # Any index other than 0 or 1 is labelled 'unknown'.
        self.name = 'hit' if index == 0 else 'stick' if index == 1 else 'unknown'


# Shared instances for the two recognised moves.
hit = Action(0)
stick = Action(1)

# Environment Class

In [0]:
class Environment:
    """Card-game environment: one player against a dealer with a fixed policy.

    Call ``init()`` to start an episode, then call ``step()`` repeatedly with
    the returned state and an action index (``hit.index`` or ``stick.index``)
    until the returned ``done`` flag is True.
    """

    def __init__(self):
        self.dealer = Player()
        self.player = Player()
        self.next_state = None
        self.reward = 0
        self.time_step = 0
        self.done = False

    def sample_action(self):
        """Return a uniformly random action index (hit.index or stick.index)."""
        # randint excludes the upper bound, so + 1 makes stick.index reachable.
        return np.random.randint(hit.index, stick.index + 1)

    def reward_func(self):
        """Return -1 if the player is busted, 1 if only the dealer is, else 0."""
        if self.player.is_busted():
            return -1
        if self.dealer.is_busted():
            return 1
        return 0

    def is_done(self):
        """Refresh and return the ``done`` flag.

        The flag is only re-evaluated once both the player and the dealer hold
        at least one card; until then the current value is returned unchanged.
        """
        if self.player.cards and self.dealer.cards:
            self.done = self.player.is_busted() or self.dealer.is_busted()
        return self.done

    def init(self):
        """Start a fresh episode and deal one black card to each side.

        All per-episode state (players, reward, time step, done flag) is
        reset first, so the same Environment instance can be reused for
        several episodes.

        Returns:
            tuple: ``(state, reward, done)`` for the initial position.
        """
        self.dealer = Player()
        self.player = Player()
        self.reward = 0
        self.time_step = 0
        self.done = False

        # Draw order (player first, then dealer) is kept stable so that a
        # seeded random generator reproduces the same opening cards.
        player_first_card = CardDeck.draw_black_card()
        dealer_first_card = CardDeck.draw_black_card()

        self.dealer.cards.append(dealer_first_card)
        self.dealer.score = dealer_first_card.get_value()

        self.player.cards.append(player_first_card)
        self.player.score = player_first_card.get_value()

        self.next_state = State(dealer_first_card, self.player.score)

        return (self.next_state, self.reward, self.done)

    def dealer_step(self, score):
        """Let the dealer hit on top of ``score``, then update reward and done."""
        self.dealer.hit(score)
        self.reward = self.reward_func()
        self.is_done()

    def player_step(self, score):
        """Let the player hit on top of ``score`` and publish the new state."""
        self.player.hit(score)
        # The state always exposes the dealer's first card only.
        self.next_state = State(self.dealer.cards[0], self.player.score)
        self.reward = self.reward_func()
        self.is_done()

    def dealer_policy(self):
        """Return ``stick`` when the dealer's score is 17 or more, else ``hit``."""
        if self.dealer.score >= 17:
            return stick
        return hit

    def handel_two_sticks(self):
        """Set the reward by comparing scores once both sides have stuck.

        The reward is -1 if the dealer's score is higher, 1 if the player's is
        higher, and 0 on a tie. (The method name is kept as-is so existing
        callers continue to work.)
        """
        if self.dealer.score > self.player.score:
            self.reward = -1
        elif self.dealer.score < self.player.score:
            self.reward = 1
        else:
            self.reward = 0

    def display_info(self, name, action):
        """Print both hands plus the current reward and done flag.

        Args:
            name: 'Player' while the player is acting; any other value is
                treated as the dealer's turn.
            action: The Action just taken by the acting side.
        """
        if name == 'Player':
            print('Player: ', action.name)
            self.player.display_status()
            print('Dealer: not its turn yet')
            self.dealer.display_status()
        else:
            print('Player: Sticked')
            self.player.display_status()
            print('Dealer: ', action.name)
            self.dealer.display_status()
        print('reward:', self.reward, 'is_done:', self.done)
        print()

    def step(self, state = None, player_action = -1):
        """Advance the episode by one player action.

        Args:
            state: The State to act from. When omitted, the environment's own
                latest state (``self.next_state``) is used.
            player_action: ``hit.index`` or ``stick.index``; any other value is
                reported and ignored.

        Returns:
            tuple: ``(next_state, reward, done)``.

        Raises:
            AttributeError: If no state is given and ``init()`` has not been
                called yet, so there is no state to fall back to.
        """
        if state is None:
            state = self.next_state
        dealer_first_card = state.dealer_first_card
        player_score = state.player_score
        if not self.done:
            self.time_step += 1
            if player_action == hit.index:
                self.player_step(player_score)
                self.display_info('Player', Action(player_action))
            elif player_action == stick.index:
                # The dealer plays out its whole turn starting from its
                # first card's value.
                self.dealer.score = dealer_first_card.get_value()
                while not(self.dealer.is_busted() or self.done):
                    dealer_action = self.dealer_policy()
                    if dealer_action.index == hit.index:
                        score = self.dealer.score
                        self.dealer_step(score)
                    elif dealer_action.index == stick.index:
                        self.done = True
                        self.handel_two_sticks()
                    else:
                        print('Unknown dealer action')
                    self.display_info('Dealer', dealer_action)
            else:
                print('Unkown player action')
        else:
            print('Game is over')
        return (self.next_state, self.reward, self.done)

In [135]:
# Play a single episode with uniformly random player actions, giving up
# after 100 steps if the episode has not finished by then.
env = Environment()
next_state, reward, done = env.init()
for i in range(100):
    player_action = env.sample_action()
    next_state, reward, done = env.step(state=next_state, player_action=player_action)
    if done:
        break

Player: Sticked
[5] : 5
Busted: False
Dealer:  hit
[1, 6] : 7
Busted: False
reward: 0 is_done: False

Player: Sticked
[5] : 5
Busted: False
Dealer:  hit
[1, 6, 7] : 14
Busted: False
reward: 0 is_done: False

Player: Sticked
[5] : 5
Busted: False
Dealer:  hit
[1, 6, 7, 8] : 22
Busted: True
reward: 1 is_done: True

