<a href="https://colab.research.google.com/github/basselkassem/easy21/blob/master/environment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries

In [0]:
import numpy as np
import matplotlib.pyplot as plt

# Card Class

In [0]:
class Card:
    """A single Easy21 card: a face value together with a colour.

    The colour decides the sign of the card's contribution to a hand:
    ``'red'`` cards count negatively, every other colour counts positively.
    """

    def __init__(self, value, color):
        """Store the face ``value`` and the ``color`` string of the card."""
        self.color = color
        self.value = value

    def display(self):
        """Print the card's colour and face value on one line."""
        print('color:', self.color, 'value:', self.value)

    def get_value(self):
        """Return the signed value: negated for red cards, unchanged otherwise."""
        is_red = self.color == 'red'
        return -self.value if is_red else self.value

# CardDeck Class

In [0]:
class CardDeck:
    """Infinite Easy21 deck: every draw is independent of earlier draws.

    Face values are drawn uniformly from ``MIN_VALUE``..``MAX_VALUE``
    inclusive (1..10 in Easy21). A regular draw is red with probability
    ``RED_CARD_PROB`` and black otherwise.
    """

    # Probability that a regular draw yields a red card.
    RED_CARD_PROB = 1 / 3
    # Inclusive range of face values.
    MIN_VALUE = 1
    MAX_VALUE = 10

    def __init__(self):
        pass

    @staticmethod
    def _draw_value():
        """Return a face value drawn uniformly from the inclusive range."""
        # np.random.randint excludes its upper bound, hence the ``+ 1``;
        # without it the value 10 could never be dealt.
        return np.random.randint(CardDeck.MIN_VALUE, CardDeck.MAX_VALUE + 1)

    @staticmethod
    def draw():
        """Draw a card of random colour and random value.

        Returns:
            Card: red with probability ``RED_CARD_PROB``, otherwise black.
        """
        # Colour is sampled before the value, as in the original draw order.
        if np.random.random() < CardDeck.RED_CARD_PROB:
            color = 'red'
        else:
            color = 'black'
        return Card(CardDeck._draw_value(), color)

    @staticmethod
    def draw_black_card():
        """Draw a card that is always black, with a random value.

        Returns:
            Card: a black card.
        """
        return Card(CardDeck._draw_value(), 'black')

# Player Class

In [0]:
class Player:
    """A participant holding a hand of cards.

    Attributes:
        cards: list of card objects; each must provide ``get_value()``.
        value: signed sum of the hand, refreshed by ``compute_value``.
        busted: result of the most recent ``is_busted`` check.
    """

    def __init__(self):
        self.cards = []
        self.value = 0
        self.busted = False

    def stick(self):
        """Take no further card; deliberately a no-op."""
        pass

    def hit(self):
        """Draw one card from the deck, add it to the hand and update ``value``."""
        new_card = CardDeck.draw()
        self.add_card(new_card)
        self.compute_value()

    def compute_value(self):
        """Recompute, store and return the signed sum of the hand.

        The builtin ``sum`` is used so that an empty hand yields the integer
        ``0`` (``np.sum([])`` would yield the float ``0.0``) and the stored
        value keeps the type of the card values.
        """
        self.value = sum(card.get_value() for card in self.cards)
        return self.value

    def is_busted(self):
        """Return True when the hand value lies outside the range 1..21.

        The result is also stored in ``self.busted``. An empty hand has value
        0 and therefore counts as busted.
        """
        value = self.compute_value()
        self.busted = bool(value < 1 or value > 21)
        return self.busted

    def add_card(self, card):
        """Append ``card`` to the hand; print a message and skip it if it is None."""
        if card is not None:
            self.cards.append(card)
        else:
            print('Can not add card to player')

    def display_status(self):
        """Print the signed card values, their sum, and the busted flag."""
        self.compute_value()
        cards = [card.get_value() for card in self.cards]
        print(cards, ':', self.value)
        self.is_busted()
        print('Busted:', self.busted)

# State Class

In [0]:
class State:
    """Observation of the game: the dealer's first card and the player's sum."""

    def __init__(self, dealer_first_card, player_value):
        """Store the dealer's first card object and the player's hand value."""
        self.player_value = player_value
        self.dealer_first_card = dealer_first_card

    def is_terminal(self):
        """Return True when the player's value is below 1 or above 21."""
        too_low = self.player_value < 1
        out_of_range = too_low or self.player_value > 21
        return bool(out_of_range)

    def display(self):
        """Print the dealer's first card followed by the player's value."""
        print('Dealer first card:')
        self.dealer_first_card.display()
        print('Player value:', self.player_value)


# Environment Class

In [0]:
class Environment:
    """Easy21 game environment driven one move at a time through ``step``.

    The first ``step`` call deals one black card to the dealer and one to the
    player. Afterwards each call performs at most one move: a player hit, or,
    once the player has chosen to stick, a single dealer move. The episode
    ends when either side busts or when both sides have stuck.

    Attributes:
        dealer, player: the two ``Player`` hands.
        next_state: most recent ``State`` (None before the first step).
        reward: most recent reward (-1, 0 or 1).
        time_step: number of ``step`` calls so far.
        done: whether the episode has ended.
        ACTIONS_DIC: mapping from action name to integer code.
        player_sticked, dealer_sticked: whether each side has stuck.
    """

    def __init__(self):
        self.dealer = Player()
        self.player = Player()
        self.next_state = None
        self.reward = 0
        self.time_step = 0
        self.done = False
        self.ACTIONS_DIC = {'hit': 0, 'stick': 1}
        self.player_sticked = False
        self.dealer_sticked = False

    def sample_action(self):
        """Return a uniformly random action code (0 or 1)."""
        return np.random.randint(2)

    def reward_func(self):
        """Return -1 if the player is busted, 1 if the dealer is busted, else 0."""
        if self.player.is_busted():
            return -1
        if self.dealer.is_busted():
            return 1
        return 0

    def is_done(self):
        """Update and return ``self.done``.

        The episode is over when either hand is busted or when both sides
        have stuck. Including the stick flags keeps a game that ended through
        ``handel_two_sticks`` terminal; checking busts alone would reset
        ``done`` to False on the next call and let the game resume.
        """
        busted = self.player.is_busted() or self.dealer.is_busted()
        both_sticked = self.player_sticked and self.dealer_sticked
        self.done = bool(busted or both_sticked)
        return self.done

    def first_step(self):
        """Deal one black card to each side and build the initial state."""
        player_first_card = CardDeck.draw_black_card()
        dealer_first_card = CardDeck.draw_black_card()

        self.dealer.cards.append(dealer_first_card)
        self.player.cards.append(player_first_card)

        player_value = self.player.compute_value()
        self.next_state = State(dealer_first_card, player_value)

        self.reward = self.reward_func()
        self.is_done()

    def dealer_step(self):
        """Let the dealer draw one card, then refresh reward and done flag."""
        self.dealer.hit()
        self.reward = self.reward_func()
        self.is_done()

    def player_step(self):
        """Let the player draw one card, then refresh state, reward and done flag."""
        self.player.hit()

        self.next_state = State(self.dealer.cards[0], self.player.value)
        self.reward = self.reward_func()
        self.is_done()

    def dealer_policy(self):
        """Return the dealer's action code: stick on 17 or more, otherwise hit."""
        dealer_value = self.dealer.compute_value()
        if dealer_value >= 17:
            return self.ACTIONS_DIC['stick']
        return self.ACTIONS_DIC['hit']

    def handel_two_sticks(self):
        """End the episode after both sides stuck and score by comparing sums.

        Reward is 1 if the player's sum is higher, -1 if the dealer's is
        higher and 0 on a tie.
        """
        # Record both sticks so that later is_done() calls stay terminal even
        # if this method is invoked directly.
        self.player_sticked = True
        self.dealer_sticked = True
        self.done = True
        dealer_value = self.dealer.compute_value()
        player_value = self.player.compute_value()
        if dealer_value > player_value:
            self.reward = -1
        elif dealer_value < player_value:
            self.reward = 1
        else:
            self.reward = 0

    def _action_name(self, action):
        """Return the name for an action code, or 'unknown' if there is none."""
        names_by_code = {code: name for name, code in self.ACTIONS_DIC.items()}
        return names_by_code.get(action, 'unknown')

    def display_info(self, player_action, dealer_action):
        """Print both sides' actions and hands plus the reward and done flag.

        Action codes outside ``ACTIONS_DIC`` (including the default -1) are
        printed as 'unknown' instead of being used as a list index, which
        mislabelled -1 as 'stick' and raised IndexError for other codes.
        """
        print('Player: ', self._action_name(player_action))
        self.player.display_status()
        print('Dealer: ', self._action_name(dealer_action))
        self.dealer.display_status()
        print('reward:', self.reward, 'is_done:', self.done)
        print()

    def step(self, state = None, player_action = -1):
        """Advance the game by one call and report the outcome.

        Args:
            state: unused; kept for interface compatibility.
            player_action: action code from ``ACTIONS_DIC``. It is ignored on
                the first call, which only deals the initial cards.

        Returns:
            tuple: ``(next_state, reward, done)``.

        Notes:
            After the player has stuck, a 'hit' action performs no move; only
            'stick' lets the dealer continue playing.
        """
        self.time_step += 1
        # The dealer's intention is evaluated before any move of this call.
        dealer_action = self.dealer_policy()
        if self.time_step == 1:
            self.first_step()
        elif not self.is_done():
            if player_action == self.ACTIONS_DIC['stick']:
                self.player_sticked = True
                if dealer_action == self.ACTIONS_DIC['hit']:
                    if not self.dealer_sticked:
                        self.dealer_step()
                else:
                    self.dealer_sticked = True
                    self.handel_two_sticks()
            elif player_action == self.ACTIONS_DIC['hit']:
                if not self.player_sticked:
                    self.player_step()
            else:
                print('Unkown action')
        else:
            print('Game is done')
        self.display_info(player_action, dealer_action)
        return (self.next_state, self.reward, self.done)
            

In [22]:
# Play one episode with uniformly random player actions.
env = Environment()
# NOTE(review): arr is never filled or read in this cell.
arr = []
# Cap the episode at 100 step() calls; the loop exits early once step() reports done.
for i in range(100):
    player_action = env.sample_action()
    # step() prints both hands and the reward on every call; the first call
    # only deals the initial cards.
    next_state, reward, done = env.step(player_action = player_action)
    if done:
        break
    # Uncomment to print the observed state after each step:
    #next_state.display()


Player:  hit
[5] : 5
Busted: False
Dealer:  hit
[3] : 3
Busted: False
reward: 0 is_done: False

Player:  stick
[5] : 5
Busted: False
Dealer:  hit
[3, -8] : -5
Busted: True
reward: 1 is_done: True

