<a href="https://colab.research.google.com/github/basselkassem/easy21/blob/master/environment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [0]:
import numpy as np
import matplotlib.pyplot as plt

# Card Class

In [0]:
class Card:
    """A playing card described by a face value and a colour.

    A card whose colour is ``'red'`` contributes its value negatively;
    every other colour contributes it positively.
    """

    def __init__(self, value, color):
        """Store the unsigned face ``value`` and the ``color`` string."""
        self.color = color
        self.value = value

    def get_value(self):
        """Return the signed value: negated for red cards, unchanged otherwise."""
        is_red = self.color == 'red'
        return -self.value if is_red else self.value

    def display(self):
        """Print the colour and the unsigned face value of this card."""
        print('color:', self.color, 'value:', self.value)

# CardDeck Class

In [0]:
class CardDeck:
    """Stateless deck: every draw is an independent random sample.

    Card values are uniform over ``MIN_VALUE..MAX_VALUE`` inclusive, and a
    card drawn with :meth:`draw` is red with probability ``RED_PROBABILITY``.
    """

    RED_PROBABILITY = 1 / 3
    MIN_VALUE = 1
    MAX_VALUE = 10

    def __init__(self):
        pass

    @staticmethod
    def _random_value():
        """Return a face value drawn uniformly from MIN_VALUE..MAX_VALUE."""
        # np.random.randint excludes its upper bound, so add 1 to make
        # MAX_VALUE itself reachable.
        return np.random.randint(CardDeck.MIN_VALUE, CardDeck.MAX_VALUE + 1)

    @staticmethod
    def draw():
        """Draw a card of random colour (red or black) and random value.

        Returns:
            A new ``Card``.
        """
        # Colour is sampled before the value, matching the original call order.
        if np.random.random() < CardDeck.RED_PROBABILITY:
            color = 'red'
        else:
            color = 'black'
        return Card(CardDeck._random_value(), color)

    @staticmethod
    def draw_black_card():
        """Draw a card that is always black, with a random value.

        Returns:
            A new black ``Card``.
        """
        return Card(CardDeck._random_value(), 'black')

# Player Class

In [0]:
class Player:
    """A participant (player or dealer) holding a hand of cards.

    Attributes:
        cards: the cards held, in the order they were added.
        score: sum of the signed card values as of the last
            ``compute_score`` call.
        busted: result of the last ``is_busted`` call.
    """

    def __init__(self):
        self.cards = []
        self.score = 0
        self.busted = False

    def stick(self):
        """Take no card; the hand is left unchanged."""
        pass

    def hit(self):
        """Draw one card from the deck, add it to the hand and rescore."""
        new_card = CardDeck.draw()
        self.add_card(new_card)
        self.compute_score()

    def compute_score(self):
        """Recompute, store and return the sum of the signed card values."""
        self.score = np.sum([card.get_value() for card in self.cards])
        return self.score

    def is_busted(self):
        """Rescore the hand and report whether it lies outside 1..21.

        The result is also stored in ``self.busted``.
        """
        score = self.compute_score()
        # bool() keeps a plain Python bool even when the score is a NumPy scalar.
        self.busted = bool(score < 1 or score > 21)
        return self.busted

    def add_card(self, card):
        """Append ``card`` to the hand; a ``None`` card is reported and ignored."""
        # Identity check: `!= None` would go through the card's own
        # __eq__/__ne__ and could wrongly reject a valid card.
        if card is None:
            print('Can not add null card to player')
            return
        self.cards.append(card)

    def display_status(self):
        """Print the signed card values, the score and the bust flag."""
        self.compute_score()
        card_values = [card.get_value() for card in self.cards]
        print(card_values, ':', self.score)
        self.is_busted()
        print('Busted:', self.busted)

# State Class

In [0]:
class State:
    """Observation of the game: the dealer's first card and the player's score."""

    def __init__(self, dealer_first_card, player_score):
        """Record the dealer's first card object and the player's current score."""
        self.player_score = player_score
        self.dealer_first_card = dealer_first_card

    def is_terminal(self):
        """Return True when the player's score is below 1 or above 21."""
        out_of_range = self.player_score < 1 or self.player_score > 21
        return bool(out_of_range)

    def display(self):
        """Print the dealer's first card followed by the player's score."""
        print('Dealer first card:')
        self.dealer_first_card.display()
        print('Player Score:', self.player_score)


# Action Class

In [0]:
class Action:
    """A move identified by an integer index and a readable name.

    Index 0 is named ``'hit'``, index 1 is named ``'stick'``, and any
    other index is named ``'unknown'``.
    """

    def __init__(self, index):
        """Store ``index`` and derive the matching ``name``."""
        self.index = index
        self.name = (
            'hit' if index == 0
            else 'stick' if index == 1
            else 'unknown'
        )

# Shared instances of the two actions; Environment reads their `.index`
# (0 for hit, 1 for stick) when sampling and dispatching moves.
hit = Action(0)
stick = Action(1)

# Environment Class

In [0]:
class Environment:
    """Easy21 game between one player and one dealer.

    Both sides start with a single black card. ``step`` applies one player
    action and returns ``(next_state, reward, done)``.

    Attributes:
        dealer, player: the two ``Player`` hands.
        next_state: latest ``State`` (dealer's first card, player's score).
        reward: reward from the most recent transition.
        time_step: number of ``step`` calls processed while the game was live.
        done: True once the episode has ended.
        player_sticked, dealer_sticked: whether each side has sticked.
    """

    def __init__(self):
        self.dealer = Player()
        self.player = Player()
        self.next_state = None
        self.reward = 0
        self.time_step = 0
        self.done = False

        # These flags must exist before first_step(), whose is_done() call
        # reads them.
        self.player_sticked = False
        self.dealer_sticked = False

        self.first_step()

    def sample_action(self):
        """Return a random action index: ``hit.index`` or ``stick.index``."""
        # np.random.randint excludes its upper bound; without the +1 the
        # range collapses to hit.index only and stick is never sampled.
        return np.random.randint(hit.index, stick.index + 1)

    def reward_func(self):
        """Return -1 if the player is bust, 1 if the dealer is bust, else 0."""
        if self.player.is_busted():
            return -1
        if self.dealer.is_busted():
            return 1
        return 0

    def is_done(self):
        """Update and return ``self.done``.

        The episode is over when either side is bust or when both sides
        have sticked.
        """
        someone_busted = self.player.is_busted() or self.dealer.is_busted()
        # Include the stick flags so the terminal state set by
        # handel_two_sticks() is not cleared by a later call.
        both_sticked = self.player_sticked and self.dealer_sticked
        self.done = someone_busted or both_sticked
        return self.done

    def first_step(self):
        """Deal one black card to each side and build the initial state."""
        player_first_card = CardDeck.draw_black_card()
        dealer_first_card = CardDeck.draw_black_card()

        self.dealer.cards.append(dealer_first_card)
        self.player.cards.append(player_first_card)

        player_score = self.player.compute_score()
        self.next_state = State(dealer_first_card, player_score)

        # reward_func()/is_done() also refresh both hands' scores.
        self.reward = self.reward_func()
        self.is_done()

    def dealer_step(self):
        """Let the dealer draw one card, then refresh reward and done."""
        self.dealer.hit()
        self.reward = self.reward_func()
        self.is_done()

    def player_step(self):
        """Let the player draw one card, then refresh state, reward and done."""
        self.player.hit()
        self.next_state = State(self.dealer.cards[0], self.player.score)
        self.reward = self.reward_func()
        self.is_done()

    def dealer_policy(self):
        """Return ``stick`` when the dealer's score is 17 or more, else ``hit``."""
        if self.dealer.score >= 17:
            return stick
        return hit

    def handel_two_sticks(self):
        """End the game and set the reward by comparing the two scores.

        The reward is -1 if the dealer's score is higher, 1 if the player's
        is higher, and 0 on a tie.
        """
        self.done = True
        if self.dealer.score > self.player.score:
            self.reward = -1
        elif self.dealer.score < self.player.score:
            self.reward = 1
        else:
            self.reward = 0

    def display_info(self, player_action, dealer_action):
        """Print both sides' last action and hand, then reward and done."""
        # player_action is None after an unrecognised action index; fall
        # back to a label instead of failing on None.name.
        print('Player: ', getattr(player_action, 'name', 'unknown'))
        self.player.display_status()
        print('Dealer: ', getattr(dealer_action, 'name', 'unknown'))
        self.dealer.display_status()
        print('reward:', self.reward, 'is_done:', self.done)
        print()

    def step(self, state = None, player_action = -1):
        """Apply one player action and return ``(next_state, reward, done)``.

        Args:
            state: not used by this method; kept so existing callers that
                pass it keep working.
            player_action: ``hit.index`` to draw a card, or ``stick.index``
                to stop; after a stick the dealer plays out its whole turn
                within this call. Any other value is reported and ignored.

        Returns:
            Tuple of the latest ``State``, the reward and the done flag.
        """
        if self.is_done():
            print('Game is done')
            return (self.next_state, self.reward, self.done)

        self.time_step += 1
        action = None
        dealer_action = self.dealer_policy()
        if player_action == stick.index:
            action = stick
            self.player_sticked = True
            # Compare indices: dealer_action is an Action object, so comparing
            # it to an int directly is never true. The dealer keeps hitting
            # until its policy says stick or it goes bust.
            while dealer_action.index == hit.index:
                self.dealer_step()
                if self.done:
                    break
                dealer_action = self.dealer_policy()
            if not self.done:
                self.dealer_sticked = True
                self.handel_two_sticks()
        elif player_action == hit.index:
            action = hit
            if not self.player_sticked:
                self.player_step()
        else:
            print('Unkown action')
        self.display_info(action, dealer_action)

        return (self.next_state, self.reward, self.done)
            

In [29]:
# Demo: play a single episode, taking each action from env.sample_action(),
# for at most 100 steps.
env = Environment()
arr = []  # NOTE(review): not used anywhere in this cell — presumably a leftover; confirm before removing
for i in range(100):
    player_action = env.sample_action()
    print(player_action)
    next_state, reward, done = env.step(player_action = player_action)
    # Stop as soon as the environment reports that the episode has ended.
    if done:
        break
    # Uncomment to print each intermediate state:
    #next_state.display()  


0
0
Player:  hit
[6, 7] : 13
Busted: False
Dealer:  hit
[3] : 3
Busted: False
reward: 0 is_done: False

0
Player:  hit
[6, 7, 2] : 15
Busted: False
Dealer:  hit
[3] : 3
Busted: False
reward: 0 is_done: False

0
Player:  hit
[6, 7, 2, 5] : 20
Busted: False
Dealer:  hit
[3] : 3
Busted: False
reward: 0 is_done: False

0
Player:  hit
[6, 7, 2, 5, -3] : 17
Busted: False
Dealer:  hit
[3] : 3
Busted: False
reward: 0 is_done: False

0
Player:  hit
[6, 7, 2, 5, -3, 9] : 26
Busted: True
Dealer:  hit
[3] : 3
Busted: False
reward: -1 is_done: True

