<a href="https://colab.research.google.com/github/basselkassem/easy21/blob/master/environment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install libs

In [1]:
!pip install import_ipynb



In [2]:
# Mount Google Drive at /content/drive (uses the Colab-provided google.colab package).
from google.colab import drive
drive.mount("/content/drive")
# Switch the working directory to the project folder on Drive.
# NOTE(review): presumably game_config.ipynb lives here so the later
# `import game_config` can resolve it via import_ipynb — confirm.
%cd /content/drive/'My Drive'/'Colab Notebooks'/easy21

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Colab Notebooks/easy21


# Import Libs

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import import_ipynb
import game_config as conf

importing Jupyter notebook from game_config.ipynb
[   100   1000  10000 100000]
[0 1]
[ 1  2  3  4  5  6  7  8  9 10] (10,)
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21] (21,)
(10, 21)
(10, 21, 2)
[  50  100 1000]


# Card Class

In [0]:
class Card:
    """A playing card described by a face value and a color."""

    def __init__(self, value, color):
        self.color = color
        self.value = value

    def display(self):
        """Print the card's color and face value."""
        print('color:', self.color, 'value:', self.value)

    def get_value(self):
        """Return the signed value of the card.

        A card whose color is 'red' counts negatively; any other color
        counts with its face value unchanged.
        """
        is_red = self.color == 'red'
        return -self.value if is_red else self.value

# CardDeck Class

In [0]:
class CardDeck:
    """Stateless source of randomly generated cards (sampling with replacement)."""

    def __init__(self):
        pass

    @staticmethod
    def draw():
        """Draw a random card.

        The color is 'red' with probability 1/3 and 'black' otherwise. The
        value is an integer from conf.CARD_VALUE_MIN to conf.CARD_VALUE_MAX
        inclusive (the +1 compensates for randint's exclusive upper bound).
        """
        red_probability = 1 / 3
        # Color is sampled before the value, so the random stream is
        # consumed in a fixed order: one random() call, then one randint().
        picked_color = 'red' if np.random.random() < red_probability else 'black'
        picked_value = np.random.randint(conf.CARD_VALUE_MIN, conf.CARD_VALUE_MAX + 1)
        return Card(picked_value, picked_color)

    @staticmethod
    def draw_black_card():
        """Draw a card that is always 'black', with a random value in the configured range."""
        picked_value = np.random.randint(conf.CARD_VALUE_MIN, conf.CARD_VALUE_MAX + 1)
        return Card(picked_value, 'black')

# Player Class

In [0]:
class Player:
    """A game participant that keeps a running score."""

    def __init__(self):
        # Value of the participant's first card; stays 0 until assigned from outside.
        self.first_card = 0
        self.score = 0

    def set_score(self, score):
        """Overwrite the current score."""
        self.score = score

    def hit(self):
        """Draw one card from the deck and add its signed value to the score."""
        self.score += CardDeck.draw().get_value()

    def is_busted(self):
        """Return True when the score lies outside [conf.MIN_VALUE, conf.MAX_VALUE]."""
        out_of_range = self.score < conf.MIN_VALUE or self.score > conf.MAX_VALUE
        return True if out_of_range else False

    def display_status(self):
        """Print the current score and whether the participant is busted."""
        print('Score:', self.score)
        print('Busted:', self.is_busted())

# Environment Class

In [0]:
class Environment:
    """Game environment holding a player and a dealer.

    A state is the tuple ``(dealer's first card, player's score)``.
    The reward is -1 when the player loses, 1 when the player wins and 0
    otherwise (draw, or the game is still running).
    """

    def __init__(self):
        self.dealer = Player()
        self.player = Player()
        self.next_state = None
        self.reward = 0
        self.time_step = 0
        self.done = False

    def sample_action(self):
        """Return a random action: an integer from conf.HIT to conf.STICK inclusive."""
        return np.random.randint(conf.HIT, conf.STICK + 1)

    def reward_func(self):
        """Return -1 if the player is busted, 1 if the dealer is busted, else 0."""
        if self.player.is_busted():
            return -1
        if self.dealer.is_busted():
            return 1
        return 0

    def is_done(self):
        """Set and return ``self.done``: True when either participant is busted."""
        if self.player.is_busted() or self.dealer.is_busted():
            self.done = True
        else:
            self.done = False
        return self.done

    def init(self):
        """Start a new game and return ``(state, reward, done)``.

        Both participants receive one black card. The reward and the done
        flag are reset so the same Environment instance can be reused for
        several games; ``time_step`` is left untouched and keeps counting
        across games.
        """
        # Without this reset a reused environment would report the previous
        # game's terminal reward/done and step() would refuse to play.
        self.reward = 0
        self.done = False

        player_first_card = CardDeck.draw_black_card()
        dealer_first_card = CardDeck.draw_black_card()

        self.dealer.set_score(dealer_first_card.get_value())
        self.dealer.first_card = dealer_first_card.get_value()

        self.player.set_score(player_first_card.get_value())
        self.player.first_card = player_first_card.get_value()

        self.next_state = (self.dealer.first_card, self.player.score)

        return (self.next_state, self.reward, self.done)

    def dealer_step(self):
        """Let the dealer draw one card, then refresh reward and done flag."""
        self.dealer.hit()
        self.reward = self.reward_func()
        self.is_done()

    def player_step(self):
        """Let the player draw one card, then refresh state, reward and done flag."""
        self.player.hit()
        self.next_state = (self.dealer.first_card, self.player.score)
        self.reward = self.reward_func()
        self.is_done()

    def dealer_policy(self):
        """Return conf.STICK once the dealer's score reaches conf.DEALER_THRESHOLD, else conf.HIT."""
        action = None
        dealer_score = self.dealer.score
        if dealer_score >= conf.DEALER_THRESHOLD:
            action = conf.STICK
        else:
            action = conf.HIT
        return action

    def handel_two_sticks(self):
        """Set the reward by comparing scores after both sides have stuck.

        -1 if the dealer's score is higher, 1 if the player's is higher,
        0 on a tie.
        """
        if self.dealer.score > self.player.score:
            self.reward = -1
        elif self.dealer.score < self.player.score:
            self.reward = 1
        else:
            self.reward = 0

    def display_info(self, name, action):
        """Print both participants' status after a move.

        ``name`` selects the layout: 'Player' for a player move, anything
        else is treated as a dealer move. ``action`` is the action taken.
        """
        if name == 'Player':
            print('Player: ', action)
            self.player.display_status()
            print('------')
            print('Dealer: not its turn yet')
            self.dealer.display_status()
        else:
            print('Player: Sticked')
            self.player.display_status()
            print('------')
            print('Dealer: ', action)
            self.dealer.display_status()
        print('reward:', self.reward, 'is_done:', self.done)
        print()

    def re_init(self, dealer_first_card, player_score):
        """Load a state into the environment.

        The player's score is overwritten and the dealer's score is reset to
        its first card, discarding any cards the dealer drew before.
        """
        self.player.score = player_score
        self.dealer.first_card = dealer_first_card
        self.dealer.score = dealer_first_card

    def step(self, state = None, player_action = -1):
        """Apply ``player_action`` to ``state`` and return ``(next_state, reward, done)``.

        state: tuple ``(dealer_first_card, player_score)``. When omitted, the
            environment's own ``next_state`` (as set by init() or the last
            player move) is used.
        player_action: conf.HIT draws a card for the player; conf.STICK lets
            the dealer play its policy until it sticks or busts. Any other
            value only prints a message.

        Nothing is played once the game is done; the last result is returned.

        Raises:
            TypeError: if no state is given and none is stored yet.
        """
        if state is None:
            # Honor the declared default instead of failing on unpacking None.
            state = self.next_state
        if state is None:
            raise TypeError('step() needs a state: pass one or call init() first')
        dealer_first_card, player_score = state
        self.re_init(dealer_first_card, player_score)
        if not self.done:
            self.time_step += 1
            if player_action == conf.HIT:
                self.player_step()
                #self.display_info('Player', player_action)
            elif player_action == conf.STICK:
                while not(self.dealer.is_busted() or self.done):
                    dealer_action = self.dealer_policy()
                    if dealer_action == conf.HIT:
                        self.dealer_step()
                    elif dealer_action == conf.STICK:
                        self.done = True
                        self.handel_two_sticks()
                    else:
                        print('Unknown dealer action')
                        # Nothing changes in this branch, so looping again
                        # would never terminate.
                        break
                    #self.display_info('Dealer', dealer_action)
            else:
                print('Unkown player action')
        else:
            #print('Game is over')
            pass
        return (self.next_state, self.reward, self.done)

In [43]:
# Smoke test: play a single game with randomly sampled actions.
env = Environment()
next_state, reward, done = env.init()
print(env.player.first_card, env.dealer.first_card)

# The iteration cap only guards against a game that never terminates.
for _ in range(100):
    player_action = env.sample_action()
    # Pass the sampled action on; it was previously computed and then
    # ignored in favor of a hard-coded 0.
    next_state, reward, done = env.step(next_state, player_action = player_action)
    if done:
        break

2 3
