<a href="https://colab.research.google.com/github/basselkassem/easy21/blob/master/environment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libs

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from matplotlib import cm
import plotly.figure_factory as FF
from scipy.spatial import Delaunay
import seaborn as sns


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



# Game configuration

## Episodes number

In [0]:
# Episode budgets to experiment with: successive powers of ten from 10**2 to 10**5.
EPISODES_NUMS = np.array([10**exponent for exponent in range(2, 6)])

[   100   1000  10000 100000]


## Maximum score (going above it loses)

In [0]:
# Highest score a hand may hold; Player.is_busted treats any score above it as a bust.
MAX_VALUE = 21

## Minimum score (going below it loses)

In [0]:
# Lowest score a hand may hold; Player.is_busted treats any score below it as a bust.
MIN_VALUE = 1

## Maximum of the cards values

In [0]:
# Largest face value a drawn card can have (inclusive; CardDeck samples up to this value).
CARD_VALUE_MAX = 10

## Minimum of the cards values

In [0]:
# Smallest face value a drawn card can have (inclusive; CardDeck samples from this value).
CARD_VALUE_MIN = 1

## Minimum score at which the dealer sticks

In [0]:
# Environment.dealer_policy sticks once the dealer's score is >= this value and hits below it.
DEALER_THRESHOLD = 17

## Action space

In [0]:
# Integer codes for the two moves a player can make.
HIT, STICK = 0, 1
# Every legal action, ordered by its integer code.
ACTION_SPACE = np.array((HIT, STICK))

[0 1]


## State Space
The **state** is represented as a 2D array
*   rows represent the dealer's first card
*   columns represent the player's score



In [0]:
# All values the dealer's first card can take, CARD_VALUE_MIN..CARD_VALUE_MAX inclusive.
DEALER_FIRST_CARD_SPACE = np.arange(CARD_VALUE_MIN, CARD_VALUE_MAX + 1)
# All scores from MIN_VALUE to MAX_VALUE inclusive.
PLAYER_SOCRE = np.arange(MIN_VALUE, MAX_VALUE + 1)
# (number of dealer first cards, number of player scores): both arrays are 1-D,
# so joining their shape tuples yields the 2-D state-table shape.
STATE_SPACE_SHAPE = DEALER_FIRST_CARD_SPACE.shape + PLAYER_SOCRE.shape

[ 1  2  3  4  5  6  7  8  9 10] (10,)
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21] (21,)
(10, 21)


## Action-State Space
The **Action-State** is represented as a 3D array


*   1st-D represents the dealer's first card
*   2nd-D represents the player's score
*   3rd-D represents the action



    
    


In [0]:
# (dealer first cards, player scores, actions): the three 1-D spaces' shape
# tuples joined into the shape of an action-value table.
ACTION_STATE_SPACE_SHAPE = (
    DEALER_FIRST_CARD_SPACE.shape + PLAYER_SOCRE.shape + ACTION_SPACE.shape
)

(10, 21, 2)


## Epsilon greedy constant

In [0]:
# Candidate N0 constants to compare.
# NOTE(review): presumably N0 feeds an epsilon-greedy schedule in the agents
# that consume this constant; nothing in this file uses it, so confirm there.
N0_VALUES = np.array([50, 100, 1000])

[  50  100 1000]


# TD-Lambda

In [0]:
# Lambda values 0.1, 0.2, ..., 0.9 (np.arange excludes the stop value 1).
TD_LAMBDA = np.arange(0.1, 1, 0.1)

[0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9]


# Card Class

In [0]:
class Card:
    """A playing card described by a face value and a color string."""

    def __init__(self, value, color):
        self.color = color
        self.value = value

    def display(self):
        """Print the card's color and face value."""
        print('color:', self.color, 'value:', self.value)

    def get_value(self):
        """Return the signed contribution of this card to a score.

        Cards whose color is ``'red'`` count negatively; every other color
        counts with the face value unchanged.
        """
        return -self.value if self.color == 'red' else self.value

# CardDeck Class

In [0]:
class CardDeck:
    """Source of randomly generated cards; every draw is sampled independently."""

    def __init__(self):
        pass

    @staticmethod
    def _random_value():
        """Sample a face value in CARD_VALUE_MIN..CARD_VALUE_MAX (inclusive)."""
        return np.random.randint(CARD_VALUE_MIN, CARD_VALUE_MAX + 1)

    @staticmethod
    def draw():
        """Draw a card that is red with probability 1/3 and black otherwise."""
        red_card_prop = 1 / 3
        # The color is sampled before the value, so the random stream is
        # consumed in a fixed order: one uniform float, then one integer.
        is_red = np.random.random() < red_card_prop
        return Card(CardDeck._random_value(), 'red' if is_red else 'black')

    @staticmethod
    def draw_black_card():
        """Draw a card that is always black, with a random face value."""
        return Card(CardDeck._random_value(), 'black')

# Player Class

In [0]:
class Player:
    """A participant (player or dealer) holding a running score."""

    def __init__(self):
        self.first_card = 0
        self.score = 0

    def set_score(self, score):
        """Overwrite the current score."""
        self.score = score

    def hit(self):
        """Draw one card from the deck and add its signed value to the score."""
        self.score += CardDeck.draw().get_value()

    def is_busted(self):
        """Return True when the score lies outside [MIN_VALUE, MAX_VALUE]."""
        too_low = self.score < MIN_VALUE
        too_high = self.score > MAX_VALUE
        return bool(too_low or too_high)

    def display_status(self):
        """Print the current score and whether the participant is bust."""
        print('Score:', self.score)
        print('Busted:', self.is_busted())

# Environment Class

In [0]:
class Environment:
    """Easy21 environment: one player against a dealer with a fixed policy.

    A state is the tuple ``(dealer_first_card, player_score)``. ``init`` starts
    an episode and ``step`` advances it; both return
    ``(next_state, reward, done)``.

    Parameters
    ----------
    mode : str
        When equal to ``'debug'`` every step prints the status of both
        participants; any other value silences that output.
    """

    def __init__(self, mode='debug'):
        self.dealer = Player()
        self.player = Player()
        self.next_state = None
        self.reward = 0
        self.time_step = 0
        self.done = False
        self.mode = mode

    def sample_action(self):
        """Return a random action, either HIT or STICK."""
        return np.random.randint(HIT, STICK + 1)

    def reward_func(self):
        """Return -1 if the player is bust, +1 if the dealer is bust, else 0."""
        if self.player.is_busted():
            return -1
        if self.dealer.is_busted():
            return 1
        return 0

    def is_done(self):
        """Set and return ``done``: True as soon as either participant is bust."""
        self.done = self.player.is_busted() or self.dealer.is_busted()
        return self.done

    def init(self):
        """Start a new episode and return ``(state, reward, done)``.

        Both participants receive one black card. The per-episode bookkeeping
        (``reward``, ``done``, ``time_step``) is reset here so the same
        instance can play several episodes in a row; without the reset a
        finished episode would leave ``done`` set and every later ``step``
        would report that the game is over.
        """
        self.reward = 0
        self.time_step = 0
        self.done = False

        # Draw order (player first, then dealer) is kept stable so seeded
        # runs consume the random stream in the same sequence.
        player_first_card = CardDeck.draw_black_card()
        dealer_first_card = CardDeck.draw_black_card()

        self.dealer.set_score(dealer_first_card.get_value())
        self.dealer.first_card = dealer_first_card.get_value()

        self.player.set_score(player_first_card.get_value())
        self.player.first_card = player_first_card.get_value()

        self.next_state = (self.dealer.first_card, self.player.score)

        return (self.next_state, self.reward, self.done)

    def dealer_step(self):
        """Make the dealer draw one card, then refresh reward and done flag."""
        self.dealer.hit()
        self.reward = self.reward_func()
        self.is_done()

    def player_step(self):
        """Make the player draw one card, then refresh state, reward and done."""
        self.player.hit()
        self.next_state = (self.dealer.first_card, self.player.score)
        self.reward = self.reward_func()
        self.is_done()

    def dealer_policy(self):
        """Return STICK once the dealer reaches DEALER_THRESHOLD, else HIT."""
        if self.dealer.score >= DEALER_THRESHOLD:
            return STICK
        return HIT

    def handel_two_sticks(self):
        """Score a game in which both sides stuck: the higher total wins, a tie is 0."""
        if self.dealer.score > self.player.score:
            self.reward = -1
        elif self.dealer.score < self.player.score:
            self.reward = 1
        else:
            self.reward = 0

    def display_info(self, name, action):
        """Print both participants' status for a move made by ``name``.

        ``name == 'Player'`` reports a player move; any other value is treated
        as a dealer move made after the player has stuck.
        """
        if name == 'Player':
            print('Player: ', action)
            self.player.display_status()
            print('------')
            print('Dealer: not its turn yet')
            self.dealer.display_status()
        else:
            print('Player: Sticked')
            self.player.display_status()
            print('------')
            print('Dealer: ', action)
            self.dealer.display_status()
        print('reward:', self.reward, 'is_done:', self.done)
        print()

    def re_init(self, dealer_first_card, player_score):
        """Load a state into the participants; the dealer restarts from its first card."""
        self.player.score = player_score
        self.dealer.first_card = dealer_first_card
        self.dealer.score = dealer_first_card

    def step(self, state=None, player_action=-1):
        """Apply ``player_action`` in ``state`` and return ``(next_state, reward, done)``.

        Parameters
        ----------
        state : tuple or None
            ``(dealer_first_card, player_score)``. When omitted, the state
            stored by the previous ``init``/``step`` call is used.
        player_action : int
            HIT draws one card for the player. STICK lets the dealer play its
            policy until it sticks or goes bust. Any other value only prints a
            warning.

        Raises
        ------
        TypeError
            If no state is given and none has been stored yet.
        """
        if state is None:
            # The signature advertises ``state=None``, so continue from the
            # state this environment produced last instead of failing while
            # unpacking None.
            state = self.next_state
        if state is None:
            raise TypeError('step() needs a state: call init() first or pass state explicitly')

        dealer_first_card, player_score = state
        self.re_init(dealer_first_card, player_score)
        if not self.done:
            self.time_step += 1
            if player_action == HIT:
                self.player_step()
                if self.mode == 'debug':
                    self.display_info('Player', player_action)
            elif player_action == STICK:
                # The dealer plays out its whole turn inside this single step.
                while not (self.dealer.is_busted() or self.done):
                    dealer_action = self.dealer_policy()
                    if dealer_action == HIT:
                        self.dealer_step()
                    elif dealer_action == STICK:
                        self.done = True
                        self.handel_two_sticks()
                    else:
                        print('Unknown dealer action')
                    if self.mode == 'debug':
                        self.display_info('Dealer', dealer_action)
            else:
                print('Unkown player action')
        else:
            if self.mode == 'debug':
                print('Game is over')
        return (self.next_state, self.reward, self.done)

In [0]:
# Smoke test: play one episode with random actions, without debug output.
env = Environment(mode = 'no_debug')
next_state, reward, done = env.init()

# 100 iterations is only a safety cap; the loop exits as soon as the episode ends.
for i in range(100):
    player_action = env.sample_action()
    # Pass the sampled action on; it used to be discarded in favour of a
    # hard-coded 0 (HIT), so STICK was never exercised.
    next_state, reward, done = env.step(next_state, player_action = player_action)
    if done:
        break

# class Visualizer

In [0]:
class Visualizer:
    """Static plotting helpers for value surfaces and 2-D tables."""

    def __init__(self):
        pass

    @staticmethod
    def prepare_axises(Q):
        """Flatten an action-value table into X/Y/Z lists for surface plots.

        ``Q`` is indexed as ``Q[state, action]`` with
        ``state = (dealer, player)``. For every dealer card in
        DEALER_FIRST_CARD_SPACE and every player score in 1..MAX_VALUE the
        larger of the HIT and STICK values is taken.

        Returns
        -------
        tuple of lists
            ``(X, Y, Z)``: dealer cards, player scores and values, in
            dealer-major order (all player scores for dealer 1, then dealer 2, ...).
        """
        X, Y, Z = [], [], []
        for dealer in DEALER_FIRST_CARD_SPACE:
            for player in range(1, MAX_VALUE + 1):
                state = dealer, player
                X.append(dealer)
                Y.append(player)
                Z.append(max(Q[state, HIT], Q[state, STICK]))
        return X, Y, Z

    @staticmethod
    def draw_surf(X, Y, Z, title):
        """Show a Matplotlib triangulated 3-D surface of Z over (X, Y)."""
        fig = plt.figure(figsize = (10, 8))
        # Figure.gca() no longer accepts keyword arguments (Matplotlib >= 3.6),
        # so the 3-D axes are created with add_subplot instead.
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_trisurf(X, Y, Z, cmap=cm.coolwarm)
        ax.set_xlabel('dealer')
        ax.set_ylabel('player')
        ax.set_zlabel('Value')
        # Fix the tick positions rather than only the tick labels: labels set
        # on automatically placed ticks are assigned in order and end up next
        # to the wrong coordinates.
        ax.set_xticks(list(range(1, CARD_VALUE_MAX + 1)))
        ax.set_yticks(list(range(1, MAX_VALUE + 1)))
        plt.title(title)
        plt.show()

    @staticmethod
    def draw_deluanay_surf(X, Y, Z, title):
        """Show an interactive Plotly surface of Z over (X, Y).

        The triangulation is computed on a regular CARD_VALUE_MAX x MAX_VALUE
        parameter grid, flattened row by row.
        NOTE(review): this assumes X, Y, Z hold exactly
        CARD_VALUE_MAX * MAX_VALUE points in the dealer-major order produced
        by ``prepare_axises`` - confirm for other callers.
        """
        u = np.linspace(0, 2*np.pi, MAX_VALUE)
        v = np.linspace(0, 2*np.pi, CARD_VALUE_MAX)
        u,v = np.meshgrid(u,v)
        u = u.flatten()
        v = v.flatten()

        points2D = np.vstack([u,v]).T
        tri = Delaunay(points2D)
        simplices = tri.simplices

        fig = FF.create_trisurf(z=Z, x = X, y = Y, simplices=simplices)

        scene = dict(
            xaxis = dict(nticks=10, range=[CARD_VALUE_MIN, CARD_VALUE_MAX], tick0 = 1),
            yaxis = dict(nticks=21, range=[MIN_VALUE, MAX_VALUE], ticks='outside', tick0 = 1),
            # Pad the value axis by 0.5 on both sides of the data range.
            zaxis = dict(nticks=10, range=[np.min(Z) - 0.5, np.max(Z) + 0.5],),
            xaxis_title='Dealer',
            yaxis_title='Player',
            zaxis_title='Value',
        )
        fig.update_layout(scene = scene,
                        title = title,
                        autosize = True,
                        width=700, 
                        height=500,
                        margin = dict(l=65, r=50, b=65, t=90)
        )
        fig.show()

    @staticmethod
    def visualize(Q, title):
        """Plot the greedy value surface of ``Q`` with Plotly, then with Matplotlib."""
        X, Y, Z = Visualizer.prepare_axises(Q)
        Visualizer.draw_deluanay_surf(X, Y, Z, title)
        Visualizer.draw_surf(X, Y, Z, title)

    @staticmethod
    def draw2d_square_array(array, title):
        """Show ``array`` as an annotated heatmap with square cells."""
        fig, ax = plt.subplots(figsize = (11, 11))
        sns.heatmap(
            array, 
            linewidths = 0.1, 
            annot = True, 
            xticklabels = 1,
            yticklabels = 1,
            cbar = False,
            fmt = "0.2f",
            square = True,
            ax = ax,
            alpha = 0.8,
        )
        ax.set_title(title)
        plt.show()

    @staticmethod
    def draw2d_array(array, title, low_lim = False):
        """Show ``array`` as an annotated heatmap with dealer/player axis labels.

        Parameters
        ----------
        array : 2-D array-like
            Values to plot.
        title : str
            Plot title.
        low_lim : bool
            Tick labels start at 1 when truthy and at 0 otherwise.
            NOTE(review): the number of labels must match the heatmap's
            columns/rows - confirm the expected array shape per caller.
        """
        fig, ax = plt.subplots(figsize = (8, 8))
        sns.heatmap(
            array, 
            linewidths = 0.1, 
            annot = True, 
            xticklabels = 1,
            yticklabels = 1,
            cbar = False,
            fmt = "0.2f",
            cmap = 'coolwarm',
            ax = ax,
            alpha = 0.8,
        )
        low = 1 if low_lim else 0
        # Reverse the y-limits to flip the heatmap's vertical axis.
        ax.set_ylim(ax.get_ylim()[::-1])
        ax.set_xticklabels(range(low, CARD_VALUE_MAX + 1))
        ax.set_yticklabels(range(low, MAX_VALUE + 1))
        ax.set_xlabel("Dealer's first card")
        ax.set_ylabel("Player's score")
        ax.set_title(title)
        plt.show()