# Learning blackjack

In [None]:
from copy import deepcopy
import time
import sys
import matplotlib.pyplot as plt
import numpy as np
import random
import itertools
from IPython import display

## Card Class

In [None]:
# Rank labels and single-letter suit codes used to build the cards.
RANKS = [
    "A", "2", "3", "4", "5", "6", "7", "8", "9", "10", "J", "Q", "K",
]
SUITS = ["c", "d", "h", "s"]

# Point value of every rank; the ace is stored with the value 11.
RANK_VALUE = {
    'A': 11,
    '2': 2,
    '3': 3,
    '4': 4,
    '5': 5,
    '6': 6,
    '7': 7,
    '8': 8,
    '9': 9,
    '10': 10,
    'J': 10,
    'Q': 10,
    'K': 10,
}

class Card(object):
    """A single playing card, identified by a rank label and a suit code."""

    def __init__(self, rank, suit):
        self.suit = suit
        self.rank = rank

    def __str__(self):
        # Rank directly followed by the suit, e.g. "10h".
        label = self.rank
        label = label + self.suit
        return label

    def value(self):
        """Return the point value registered for this card's rank in RANK_VALUE."""
        points = RANK_VALUE[self.rank]
        return points

## Deck Class

In [None]:
class Deck(object):
    """A shuffled stack of cards made of one or several complete decks."""

    def __init__(self, n_decks=1):
        # One card per (rank, suit) combination ...
        single_deck = [Card(*pair) for pair in list(itertools.product(RANKS, SUITS))]
        # ... repeated n_decks times (the repetition reuses the same Card objects).
        self.cards = single_deck * n_decks
        self.shuffle()

    def __str__(self):
        if not self.cards:
            return 'empty Deck'
        labels = [str(card) for card in self.cards]
        return ' '.join(labels)

    def shuffle(self):
        """Shuffle the remaining cards in place."""
        random.shuffle(self.cards)

    def next_card(self):
        """Remove the last card of the stack and return it."""
        top = self.cards.pop()
        return top

    def cards_left(self):
        """Return how many cards are still in the deck."""
        remaining = len(self.cards)
        return remaining

## Hand Class

In [None]:
class Hand(object):
    """A hand of playing cards together with its per-hand game state.

    Attributes:
        cards: the card objects currently held.
        bet: the stake placed on this hand.
        stand: truthy once the hand has chosen to stand.
        dd: double-down status (0 = make choice, 1 = doubled down,
            2 = not doubled down).
        split: split status (0 = make choice, 1 = split, 2 = split possible
            but not done, 3 = option not available).
    """

    def __init__(self):
        # A new hand starts in exactly the state that clear() produces.
        self.clear()

    def __str__(self):
        return ' '.join([str(card) for card in self.cards]) if self.cards else 'empty Hand'

    def sorted_list(self):
        """Return the rank labels of the held cards, sorted as strings."""
        return sorted([card.rank for card in self.cards])

    def clear(self):
        """Drop all cards and reset bet and decision flags to their defaults."""
        self.cards = []
        self.bet = 0
        self.stand = False
        self.dd = 0
        self.split = 3

    def draw_from(self, deck):
        """Take the next card from `deck`, add it to the hand and return it."""
        c = deck.next_card()
        self.cards.append(c)
        return c

    def add(self, card):
        """Add an already drawn `card` to the hand."""
        self.cards.append(card)

    def value(self):
        """Return the hand's point total.

        Aces count 11; while the total exceeds 21, aces are downgraded to 1
        one at a time.
        """
        total = 0
        high_aces = 0
        for card in self.cards:
            points = card.value()
            total += points
            if points == 11:
                high_aces += 1
        while total > 21 and high_aces > 0:
            total -= 10
            high_aces -= 1
        return total

    def is_triple_seven(self):
        """Return True when the hand consists of exactly three sevens."""
        # Ranks are strings (see RANKS), so compare against "7", not the int 7;
        # the integer comparison could never succeed.
        return len(self.cards) == 3 and all(c.rank == "7" for c in self.cards)

    def is_blackjack(self):
        """Return True for a two-card hand worth 21."""
        return (len(self.cards) == 2) and (self.value() == 21)

    def can_split(self):
        """Return True for a two-card hand whose cards share the same rank."""
        return (len(self.cards) == 2) and (self.cards[0].rank == self.cards[1].rank)

    def is_busted(self):
        """Return True when the hand's value exceeds 21."""
        return self.value() > 21

## Player Class

In [None]:
class Player(object):
    """Base class for everybody sitting at the table.

    Holds the credit balance, the list of hands (one until a split happens),
    a Game record of the round in progress and a History of finished rounds.
    The decision methods (hit_otherwise_stand, double_down, split_hand,
    update_strategy) do nothing here and are meant to be overridden.
    """

    def __init__(self, name, cre=0):
        self.credits = cre
        self.hands = [Hand()]
        self.name = name
        self.game = Game()
        self.history = History()

    def reset(self):
        """Clear every hand, keep only the first one and reset the game record."""
        for h in self.hands:
            h.clear()
        del self.hands[1:]
        self.game.reset()

    def hit_otherwise_stand(self, h=0):
        """Decide whether hand `h` hits; the base implementation does nothing."""
        pass

    def double_down(self, h=0):
        """Decide whether hand `h` doubles down; the base implementation does nothing."""
        pass

    def split_hand(self, h=0):
        """Decide whether hand `h` is split; the base implementation does nothing."""
        pass

    def update_strategy(self, num_games):
        """Hook called after a game; the base implementation does nothing."""
        pass

    def assign_table(self, table_playing_at):
        """Remember the table this player is seated at."""
        self.table = table_playing_at

    def draw_card(self, deck, h=0):
        """Draw one card from `deck` into hand `h`, announcing it if output is on."""
        c = self.hands[h].draw_from(deck)
        if self.table.output:
            print(self.name + " has drawn a " + str(c) + " (hand no. " + str(h) + ").")

    def bet(self, h=0):
        """Place the fixed stake of 10 credits on hand `h`."""
        b = 10
        self.hands[h].bet = b
        self.credits -= b
        self.game.gain -= b

    def some_hand_stands(self):
        """Return True if at least one hand has chosen to stand."""
        return any([h.stand for h in self.hands])

    def can_play(self):
        """Return True while some hand is neither standing nor busted."""
        return any([not (h.stand or h.is_busted()) for h in self.hands])

    def show_cards(self, h=0):
        """Print the cards and value of hand `h` when table output is on."""
        if self.table.output:
            print(self.name + " has drawn the following cards: (hand no. " + str(h) + ").")
            print(str(self.hands[h]) + " (Value: " + str(self.hands[h].value()) + ")")

    def do_split_hand(self, deck, h=0):
        """Split hand `h` into two hands and deal one new card to each.

        The second hand carries the same bet as the first; that extra stake
        is charged to the player's credits and to the current game's gain.
        """
        first = self.hands[h]
        first.split = 3
        # Build the second hand with the class of the hand being split, so a
        # specialised hand type is preserved.
        second = first.__class__()
        second.add(first.cards.pop())
        self.draw_card(deck, h)
        second.bet = first.bet
        self.credits -= first.bet
        # The second stake must also be booked on the game record, exactly as
        # bet() and do_double_down() do; otherwise game.gain overstates the
        # result of every split game by one bet.
        self.game.gain -= first.bet
        self.hands.append(second)
        # Deal to the hand that was just appended (it is not necessarily h+1).
        self.draw_card(deck, len(self.hands) - 1)

    def do_double_down(self, deck, h=0):
        """Pay a second stake, draw exactly one card and stand unless busted."""
        self.draw_card(deck, h)
        self.credits -= self.hands[h].bet
        self.game.gain -= self.hands[h].bet
        if self.hands[h].value() > 21:
            if self.table.output:
                print(self.name + ", you busted!")
        else:
            self.hands[h].stand = True
            self.hands[h].bet = self.hands[h].bet * 2

    def process_information(self, information, game_id):
        """Book the payouts listed in information['gains'] after a game."""
        payout = sum(information['gains'])
        self.credits += payout
        self.game.gain += payout
        if self.table.output:
            print(self.name + " has " + str(self.credits) + " credits!")

## Dealer Class

In [None]:
class Dealer(Player):
    """The dealer: a Player that follows one fixed hit/stand rule."""

    def __init__(self):
        self.name = "Dealer"
        self.credits = 0
        # The dealer keeps a single hand and a game record, but no history.
        self.hands = [Hand()]
        self.game = Game()

    def hit_otherwise_stand(self, h=0):
        # Hit while the hand is worth less than 17, stand from 17 upwards.
        total = self.hands[h].value()
        return total < 17

## HumanPlayer Class

In [None]:
class HumanPlayer(Player):
    """A player whose decisions are typed in at the console as Y/N answers."""

    def __init__(self, name="Human", cre=0):
        # Same attributes as every Player: credits, one hand, name, game, history.
        Player.__init__(self, name, cre)

    def hit_otherwise_stand(self, h=0):
        """Ask whether hand `h` should hit; mark the hand as standing otherwise."""
        s = "do you want to hit?"
        hit = self.yes_no(s)
        self.hands[h].stand = not hit
        if self.table.output and hit:
            print(self.name + ", you chose to hit.")
        elif self.table.output:
            print(self.name + ", you chose to stand.")
        return hit

    def split_hand(self, h=0):
        """Ask whether hand `h` should be split; only asked when a split is possible."""
        if self.hands[h].can_split():
            s = "do you want to split your hand?"
            split = self.yes_no(s)
            if self.table.output and split:
                print(self.name + ", you chose to split.")
            elif self.table.output:
                print(self.name + ", you chose not to split.")
            return split
        else:
            if self.table.output:
                print(self.name + ", you cannot split.")
            return False

    def double_down(self, h=0):
        """Ask whether hand `h` should double down."""
        s = "do you want to double your bet and get exactly one more card?"
        ddown = self.yes_no(s)
        if self.table.output and ddown:
            print(self.name + ", you chose to double-down.")
        elif self.table.output:
            print(self.name + ", you chose not to double-down.")
        return ddown

    def yes_no(self, s):
        """Prompt with question `s` and return True for an answer of y/Y.

        Surrounding whitespace in the answer is ignored; anything other than
        "y"/"Y" counts as no.
        """
        # raw_input only exists on Python 2; on Python 3 the same function is
        # called input.
        try:
            read_line = raw_input
        except NameError:
            read_line = input
        response = read_line(self.name + ", " + s + " (Y/N): ")
        return response.strip().lower() == "y"

## OptimalPlayer Class

In [None]:
class OptimalPlayer(Player):
    """A player following a fixed, hard-coded strategy.

    Every decision is looked up from rules that depend only on the player's
    own hand and on the value of the dealer's hand.
    """

    def __init__(self, name, cre=0):
        self.name = name
        self.credits = cre
        self.hands = [Hand()]
        self.game = Game()
        self.history = History()

    def _dealer_value(self):
        # Value of the dealer's (first) hand at the table this player sits at.
        return self.table.dealer.hands[0].value()

    def split_hand(self, h=0):
        """Return True when the rules say hand `h` should be split.

        A hand that cannot be split always yields False.
        """
        hand = self.hands[h]
        if not hand.can_split():
            return False

        pair = hand.cards[0].value()
        # Pairs decided without looking at the dealer.
        if pair == 11 or pair == 8:
            return True
        if pair == 9:
            dealer = self._dealer_value()
            return (2 <= dealer <= 6) or (8 <= dealer <= 9)
        if pair == 7:
            return 2 <= self._dealer_value() <= 8
        if pair == 6 or pair == 3 or pair == 2:
            return 2 <= self._dealer_value() <= 7
        if pair == 4:
            return self._dealer_value() == 5
        if pair == 10 or pair == 5:
            return False

    def double_down(self, h=0):
        """Return True when the rules say hand `h` should double down."""
        hand = self.hands[h]
        total = hand.value()

        if "A" in hand.sorted_list():
            # Hands containing an ace.
            if total >= 19:
                return False
            if total == 18:
                return 4 <= self._dealer_value() <= 6
            if total == 17:
                return 3 <= self._dealer_value() <= 6
            if 13 <= total <= 16:
                return 5 <= self._dealer_value() <= 6
            if total == 12:
                return self._dealer_value() == 5
        else:
            # Hands without an ace: only totals 9, 10 and 11 may double.
            if total >= 12 or total <= 8:
                return False
            if total == 11:
                return 2 <= self._dealer_value() <= 10
            if total == 10:
                return 2 <= self._dealer_value() <= 9
            if total == 9:
                return 2 <= self._dealer_value() <= 6

    def hit_otherwise_stand(self, h=0):
        """Return True to hit hand `h`, False to stand.

        The hand's `stand` flag is set to 0 when hitting and 1 when standing.
        """
        hand = self.hands[h]
        has_ace = "A" in hand.sorted_list()
        dealer = self._dealer_value()

        # Pick the total below which the hand keeps hitting.
        if has_ace:
            if dealer <= 8 or dealer == 11:
                limit = 18
            elif 9 <= dealer <= 10:
                limit = 19
            else:
                return None
        else:
            if 2 <= dealer <= 3:
                limit = 13
            elif 4 <= dealer <= 6:
                limit = 12
            elif dealer >= 7:
                limit = 17
            else:
                return None

        if hand.value() < limit:
            hand.stand = 0
            return True
        hand.stand = 1
        return False

## StrategicPlayer

Encoding of game state:
- split = 0 => make choice; split = 1 => split (not used); split = 2 => split possible but not done; 
    split = 3 => option not available 
- dd = 0 => make choice; dd = 1 => doubled down; dd = 2 => not doubled down

Note: the chosen action is stored at the same index in self.game.action as the corresponding game state in self.game.game_state. Together, the two lists therefore hold all relevant information: what the agent knew when it had to choose, and what it decided.

In [None]:
class StrategicPlayer(Player):
    """A player whose decisions come from a probability look-up table.

    self.strategy.table maps a hashed game state to the probability of
    answering "yes" to the pending question. States not yet in the table get
    the matching entry of self.default:
        default[0] -- split (otherwise not)
        default[1] -- double down (otherwise not)
        default[2] -- hit (otherwise stand)
    """

    def __init__(self, name, cre=0):
        self.name = name
        self.credits = cre
        self.hands = [Hand()]
        self.strategy = Strategy()
        self.default = [0.5, 0, 0]
        self.game = Game()
        self.history = History()

    def get_state(self, h):
        """Return the hash of the game state as seen by hand `h`."""
        hand = self.hands[h]
        dealer_hand = self.table.dealer.hands[0]
        if self.table.output:
            print("Complete game state: (spl, dd, hand, dealer)",
                  hand.split,
                  hand.dd,
                  hand.sorted_list(),
                  dealer_hand.sorted_list())
        state = GameState(hand.split,
                          hand.dd,
                          hand.sorted_list(),
                          dealer_hand.sorted_list())
        return state.__hash__()

    def get_state_non_hash(self, h):
        """Return the game state of hand `h` as a plain list (not hashed)."""
        hand = self.hands[h]
        return [hand.split,
                hand.dd,
                hand.sorted_list(),
                self.table.dealer.hands[0].sorted_list()]

    def action(self, h):
        """Draw a yes/no answer for the pending decision of hand `h`.

        The probability used, the state and the answer are appended to the
        current game record.
        """
        key = self.get_state(h)
        lookup = self.strategy.table
        if key not in lookup:
            hand = self.hands[h]
            if self.table.output:
                print("Hand no. ", h, ": ", "Split status: ",
                      hand.split,
                      " -- double down status", hand.dd)
            # Which question is pending decides which default applies.
            if hand.split == 0:
                which = 0
            elif hand.dd == 0:
                which = 1
            else:
                which = 2
            lookup[key] = self.default[which]
        p = lookup[key]

        act = p > random.random()
        # Record the probability of the answer that was actually given.
        self.game.prob.append(p if act else 1-p)
        self.game.game_state.append(self.get_state_non_hash(h))
        self.game.action.append(act)
        return act

    def double_down(self, h=0):
        """Decide on doubling down for hand `h` and store the outcome in hand.dd."""
        hand = self.hands[h]
        hand.dd = 0
        chosen = self.action(h)
        if self.table.output:
            if chosen:
                print(self.name + " chose to double down.")
            else:
                print(self.name + " chose not to double down.")
        hand.dd = 1 if chosen else 2
        return chosen

    def hit_otherwise_stand(self, h=0):
        """Decide between hit (True) and stand (False) for hand `h`."""
        wants_card = self.action(h)
        self.hands[h].stand = not wants_card
        if self.table.output:
            if wants_card:
                print(self.name + " chose to hit.")
            else:
                print(self.name + " chose to stand.")
        return wants_card

    def split_hand(self, h=0):
        """Decide on splitting hand `h` and store the outcome in hand.split."""
        hand = self.hands[h]
        if not hand.can_split():
            if self.table.output:
                print(self.name + " could not split.")
            hand.split = 3
            return False

        hand.split = 0
        chosen = self.action(h)
        hand.split = 1 if chosen else 2
        if self.table.output:
            if chosen:
                print(self.name + " chose to split.")
            else:
                print(self.name + " chose not to split.")
        return chosen

##  History

In [None]:
class History(object):
    """Keeps a snapshot of every finished game of one player."""

    def __init__(self):
        self.game = []

    def add_game(self, g):
        """Append a deep copy of game record `g`, so later resets do not alter it."""
        self.game.append(deepcopy(g))

    def remove_almost_all_games(self, nn):
        """Forget everything except the `nn` most recent games.

        With nn <= 0 the history is emptied.
        """
        # The previous slice [-nn:-1] also threw away the most recent game.
        self.game = self.game[-nn:] if nn > 0 else []

## Game Class

In [None]:
class Game(object):
    """Record of a single game from one player's point of view.

    Attributes:
        prob: probabilities appended by the player for its decisions.
        game_state: game states appended by the player for its decisions.
        action: the decisions themselves.
        gain: net credits won or lost in this game so far.
    """

    def __init__(self):
        # A new record starts in exactly the state that reset() produces.
        self.reset()

    def reset(self):
        """Empty all recorded lists and set the gain back to 0."""
        self.prob = []
        self.game_state = []
        self.action = []
        self.gain = 0

    def __str__(self):
        # __str__ must return a string; returning the list itself made
        # str(game) raise a TypeError.
        return str(self.prob)

## BanditGame Class

In [None]:
class BanditGame(Game):
    """A Game record that additionally stores the bandit's first decision."""

    def __init__(self):
        super(BanditGame, self).__init__()
        # No decision has been recorded yet.
        self.first_decision = None

## Strategy Class

In [None]:
class Strategy(object):
    """Container for a strategy look-up table, initially empty."""

    def __init__(self):
        self.table = dict()

## GameState Class

In [None]:
class GameState(object):
    """What a player knows when deciding: split and double-down status plus both hands.

    `player_hand` and `dealer_hand` are stored as passed in; only their
    string representation is used for hashing.
    """

    def __init__(self, split, dd, player_hand, dealer_hand):
        self.hand_split = split
        self.hand_dd = dd
        self.player_hand = player_hand
        self.dealer_hand = dealer_hand

    def __hash__(self):
        # The hands are hashed through their string form; the "-" entry
        # separates player and dealer hand inside the hashed tuple.
        return hash((self.hand_split, self.hand_dd, str(self.player_hand), "-", str(self.dealer_hand)))

    def __str__(self):
        # Previously this called str() with four arguments, called
        # sorted_list() on the stored hands and returned nothing, so
        # str(state) always failed.
        return str((self.hand_split,
                    self.hand_dd,
                    self.player_hand,
                    self.dealer_hand))
        

## RandomPlayer Class

In [None]:
class RandomPlayer(StrategicPlayer):
    """A player with a purely random strategy.

    Every action (split, double down, hit) is chosen with probability 0.5.
    """

    def __init__(self, name, cre=0):
        super(RandomPlayer, self).__init__(name, cre)
        # One default probability per question: split, double down, hit.
        self.default = [0.5] * 3

## AlwaysStandPlayer Class

In [None]:
class AlwaysStandPlayer(StrategicPlayer):
    """A player who answers "no" to every question.

    It never splits, never doubles down and always stands.
    """

    def __init__(self, name, cre=0):
        super(AlwaysStandPlayer, self).__init__(name, cre)
        # Probability 0 for split, double down and hit alike.
        self.default = [0] * 3

## Bandit Class

In [None]:
class Bandit(StrategicPlayer):
    """A player who never splits and treats the first decision between
    double-down, hit, or stand as a 3-armed bandit.

    All subsequent decisions are always 'stand'. The arms are encoded as
    0 = double down, 1 = hit, 2 = stand and drawn according to
    self.arm_probabilities.
    """

    def __init__(self, name="Bandit", cre=0):
        self.credits = cre
        self.hands = [Hand()]
        self.name = name
        self.strategy = Strategy()
        self.game = BanditGame()
        self.history = History()
        self.first_decision_selected = False
        self.first_decision_played = False
        # Start with all three arms equally likely.
        self.arm_probabilities = 1/3.0*np.ones(3)

    def reset(self):
        """Prepare for the next game.

        Hands, game record and the per-game decision flags are reset. The arm
        probabilities are deliberately kept: resetting them here wiped out
        whatever update_strategy() had set after the previous game before it
        could ever be used.
        """
        for h in self.hands:
            h.clear()
        del self.hands[1:]
        self.game.reset()
        self.first_decision_selected = False
        self.first_decision_played = False

    def first_decision(self):
        """Return this game's first decision, drawing it on the first call."""
        # select the first decision according to the probabilities given in self.arm_probabilities
        if not self.first_decision_selected:
            # draw: dd (encoded as 0), hit (encoded as 1), or stand (encoded as 2) (3-armed bandit)
            self.decision = np.random.choice(range(3), p=self.arm_probabilities)
            # record that decision has been selected for this game
            self.first_decision_selected = True
            # record decision
            self.game.first_decision = self.decision
            if self.table.output:
                print(self.name + "'s first decision is: " + str(self.decision))
        return self.decision

    def action(self, h):
        """Answer the pending yes/no question for hand `h`.

        Until the selected first decision has been played, the answer depends
        on which question is pending (split, double down, or hit); afterwards
        the answer is always False, i.e. stand.
        """
        # Default answer; also covers the combination that sets nothing below.
        act = False
        # play chosen action if this has not yet occurred
        if not self.first_decision_played:
            # get decision and assign corresponding boolean value to act
            first_choice = self.first_decision()
            if self.table.output:
                print("Hand no. ", h, ": ", "Split status: ",
                      self.hands[h].split,
                      " -- double down status", self.hands[h].dd)
            if self.hands[h].split == 0:
                act = False  # never split
            elif self.hands[h].dd == 0:
                if first_choice == 0:
                    act = True  # double down
                    self.first_decision_played = True
                else:
                    act = False
            else:
                if first_choice == 1:
                    act = True  # hit
                    self.first_decision_played = True
                elif first_choice == 2:
                    act = False  # stand
                    self.first_decision_played = True
            # record game state
            self.game.game_state.append(self.get_state_non_hash(h))

        return act

## EpsGreedyBandit Class

In [None]:
class EpsGreedyBandit(Bandit):
    """Updates arm probabilities according to an eps-greedy strategy.

    The arm(s) with the largest cumulative gain share probability 1 - eps,
    the remaining arms share eps.
    """

    def __init__(self, eps, name="EpsGreedyBandit", cre=0):
        Bandit.__init__(self, name, cre)
        self.eps = eps

    def update_strategy(self, num_games):
        """Recompute self.arm_probabilities from all games in the history.

        `num_games` is accepted for interface compatibility and not used.
        """
        # Cumulative gain per arm: index 0 = double down, 1 = hit, 2 = stand.
        gains = np.zeros(3)
        for g in self.history.game:
            # Anything that is not double down (0) or hit (1) counts as stand.
            arm = g.first_decision if g.first_decision in (0, 1) else 2
            gains[arm] += g.gain

        # Arms tied for the largest gain share (1 - eps); the others share eps.
        best = gains == gains.max()
        n_best = int(best.sum())
        probs = np.empty(3)
        # float() keeps this a true division even when eps is an int on Python 2.
        probs[best] = (1 - float(self.eps)) / n_best
        if n_best != 3:
            probs[~best] = float(self.eps) / (3 - n_best)
        # Normalise so the probabilities sum to one (matters when all arms tie).
        self.arm_probabilities = probs / probs.sum()

        if self.table.output:
            print("\n" + self.name + " updated his/her strategy!")
            print("Arm probabilities " + str(self.arm_probabilities))

## EpsGreedyContextualBandit Class

In [None]:
class EpsGreedyContextualBandit(Bandit):
    """A contextual bandit: arm probabilities are kept per game state and
    updated in an eps-greedy fashion.

    self.strategy.table maps a hashed game state to an array of three arm
    probabilities (0 = double down, 1 = hit, 2 = stand).
    """

    def __init__(self, eps, name="ContextualBandit", cre=0):
        Bandit.__init__(self, name, cre)
        self.eps = eps

    def first_decision(self):
        """Return this game's first decision, drawn from the current state's probabilities."""
        # get game state
        gstmp = self.get_state(0)
        # check whether probabilities for this game state are given in strategy table
        if gstmp in self.strategy.table:
            # get arm probabilities from strategy table
            p = self.strategy.table[gstmp]
        else:
            # choose default values; store a copy so that table entries never
            # share one array with self.arm_probabilities or with each other
            p = self.arm_probabilities.copy()
            self.strategy.table[gstmp] = p
        if not self.first_decision_selected:
            # draw: dd (encoded as 0), hit (encoded as 1), or stand (encoded as 2) (3-armed bandit) according to p
            self.decision = np.random.choice(range(3), p=p)
            # record that decision has been selected for this game
            self.first_decision_selected = True
            # record decision
            self.game.first_decision = self.decision
            if self.table.output:
                print(self.name + "'s first decision is: " + str(self.decision))
        return self.decision

    def _eps_greedy_probabilities(self, gains):
        # Arms tied for the largest gain share (1 - eps); the others share eps.
        best = gains == gains.max()
        n_best = int(best.sum())
        probs = np.zeros(3)
        # float() keeps this a true division even when eps is an int on Python 2.
        probs[best] = (1 - float(self.eps)) / n_best
        if n_best != 3:
            probs[~best] = float(self.eps) / (3 - n_best)
        return probs / probs.sum()

    def update_strategy_table(self):
        """Recompute the arm probabilities of every known game state.

        Each game's gain is credited to the arm chosen in that game, once for
        every game state recorded in that game.
        """
        table = self.strategy.table
        # Cumulative gain per game state and arm.
        cum_gains = dict((key, np.zeros(3)) for key in table)

        # go through all games in history
        for g in self.history.game:
            # Anything that is not double down (0) or hit (1) counts as stand.
            arm = g.first_decision if g.first_decision in (0, 1) else 2
            # go through all game states in game g
            for state in g.game_state:
                key = GameState(*state).__hash__()
                cum_gains[key][arm] += g.gain

        # determine action/arm with largest gain for each game state
        for key in list(table):
            table[key] = self._eps_greedy_probabilities(cum_gains[key])

    def update_strategy(self, num_games):
        """Refresh the strategy table every 100 games once more than 1000 games were played."""
        # update strategy after a number of games, at given intervals, etc. to achieve best performance
        if (num_games % 100 == 0):
            if (num_games > 1000):  # remove for assignment
                self.update_strategy_table()
                print("\n" + self.name + " updated his/her strategy!")

## Table Class

In [None]:
class Table(object):
    """A blackjack table: the players, a dealer and the deck of the current game.

    Set `output` to True to have every step of a game printed.
    """

    def __init__(self, players, n_decks):
        self.n_decks = n_decks
        self.players = players
        self.dealer = Dealer()
        self.dealer.name = "Dealer"
        self.output = False

    def reset(self):
        """Start from a fresh shuffled deck and deal the initial cards."""
        self.deck = Deck(self.n_decks)
        self.reset_players()
        self.reset_dealer()
        self.game_state = []
        self.gain = 0

    def reset_players(self):
        """Reset every player, seat them at this table and deal two cards each."""
        for p in self.players:
            p.reset()
            p.assign_table(self)
            p.draw_card(self.deck)
            p.draw_card(self.deck)

    def reset_dealer(self):
        """Reset the dealer and deal the dealer a single card."""
        self.dealer.reset()
        self.dealer.assign_table(self)
        self.dealer.draw_card(self.deck)

    def play_dealer(self):
        """Let the dealer draw until its rule says stand; return the dealer's hand."""
        while self.dealer.hit_otherwise_stand():
            self.dealer.draw_card(self.deck)
            self.dealer.show_cards()
        return self.dealer.hands[0]

    def play_a_game(self, game_id):
        """Play one complete game with all players and settle the bets."""
        self.reset()
        for p in self.players:

            if self.output:
                print("\n")

            # player determines his bet
            p.bet()

            # player is asked whether he wants to split and/or to double down
            # (the range is fixed up front, so a hand created by a split is
            # not itself offered a split or double down)
            for h in range(len(p.hands)):
                if not p.hands[h].stand and not p.hands[h].is_busted():
                    if p.split_hand(h):
                        p.do_split_hand(self.deck, h)
                    if p.double_down(h):
                        p.do_double_down(self.deck, h)
                        p.show_cards(h)

            # player is asked whether he wants to hit or stand until he busts or stands
            while p.can_play():
                for h in range(len(p.hands)):
                    if not p.hands[h].stand and not p.hands[h].is_busted():
                        if p.hit_otherwise_stand(h):
                            p.draw_card(self.deck, h)
                            if p.hands[h].value() > 21:
                                if self.output:
                                    print(p.name + ", you busted!")
                        p.show_cards(h)

            if self.output:
                print("\n")

        # The dealer only plays when at least one hand is still in the game.
        if any([p.some_hand_stands() for p in self.players]):
            dhand = self.play_dealer()
        else:
            dhand = self.dealer.hands[0]

        for p in self.players:
            information = self.evaluate_p_vs_d(p, dhand)
            p.process_information(information, game_id)

    def evaluate_p_vs_d(self, p, dhand):
        """Compare each hand of player `p` with the dealer's hand `dhand`.

        Returns:
            {'gains': [...]} with one payout per hand: 0 for a loss, the bet
            for a draw, 2 * bet for a win and 2.5 * bet for a blackjack.
            A blackjack only counts when the player holds a single hand.
        """
        gains = [0] * len(p.hands)
        for i, phand in enumerate(p.hands):
            if phand.is_busted():
                # player loses
                if self.output:
                    print("\n" + p.name + "'s hand number " + str(i) +
                          " is busted! (Value: " + str(phand.value())
                          + ") Bet of " + str(phand.bet) + " credits is lost!")
                gains[i] = 0
            elif dhand.is_blackjack() and phand.is_blackjack() and (len(p.hands) == 1):
                # drawn
                if self.output:
                    print("\n" + p.name + "'s hand number " + str(i) +
                          " is a blackjack but dealer also has a blackjack! Getting bet of "
                          + str(phand.bet) + " back!")
                gains[i] = phand.bet
            elif phand.is_triple_seven():
                # player wins
                # NOTE(review): the message announces winnings of 1.5 * bet but
                # the payout below is 2 * bet (i.e. winnings of 1 * bet) --
                # confirm which of the two is intended.
                if self.output:
                    print("\n" + p.name + "'s hand number " + str(i) +
                          " is a tripe 7! Winning " + str(1.5 * phand.bet)
                          + " credits in addition to bet of " + str(phand.bet) + " credits!")
                gains[i] = 2*phand.bet
            elif dhand.is_blackjack():
                # player loses
                if self.output:
                    print("\nDealer has a blackjack!  Bet of "
                          + str(phand.bet) + " credits is lost!")
                gains[i] = 0
            elif phand.is_blackjack() and (len(p.hands) == 1):
                # player wins
                if self.output:
                    print("\n" + p.name + "'s hand number " + str(i) +
                          " is a blackjack! Winning " + str(1.5*phand.bet)
                          + " credits in addition to bet of " + str(phand.bet) + " credits!")
                gains[i] = 2.5*phand.bet
            elif dhand.is_busted():
                # player wins
                if self.output:
                    print("\nDealer is busted! Winning " + str(phand.bet)
                          + " credits in addition to bet of " + str(phand.bet) + " credits!")
                gains[i] = 2*phand.bet
            elif dhand.value() < phand.value():
                # player wins
                if self.output:
                    print("\n" + p.name + "'s hand number " + str(i) +
                          " has larger value (" + str(phand.value())
                          + ") than dealer's hand! Winning "
                          + str(phand.bet) + " credits in addition to bet of "
                          + str(phand.bet) + " credits!")
                gains[i] = 2*phand.bet
            elif dhand.value() == phand.value():
                # drawn
                if self.output:
                    print("\n" + p.name + "'s hand number " + str(i) +
                          " has same value (" + str(phand.value())
                          + ") as dealer's hand! Getting bet of "
                          + str(phand.bet) + " back!")
                gains[i] = phand.bet
            else:
                # player loses
                if self.output:
                    print("\n" + p.name + "'s hand number " + str(i) +
                          " has smaller value (" + str(phand.value())
                          + ") than dealer's hand! Bet of "
                          + str(phand.bet) + " credits is lost!")
                gains[i] = 0
        return {'gains': gains}

## Play Blackjack

In [None]:
%matplotlib

# Seed both random number generators: the strategic players draw from
# `random`, the bandits draw their arm with `np.random.choice`.
random.seed(2)
np.random.seed(2)

do_plot = False

n_games = 1000
n_decks = 6

# create and add players
p0 = OptimalPlayer(name = "Optimal")
p1 = RandomPlayer(name = "Rando")
p2 = AlwaysStandPlayer(name = "AlwaysStand")
p3 = EpsGreedyBandit(eps = 0.2, name = "epsStanding")
p4 = EpsGreedyContextualBandit(eps = 0.01, name = "epsContextual")
players = [p0, p1, p2, p3, p4]

# Credits[i, g] holds the credit balance of player i after game g.
Credits = np.zeros((len(players), n_games))

if do_plot:
    plt.close("all")
    plt.axis([0, n_games, -1000, 1000])
    colors = plt.get_cmap('jet')(np.linspace(0, 1.0, len(players)))
    lines = [plt.plot([], [], label=p.name, color = colors[i])[0] for i, p in enumerate(players)]
    plt.legend(loc='upper right', fontsize=10)
    plt.show()

tableETH = Table(players, n_decks)
# tableETH.output = True


for game_id in range(n_games):
    if tableETH.output:
        print("\n\n ---- Game number " + str(game_id) + " is being played! ----")
    else:
        # "\r" rewrites the same console line instead of printing a new one per game.
        sys.stdout.write("\rYou have finished %d games" % game_id)
        sys.stdout.flush()

    # play game
    tableETH.play_a_game(game_id)

    # record the balances for plotting
    for i, p in enumerate(players):
        Credits[i, game_id] = p.credits

    # store the finished game and let every player adapt its strategy
    for p in players:
        p.history.add_game(p.game)
        p.update_strategy(game_id)

    if (game_id >= 10000):
        print_at = 10000
        if (game_id % print_at == 0):
            for i, p in enumerate(players):
                print("\nPlayer " + p.name + " after game no. " + str(game_id) +
                      ": Average gain over last " + str(print_at) + " games: " +
                      str((Credits[i,game_id] - Credits[i,game_id-print_at]) / print_at))

    if do_plot:
        # Update plot every 20 games
        if (game_id % 20 == 0):
            if (np.min(Credits) < plt.gca().get_ylim()[0]):
                plt.gca().set_ylim([np.min(Credits)-1000,100])
            for i in range(len(players)):
                lines[i].set_xdata(range(game_id+1))
                lines[i].set_ydata(Credits[i,0:(game_id+1)])
            plt.draw()


print("\n")
for p in players:
    print("Credit of player " + str(p.name) + " : " + str(p.credits))

## Plot afterwards

In [None]:
plot_after = True

# Draw one credit curve per player for the games played so far
# (game_id still holds the index of the last game from the simulation loop).
if plot_after:
    plt.close("all")
    plt.axis([0, n_games, -1000, 1000])
    colors = plt.get_cmap('jet')(np.linspace(0, 1.0, len(players)))
    lines = []
    for i, p in enumerate(players):
        lines.append(plt.plot([], [], label=p.name, color = colors[i])[0])
    plt.legend(loc='lower left', fontsize=10)
    plt.show()
    # Extend the y-axis downwards when some balance fell below the visible range.
    if plt.gca().get_ylim()[0] > np.min(Credits):
        plt.gca().set_ylim([np.min(Credits)-1000,100])
    for i, line in enumerate(lines):
        line.set_xdata(range(game_id+1))
        line.set_ydata(Credits[i,0:(game_id+1)])
    plt.draw()