In [1]:
import numpy as np
import random
import itertools
import matplotlib.pyplot as plt

# Let's play one against the other 

In [46]:
cards = ["Ace", "King", "Queen", "Jack", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
signs = ["Club", "Diamond", "Heart", "Spade"]
initialW = 100
n = 7
m = 11
it = int(5*1e4)
lr = .2
exp = .4
decay = 1.
maxBet = 10
parameters = [[1, 1] for i in range(maxBet)]

In [67]:
class Cards: 
    
    def __init__(self, firstPlayer, secondPlayer):
        """Define the deck of cards"""
        self.cards = 4*list(range(n, m+1))
        self.firstPlayer = firstPlayer
        self.secondPlayer = secondPlayer
        self.turn = random.randint(0, 1)
        
    def deal(self):
        """Distribution of cards by the agent :)"""
        random.shuffle(self.cards)
        self.turn = 1 - self.turn
        self.firstPlayer.hand = [self.cards[0], self.cards[2]]
        self.secondPlayer.hand = [self.cards[1], self.cards[3]]
        
    def restart(self):
        self.secondPlayer.reset()
        self.firstPlayer.reset()
        self.turn = random.randint(0, 1)
        
    def reward(self, first, f, second, s, Thompson, tTurn):
        
        if first.action == second.action and sum(first.hand) > sum(second.hand):
            second.worth -= second.action
            st = str(first.hand) + "-" + str(second.action)
            
            if Thompson and not tTurn:
                first.update(st, first.action)
                second.update(second.hand, first.hand, first.action)
            
            elif Thompson and tTurn:
                first.update(first.hand, second.hand, second.action)
                
            else:
                first.update(st, first.action)
            
        elif first.action == second.action and sum(first.hand) < sum(second.hand):
            first.worth -= first.action
            st = str(second.hand) + "-" + str(first.action)
            
            if Thompson and not tTurn:
                second.update(second.hand, first.hand, first.action)
            
            elif Thompson and tTurn:
                first.update(first.hand, second.hand, second.action)
                second.update(st, first.action)
            else:
                second.update(st, first.action)
            
            
        elif second.action == 0:
            second.worth -= s
            
        elif first.action == 0:
            first.worth -= f
            
    def play(self, first, second, human = False, Thompson = False, tTurn = 1):
        
        beforeFirstAction = 1
        actions = list(range(1, min(maxBet, first.worth, second.worth)+1))
        
        if human:
            first.action = first.chooseAction(actions, str(first.hand))
        else:
            first.action = first.chooseAction(actions, None)
            
        beforeSecondAction = 1
        actions = list(range(first.action, min(maxBet, first.worth, second.worth)+1))
        actions.append(0)
        second.action = second.chooseAction(actions, str(second.hand) +"-"+ str(first.action))

        while first.action*second.action > 0 and first.action != second.action:
            
            beforeFirstAction = first.action
            actions = list(range(second.action , min(maxBet, first.worth, second.worth)+1))
            actions.append(0)
            first.action = first.chooseAction(actions, str(first.hand) +"-"+ str(second.action))
            
            if first.action > 0 and second.action != first.action:
                
                beforeSecondAction = second.action
                actions = list(range(first.action, min(maxBet, first.worth, second.worth)+1))
                actions.append(0)
                second.action = second.chooseAction(actions, str(second.hand) +"-"+ str(first.action))

        self.reward(first, beforeFirstAction, second, beforeSecondAction, Thompson, tTurn)

    def train(self, rounds = 10, Thompson = False):

        score = 0
        for _ in range(rounds):
            self.restart()
            while self.firstPlayer.worth * self.secondPlayer.worth > 0:
                self.deal()
                if self.turn:
                    self.play(self.firstPlayer, self.secondPlayer, Thompson = Thompson, tTurn = self.turn)
                else:
                    self.play(self.secondPlayer, self.firstPlayer, Thompson = Thompson, tTurn = self.turn)
            score += (self.firstPlayer.worth > 0) - (self.secondPlayer.worth > 0)
            
        return score
    
    def human(self, human = False, Thompson = False, result = False):

        self.restart()
        while self.firstPlayer.worth * self.secondPlayer.worth > 0:
            self.deal()
            if self.turn:
                self.play(self.firstPlayer, self.secondPlayer, Thompson = Thompson, tTurn = self.turn)
            else:
                self.play(self.secondPlayer, self.firstPlayer, human, Thompson = Thompson, tTurn = self.turn)
            
            if result:
                print("-------------------------")
                print("The agent hand and last action were: {0} {1}".format(self.secondPlayer.hand, self.secondPlayer.action))
                print("-------------------------")
                print("The adversary hand and last action were: {0} {1}".format(self.firstPlayer.hand, self.firstPlayer.action))
                print("-------------------------")
                print("Current Net Worth: {0}-{1}".format(self.firstPlayer.worth, self.secondPlayer.worth))
                print("=========================")
            
        if self.secondPlayer.worth > 0:
            print("\n And the final winner is secondPlayer! :)")
        else:
            print("\n And the final winner is firstPlayer! :(")

In [39]:
class Player:
    
    def __init__(self, human = False):
        self.worth = initialW
        self.statDict = {}
        
    def reset(self):
        self.worth = initialW
        
    def chooseAction(self, actions, state):
        
        if state is None:
            return random.choice(actions)
        
        elif random.random() < exp:
            return random.choice(actions)

        action = None
        vmax = - 1e10
        for act in actions:
            current = 0 if self.statDict.get(state) is None or self.statDict.get(state).get(str(act)) is None else self.statDict.get(state).get(str(act))
            if vmax < current:
                vmax = current
                action = act
                    
            return action
        
    def update(self, state, action):
        if self.statDict.get(state) is None:
            self.statDict[state] = {}
        if self.statDict.get(state).get(str(action)) is None:
            self.statDict[state][str(action)] = 0
        self.statDict[state][str(action)] += lr*(action - self.statDict[state][str(action)])


In [40]:
class HumanPlayer(Player):
        
    def chooseAction(self, actions, state):
        print("State: ", state)
        action = int(input("Input your action: ")) 
        
        while action not in actions:
            print("Try a number in ", actions)
            action = int(input("Input your action: "))  
            
        return action

In [32]:
## TO TRAIN A BASIC SIMPLE MODEL YOU CAN USE, IT IS NOT VERY SMART
# firstPlayer = Player()
# secondPlayer = Player()
# cards = Cards(firstPlayer, secondPlayer)
# cards.train(it)

In [7]:
# TO PLAY AGAINST A VIRTUAL PLAYER, YOU CAN UNCOMMENT THE FOLLOWING LINES AFTER TRAINING THE AGENT
# firstPlayer.reset()
# human = HumanPlayer()
# cards = Cards(firstPlayer, human)
# cards.human()

# Thompson Sampling

In [16]:
class ThompsonSampling(Player):
    def __init__(self, parameters = parameters):
        self.parameters = parameters

    def chooseAction(self, actions, state):
        if state is None:
            return random.choice(actions)
        
        hand, action = state.split("-")
        cards = [int(card) for card in hand[1:-1].split(",")]
        action = int(action)
        
        estimated = random.betavariate(self.parameters[action-1][0], self.parameters[action-1][1])
        estimated = 2*n + int(2*(m-n)*estimated)
    
        if sum(cards) >= estimated: 
            return int(action)
        return 0 
        
    def update(self, firstHand, secondHand, secondAction): 
        # State contains the information on your hand, 
        # what the other played. action is your choice.
        if sum(firstHand) > sum(secondHand):
            self.parameters[secondAction - 1][0] += 0.5
        else:
            self.parameters[secondAction - 1][1] += 0.5

In [68]:
firstPlayer = ThompsonSampling()
secondPlayer = Player()
cards = Cards(firstPlayer, secondPlayer)
cards.train(it, Thompson = True)

21840

In [71]:
cards = Cards(firstPlayer, HumanPlayer())
cards.human(human = True, Thompson = True, result = True)

State:  [10, 10]
Input your action: 5
-------------------------
The agent hand and last action were: [10, 10] 5
-------------------------
The adversary hand and last action were: [8, 10] 0
-------------------------
Current Net Worth: 99-100
State:  [7, 11]-6
Input your action: 6
-------------------------
The agent hand and last action were: [7, 11] 6
-------------------------
The adversary hand and last action were: [10, 8] 6
-------------------------
Current Net Worth: 99-100
State:  [9, 11]
Input your action: 5
-------------------------
The agent hand and last action were: [9, 11] 5
-------------------------
The adversary hand and last action were: [8, 10] 0
-------------------------
Current Net Worth: 98-100
State:  [9, 10]-7
Input your action: 7
-------------------------
The agent hand and last action were: [9, 10] 7
-------------------------
The adversary hand and last action were: [9, 10] 7
-------------------------
Current Net Worth: 98-100
State:  [10, 11]
Input your action: 5


Input your action: 2
-------------------------
The agent hand and last action were: [11, 10] 2
-------------------------
The adversary hand and last action were: [11, 9] 2
-------------------------
Current Net Worth: 34-59
State:  [8, 11]
Input your action: 5
-------------------------
The agent hand and last action were: [8, 11] 5
-------------------------
The adversary hand and last action were: [9, 11] 5
-------------------------
Current Net Worth: 34-54
State:  [10, 10]-2
Input your action: 2
-------------------------
The agent hand and last action were: [10, 10] 2
-------------------------
The adversary hand and last action were: [9, 7] 2
-------------------------
Current Net Worth: 32-54
State:  [7, 8]
Input your action: 2
-------------------------
The agent hand and last action were: [7, 8] 2
-------------------------
The adversary hand and last action were: [10, 11] 2
-------------------------
Current Net Worth: 32-52
State:  [10, 10]-2
Input your action: 2
---------------------