In [None]:
import numpy as np
import random
import itertools
import matplotlib.pyplot as plt

# Let's play one against the other 

In [None]:
# --- Card vocabulary -------------------------------------------------------
# Face names and suits. None of the classes visible in this notebook read
# these two lists.
# NOTE(review): the list contains both "Ace" and "1" -- confirm this is intended.
cards = ["Ace", "King", "Queen", "Jack"] + [str(rank) for rank in range(1, 11)]
signs = ["Club", "Diamond", "Heart", "Spade"]

# --- Game setup ------------------------------------------------------------
initialW = 50    # worth each player starts with and is reset to
n = 7            # lowest card value in the deck
m = 11           # highest card value in the deck
maxBet = 20      # cap on a single bet (also limited by both players' worth)

# --- Learning setup --------------------------------------------------------
it = 50000       # number of training rounds passed to Cards.train in the demo cell
lr = 0.2         # step size of the incremental value update in Player.update
exp = 0.4        # probability that Player.chooseAction picks a random action
decay = 1.0      # not referenced by the visible code
ittrain = 1      # not referenced by the visible code

In [None]:
class Cards:
    """Two-player betting game played with a small deck of numeric cards.

    Every hand, each player receives two cards and the players alternate
    bets. A responder may match the opponent's bet, raise it, or fold
    (action ``0``). A session lasts until one player's worth is no longer
    positive.

    The player objects must expose ``hand``, ``worth`` and ``action``
    attributes plus ``chooseAction(actions, state)``, ``update(state, action)``
    and ``reset()``.
    """

    def __init__(self, firstPlayer, secondPlayer):
        """Build the deck and seat the two players."""
        # Four copies of every value from n to m inclusive.
        self.cards = 4*list(range(n, m+1))
        self.firstPlayer = firstPlayer
        self.secondPlayer = secondPlayer
        # 1 -> firstPlayer opens the hand, 0 -> secondPlayer opens.
        self.turn = random.randint(0, 1)

    def deal(self):
        """Shuffle the deck, switch the opener and give each player two cards."""
        random.shuffle(self.cards)
        self.turn = 1 - self.turn
        # Cards are handed out alternately from the top of the shuffled deck.
        self.firstPlayer.hand = [self.cards[0], self.cards[2]]
        self.secondPlayer.hand = [self.cards[1], self.cards[3]]

    def restart(self):
        """Reset both players and draw a new random opener."""
        self.secondPlayer.reset()
        self.firstPlayer.reset()
        self.turn = random.randint(0, 1)

    def reward(self, first, f, second, s):
        """Settle a finished hand.

        Args:
            first: player who opened the hand.
            f: the bet ``first`` held before its last action; charged if it folded.
            second: player who responded.
            s: the bet ``second`` held before its last action; charged if it folded.

        On a showdown (equal bets) the player with the lower hand sum loses
        the bet and the winner's value table is updated with the bet as
        reward. Equal hand sums leave both players untouched. A player who
        folded loses the previous bet (``f`` or ``s``).
        """
        showdown = first.action == second.action

        if showdown and sum(first.hand) > sum(second.hand):
            second.worth -= second.action
            st = str(first.hand) + "-" + str(second.action)
            first.update(st, first.action)

        elif showdown and sum(first.hand) < sum(second.hand):
            first.worth -= first.action
            st = str(second.hand) + "-" + str(first.action)
            second.update(st, second.action)

        elif second.action == 0:
            second.worth -= s

        elif first.action == 0:
            first.worth -= f

    @staticmethod
    def _legalBets(lowest, first, second, canFold = True):
        """Return the bets from ``lowest`` up to the cap, plus 0 (fold) if allowed."""
        cap = min(maxBet, first.worth, second.worth)
        bets = list(range(lowest, cap+1))
        if canFold:
            bets.append(0)
        return bets

    def play(self, first, second, human = False):
        """Play one hand of betting between ``first`` (opener) and ``second``.

        Args:
            first: player who places the opening bet.
            second: player who responds.
            human: when True the opener is shown its own hand as state;
                otherwise the opener receives ``None`` as state.

        A responder's state is ``"<own hand>-<opponent's current bet>"``, the
        same key format ``reward`` uses when updating the winner.
        """
        beforeFirstAction = 1
        # The opening bet cannot be a fold.
        actions = self._legalBets(1, first, second, canFold = False)

        if human:
            first.action = first.chooseAction(actions, str(first.hand))
        else:
            first.action = first.chooseAction(actions, None)

        beforeSecondAction = 1
        actions = self._legalBets(first.action, first, second)
        second.action = second.chooseAction(actions, str(second.hand) +"-"+ str(first.action))

        # Keep raising until the bets match or somebody folds.
        while first.action*second.action > 0 and first.action != second.action:

            beforeFirstAction = first.action
            actions = self._legalBets(second.action, first, second)
            first.action = first.chooseAction(actions, str(first.hand) +"-"+ str(second.action))

            if first.action > 0 and second.action != first.action:

                beforeSecondAction = second.action
                actions = self._legalBets(first.action, first, second)
                second.action = second.chooseAction(actions, str(second.hand) +"-"+ str(first.action))

        self.reward(first, beforeFirstAction, second, beforeSecondAction)

    def train(self, rounds = 10):
        """Play ``rounds`` complete sessions between the two seated players.

        Returns:
            The sum over all sessions of +1 when only firstPlayer still has
            positive worth and -1 when only secondPlayer does.
        """
        score = 0
        for _ in range(rounds):
            self.restart()
            while self.firstPlayer.worth * self.secondPlayer.worth > 0:
                self.deal()
                if self.turn:
                    self.play(self.firstPlayer, self.secondPlayer)
                else:
                    self.play(self.secondPlayer, self.firstPlayer)
            score += (self.firstPlayer.worth > 0) - (self.secondPlayer.worth > 0)

        return score

    def human(self):
        """Play one interactive session; secondPlayer is treated as the human."""
        self.restart()
        while self.firstPlayer.worth * self.secondPlayer.worth > 0:
            self.deal()
            if self.turn:
                self.play(self.firstPlayer, self.secondPlayer)
            else:
                # The human opens, so it must be shown its hand.
                self.play(self.secondPlayer, self.firstPlayer, True)

            print("The adversary hand and last action were: {0} {1}".format(self.firstPlayer.hand, self.firstPlayer.action))
            print("-------------------------")
            print("Current Net Worth: {0}-{1}".format(self.firstPlayer.worth, self.secondPlayer.worth))
            print("=========================")

        # The messages must be printed; a bare string expression shows nothing.
        if self.secondPlayer.worth > 0:
            print("\n ========================= \n ========================= \n And the final winner is YOU! :)")
        else:
            print("\n ========================= \n ========================= \n And the final winner is NOT YOU! :(")

In [None]:
class Player:
    """Tabular epsilon-greedy agent for the betting game.

    ``statDict`` maps a state string to a dict of ``str(action) -> value``,
    where the value is a running estimate of the reward obtained with that
    action in that state.
    """

    def __init__(self, human = False):
        """Create a player with the initial worth and an empty value table.

        Args:
            human: accepted for interface compatibility; not used here.
        """
        self.worth = initialW
        self.statDict = {}
        # Both are assigned by the game before they are read.
        self.hand = None
        self.action = None

    def reset(self):
        """Restore the initial worth; learned values are kept."""
        self.worth = initialW

    def chooseAction(self, actions, state):
        """Pick one of ``actions`` for ``state``.

        A random action is returned when ``state`` is None or, with
        probability ``exp``, as exploration. Otherwise the action with the
        highest stored value is returned; unseen actions count as 0 and ties
        go to the earliest candidate in ``actions``.
        """
        if state is None or random.random() < exp:
            return random.choice(actions)

        known = self.statDict.get(state) or {}
        action = None
        vmax = - 1e10
        for act in actions:
            current = known.get(str(act))
            if current is None:
                current = 0
            if vmax < current:
                vmax = current
                action = act

        # Return only after every candidate has been compared.
        return action

    def update(self, state, action):
        """Move the stored value of (state, action) towards ``action`` by ``lr``.

        The reward used as the target is the bet itself (``action``).
        """
        values = self.statDict.setdefault(state, {})
        key = str(action)
        if values.get(key) is None:
            values[key] = 0
        values[key] += lr*(action - values[key])


In [None]:
class HumanPlayer(Player):
    """Player whose actions are typed in at the console."""

    def chooseAction(self, actions, state):
        """Show ``state`` and prompt until the user enters a value in ``actions``.

        Input that is not an integer is treated like any other invalid
        choice: the list of legal actions is shown and the prompt repeats.
        """
        print("State: ", state)

        while True:
            try:
                action = int(input("Input your action: "))
            except ValueError:
                # Non-numeric input: fall through to the re-prompt below.
                action = None

            if action in actions:
                return action

            print("Try a number in ", actions)

In [None]:
## TO TRAIN A BASIC MODEL, UNCOMMENT THE FOLLOWING LINES (THE RESULTING AGENT IS NOT VERY SMART)
# firstPlayer = Player()
# secondPlayer = Player()
# cards = Cards(firstPlayer, secondPlayer)
# cards.train(it)

In [None]:
# TO PLAY AGAINST A VIRTUAL PLAYER, YOU CAN UNCOMMENT THE FOLLOWING LINES AFTER TRAINING THE AGENT
# firstPlayer.reset()
# human = HumanPlayer()
# cards = Cards(firstPlayer, human)
# cards.human()

# Thompson Sampling

In [None]:
class ThompsonSampling(Player):
    """Player that decides whether to keep a bet by sampling a Beta distribution.

    NOTE(review): ``chooseAction`` takes ``(hand, action)`` here, whereas
    ``Player.chooseAction`` takes ``(actions, state)``; confirm how this class
    is meant to be driven before using it with the game loop.
    """

    def __init__(self, parameters):
        """Store the Beta parameters.

        Args:
            parameters: container indexed by action; each entry is assumed to
                be an ``(alpha, beta)`` pair -- TODO confirm against the caller.
        """
        # Initialise worth and statDict like every other Player.
        super().__init__()
        self.parameters = parameters

    def chooseAction(self, hand, action):
        """Return ``action`` if the hand beats a sampled threshold, else 0 (fold).

        A Beta sample in [0, 1] is mapped linearly onto the range of possible
        hand sums, from ``2*n`` to ``2*m``.
        """
        # betavariate takes alpha and beta as two separate arguments.
        alpha, beta = self.parameters[action]
        estimated = random.betavariate(alpha, beta)
        estimated = 2*n + int(2*(m-n)*estimated)

        if sum(hand) >= estimated:
            return action
        return 0

    def update(self, state, action):
        """Placeholder: no learning is performed yet.

        ``state`` contains the information on your hand and what the other
        player played; ``action`` is your choice.
        """
        pass