In [1]:
import numpy as np

In [57]:
N_CARDS = 3
N_ACTIONS = 5
N_GAMES = 10
POPULATION_SIZE = 50
TOURNAMENT_PROB = 0.6
MUTATION_PROB = 0.2
CREEP_RATE = 0.1
CARDS = np.arange(N_CARDS)
INDIVIDUALS = np.arange(POPULATION_SIZE)
N_OPPONENTS = 10
MAX_SCORE = N_GAMES*N_OPPONENTS*2

In [86]:
def compare_cards(c1, c2, score = 1):
    if(c1 > c2): 
        return (score,-score)
    else:
        return (-score,score)
    
def choose_action(player, card, available_actions):
    v = player[[N_ACTIONS*card+action for action in available_actions]]
    
    if np.all(v == 0): # In case all zeros
        v = np.ones_like(v)/len(v)
    else: # Normalize to probability
        v += np.abs(np.min(v))
        v /= np.sum(v)
    
    return np.random.choice(available_actions , p = v)

In [253]:
p1_dict = {"raise" : 0, "check" : 1, "fold" : 2, "call to raise" : 3, "fold to raise" : 4}
p2_dict = {"call" : 0, "fold to raise" : 1, "check": 2, "raise" : 3, "fold to check" : 4}

def play(p1, p2, print_game = False):
    if print_game: print("----- NEW GAME -----")

    p1_card, p2_card =  np.random.permutation(CARDS)[:2]
    p1_action = choose_action(p1, p1_card, [0,1,2])    
    
    if(p1_action == 0): # Raise
        p2_action = choose_action(p2, p2_card, [0,1])
    elif(p1_action == 1): # Check
        p2_action = choose_action(p2, p2_card, [2,3,4])
    
    elif(p1_action == 2): # Fold
        if print_game: print("P1 Fold with card {:d}, P2 had card {:d}".format(p1_card, p2_card))
        return (-1,1)
    
    
    
    if(p2_action == 0): # Raise / Call
        if print_game: print("P1 Raises with card {:d}, P2 Calls with card {:d}".format(p1_card, p2_card))
        return compare_cards(p1_card, p2_card, score = 2)
    
    elif(p2_action == 1): # Raise / Fold
        if print_game: print("P1 Raises with card {:d}, P2 Folds with card {:d}".format(p1_card, p2_card))
        return (1,-1) 
    
    elif(p2_action == 2): # Check / Check
        if print_game: print("P1 Checks with card {:d}, P2 Checks with card {:d}".format(p1_card, p2_card))
        return compare_cards(p1_card, p2_card)  
    
    elif(p2_action == 3): # Check / Raise
        p1_action_2 = choose_action(p1,p1_card, [3,4])

        if(p1_action_2 == 3): # Check / Raise / Call
            if print_game: print("P1 Checks with card {:d}, P2 Raises with card {:d}, P1 Calls".format(p1_card, p2_card))
            score = compare_cards(p1_card, p2_card,  score = 2)
            return score
                
        if(p1_action_2 == 4): # Check / Raise / Fold
            if print_game: print("P1 Checks with card {:d}, P2 Raises with card {:d}, P1 Folds".format(p1_card, p2_card))
            return (-1,1)
        
    elif(p2_action == 4): # Check / Fold
        if print_game: print("P1 Checks with card {:d}, P2 Folds with card {:d}".format(p1_card, p2_card))
        return (1,-1)


In [88]:
def evaluate(population_1, population_2):
    
    fitness_1 = np.zeros(POPULATION_SIZE)
    fitness_2 = np.zeros(POPULATION_SIZE) 
    
    for player in INDIVIDUALS:
        for game in range(N_GAMES):
            opponents = np.random.permutation(POPULATION_SIZE)[:N_OPPONENTS]
            for opponent in opponents:
                score = play(population_1[player], population_2[opponent])
                fitness_1[player] += score[0]
                fitness_2[opponent] += score[1]

    
    return (fitness_1/MAX_SCORE, fitness_2/MAX_SCORE)

In [89]:
def tournament_selection(population, fitness):
    
    p = np.random.permutation(POPULATION_SIZE)
    
    i1 = p[0]
    i2 = p[1]
    
    if(fitness[i1] > fitness[i2]):
        i_max = i1
        i_min = i2
    else:
        i_max = i2
        i_min = i1
    
    if(np.random.rand() < TOURNAMENT_PROB):
        return i_max
    else:
        return i_min
            
def wheel_selection(population, fitness):
    temp = fitness + np.min(fitness)
    distribution = temp/np.sum(temp)
    return np.random.choice(np.arange(POPULATION_SIZE), p= distribution)
    

In [90]:
def crossover(individual_1, individual_2):
    
    crossover_point = np.random.randint(len(individual_1))

    child_1 = np.concatenate((individual_1[:crossover_point],individual_2[crossover_point:]))
    child_2 = np.concatenate((individual_2[:crossover_point],individual_1[crossover_point:]))
    
    return (child_1, child_2)

In [91]:
def mutate(individual):
    
    creeps = CREEP_RATE*individual*np.random.randn(len(individual))
    mutations = (np.random.rand(len(individual)) < MUTATION_PROB)*creeps

    return individual + mutations

In [105]:
p1 = np.random.rand(POPULATION_SIZE, N_CARDS* N_ACTIONS)
p2 = np.random.rand(POPULATION_SIZE, N_CARDS* N_ACTIONS)

f1,f2 = evaluate(p1, p2)

In [106]:
for generation in range(100):
    print("Generation", generation)
    new_population_1 = np.zeros_like(p1)
    new_population_2 = np.zeros_like(p2)
    
    for i in range(0,POPULATION_SIZE,2):
        ind11 = tournament_selection(p1,f1)
        ind12 = tournament_selection(p1,f1)
        
        ind21 = tournament_selection(p2,f2)
        ind22 = tournament_selection(p2,f2)
        
        child_11, child_12 = crossover(p1[ind11], p2[ind12])
        child_21, child_22 = crossover(p1[ind21], p2[ind22])
        
        child_11 = mutate(child_11)
        child_12 = mutate(child_12)
        child_21 = mutate(child_21)
        child_22 = mutate(child_22)
        
        new_population_1[i] = child_11
        new_population_1[i+1] = child_12
        new_population_2[i] = child_21
        new_population_2[i+1] = child_22
        
    best_indivudal_1 = np.argmax(f1)
    best_indivudal_2 = np.argmax(f2)
    
    new_population_1[0] = p1[best_indivudal_1]
    new_population_2[0] = p2[best_indivudal_2]
    
    
    p1 = new_population_1
    p2 = new_population_2
    print(np.max(f1), "--" , np.max(f2))
    f1,f2 = evaluate(p1,p2)


Generation 0
0.185 -- 0.23
Generation 1
0.295 -- 0.18
Generation 2
0.175 -- 0.185
Generation 3
0.195 -- 0.185
Generation 4
0.12 -- 0.17
Generation 5
0.135 -- 0.25
Generation 6
0.145 -- 0.165
Generation 7
0.215 -- 0.185
Generation 8
0.165 -- 0.12
Generation 9
0.17 -- 0.175
Generation 10
0.14 -- 0.215
Generation 11
0.18 -- 0.24
Generation 12
0.14 -- 0.23
Generation 13
0.175 -- 0.17
Generation 14
0.155 -- 0.145
Generation 15
0.19 -- 0.115
Generation 16
0.185 -- 0.145
Generation 17
0.145 -- 0.145
Generation 18
0.215 -- 0.105
Generation 19
0.215 -- 0.12
Generation 20
0.135 -- 0.13
Generation 21
0.17 -- 0.14
Generation 22
0.15 -- 0.145
Generation 23
0.18 -- 0.145
Generation 24
0.2 -- 0.115
Generation 25
0.175 -- 0.135
Generation 26
0.215 -- 0.11
Generation 27
0.145 -- 0.115
Generation 28
0.195 -- 0.11
Generation 29
0.205 -- 0.125
Generation 30
0.185 -- 0.145
Generation 31
0.17 -- 0.165
Generation 32
0.195 -- 0.15
Generation 33
0.21 -- 0.19
Generation 34
0.195 -- 0.15
Generation 35
0.265 -- 0

In [247]:
def print_player(player, first_player = True):
    matrix = player.reshape(N_CARDS, N_ACTIONS)
    
    if(first_player):
        print("Card \t   First Action \tSecond Action")
        print(" \t    R    Ch   F \t   Ca   F")
        for card in range(N_CARDS):
            print(" ",card,\
                  "  |  ",\
                  np.array2string(matrix[card,:3]/np.sum(matrix[card,:3]), precision=2), \
                  "  |  " ,\
                  np.array2string(matrix[card,3:]/np.sum(matrix[card,3:]), precision=2))
    else:
        print("Card \t    If Raise \t \tIf Check")
        print(" \t  Ca   F  \t      Ch    R    F")
        for card in range(N_CARDS):
            print(" ",card,\
                  "  |",\
                  np.array2string(matrix[card,:2]/np.sum(matrix[card,:2]), precision=2), \
                  "\t|  " ,\
                  np.array2string(matrix[card,2:]/np.sum(matrix[card,2:]), precision=2))
            
        

## Best player 1 strategy

In [225]:
print_player(p1[np.argmax(f1)], first_player = True)

Card 	   First Action 	Second Action
 	    R    Ch   F 	   Ca   F
  0   |   [0.34 0.27 0.39]   |   [0.48 0.52]
  1   |   [0.34 0.62 0.04]   |   [0.36 0.64]
  2   |   [0.85 0.12 0.02]   |   [0.97 0.03]


## Best player 2 strategy

In [248]:
print_player(p2[np.argmax(f2)], first_player= False)

Card 	    If Raise 	 	If Check
 	  Ca   F  	      Ch    R    F
  0   | [0.5 0.5] 	|   [0.41 0.43 0.16]
  1   | [0.32 0.68] 	|   [0.1  0.2  0.69]
  2   | [0.88 0.12] 	|   [0.07 0.92 0.01]


<img src="kuhns_poker.png">

Let best players face off

In [260]:
for game in range(10):
    score = play(p1[np.argmax(f1)], p2[np.argmax(f2)], print_game = True)
    print("Score: ", score)
    print("")

----- NEW GAME -----
P1 Raises with card 2, P2 Calls with card 1
Score:  (2, -2)

----- NEW GAME -----
P1 Raises with card 1, P2 Folds with card 0
Score:  (1, -1)

----- NEW GAME -----
P1 Raises with card 2, P2 Folds with card 1
Score:  (1, -1)

----- NEW GAME -----
P1 Raises with card 0, P2 Calls with card 1
Score:  (-2, 2)

----- NEW GAME -----
P1 Fold with card 0, P2 had card 1
Score:  (-1, 1)

----- NEW GAME -----
P1 Raises with card 0, P2 Folds with card 1
Score:  (1, -1)

----- NEW GAME -----
P1 Fold with card 1, P2 had card 0
Score:  (-1, 1)

----- NEW GAME -----
P1 Raises with card 2, P2 Folds with card 1
Score:  (1, -1)

----- NEW GAME -----
P1 Fold with card 0, P2 had card 1
Score:  (-1, 1)

----- NEW GAME -----
P1 Raises with card 0, P2 Folds with card 1
Score:  (1, -1)

