In [13]:
import numpy as np

In [14]:
N_CARDS = 3
N_ACTIONS = 2
N_GAMES = 10
POPULATION_SIZE = 50
TOURNAMENT_PROB = 0.6
MUTATION_PROB = 0.2
CREEP_RATE = 0.1
CARDS = np.arange(N_CARDS)
INDIVIDUALS = np.arange(POPULATION_SIZE)
N_OPPONENTS = 10
MAX_SCORE = N_GAMES*N_OPPONENTS*2

In [15]:
def compare_cards(c1, c2, score = 1):
    if(c1 > c2): 
        return (score,-score)
    else:
        return (-score,score)

<img src="kuhns_poker_no_fold.png">

In [24]:
def play(p1, p2):

    p1_card, p2_card =  np.random.permutation(CARDS)[:2]
    p1_bets =  np.random.rand() < p1[2*p1_card]
   
    
    if(p1_bets):
        p2_bets = np.random.rand() < p1[2*p1_card]

        if(p2_bets):
            return compare_cards(p1_card, p2_card, score = 2)
        elif(not p2_bets):
            return (1,-1)
        
    elif(not p1_bets):
        p2_bets = np.random.rand() < p1[2*p1_card+1]
        
        if(p2_bets):
            p1_bets_again =  np.random.rand() < p1[2*p1_card+1]
            
            if(p1_bets_again):
                return compare_cards(p1_card, p2_card, score = 2)
            elif(not p1_bets_again):
                return(1,-1)
        
        elif(not p2_bets):
            return compare_cards(p1_card, p2_card, score = 1)


In [17]:
# NEED TO FIX:
# Player two doesnt neccesarily face N_OPPONENTS opponents and therefore its max score is not the one used. 
# Player two's that happens to play alot of games will be unfairly beneficial

def evaluate(population_1, population_2):
    
    fitness_1 = np.zeros(POPULATION_SIZE)
    fitness_2 = np.zeros(POPULATION_SIZE) 

    for player in INDIVIDUALS:
        for game in range(N_GAMES):
            opponents = np.random.permutation(POPULATION_SIZE)[:N_OPPONENTS]
            for opponent in opponents:
                score = play(population_1[player], population_2[opponent])
                fitness_1[player] += score[0]
                fitness_2[opponent] += score[1]
    

    return (fitness_1/MAX_SCORE, fitness_2/MAX_SCORE)

In [18]:
def tournament_selection(population, fitness):
    
    p = np.random.permutation(POPULATION_SIZE)
    
    i1 = p[0]
    i2 = p[1]
    
    if(fitness[i1] > fitness[i2]):
        i_max = i1
        i_min = i2
    else:
        i_max = i2
        i_min = i1
    
    if(np.random.rand() < TOURNAMENT_PROB):
        return i_max
    else:
        return i_min
            
def wheel_selection(population, fitness):
    temp = fitness + np.min(fitness)
    distribution = temp/np.sum(temp)
    return np.random.choice(np.arange(POPULATION_SIZE), p= distribution)
    

In [19]:
def crossover(individual_1, individual_2):
    
    crossover_point = np.random.randint(len(individual_1))

    child_1 = np.concatenate((individual_1[:crossover_point],individual_2[crossover_point:]))
    child_2 = np.concatenate((individual_2[:crossover_point],individual_1[crossover_point:]))
    
    return (child_1, child_2)

In [20]:
def mutate(individual):
    
    creeps = CREEP_RATE*individual*np.random.randn(len(individual))
    mutations = (np.random.rand(len(individual)) < MUTATION_PROB)*creeps

    return individual + mutations

In [25]:
p1 = np.random.rand(POPULATION_SIZE, N_CARDS* N_ACTIONS)
p2 = np.random.rand(POPULATION_SIZE, N_CARDS* N_ACTIONS)

f1,f2 = evaluate(p1, p2)

In [26]:
for generation in range(100):
    print("Generation", generation)
    new_population_1 = np.zeros_like(p1)
    new_population_2 = np.zeros_like(p2)
    
    for i in range(0,POPULATION_SIZE,2):
        ind11 = tournament_selection(p1,f1)
        ind12 = tournament_selection(p1,f1)
        
        ind21 = tournament_selection(p2,f2)
        ind22 = tournament_selection(p2,f2)
        
        child_11, child_12 = crossover(p1[ind11], p2[ind12])
        child_21, child_22 = crossover(p1[ind21], p2[ind22])
        
        child_11 = mutate(child_11)
        child_12 = mutate(child_12)
        child_21 = mutate(child_21)
        child_22 = mutate(child_22)
        
        new_population_1[i] = child_11
        new_population_1[i+1] = child_12
        new_population_2[i] = child_21
        new_population_2[i+1] = child_22
        
    best_indivudal_1 = np.argmax(f1)
    best_indivudal_2 = np.argmax(f2)
    
    new_population_1[0] = p1[best_indivudal_1]
    new_population_2[0] = p2[best_indivudal_2]
    
    
    p1 = new_population_1
    p2 = new_population_2
    print(np.max(f1), "--" , np.max(f2))
    f1,f2 = evaluate(p1,p2)


Generation 0
0.4 -- 0.02
Generation 1
0.365 -- 0.095
Generation 2
0.4 -- 0.02
Generation 3
0.37 -- 0.02
Generation 4
0.33 -- 0.02
Generation 5
0.355 -- 0.085
Generation 6
0.355 -- 0.03
Generation 7
0.405 -- 0.015
Generation 8
0.34 -- 0.135
Generation 9
0.395 -- 0.01
Generation 10
0.33 -- 0.055
Generation 11
0.395 -- 0.085
Generation 12
0.4 -- 0.065
Generation 13
0.375 -- 0.01
Generation 14
0.39 -- -0.02
Generation 15
0.375 -- 0.0
Generation 16
0.34 -- -0.03
Generation 17
0.36 -- -0.035
Generation 18
0.42 -- -0.01
Generation 19
0.39 -- -0.02
Generation 20
0.365 -- 0.0
Generation 21
0.4 -- 0.02
Generation 22
0.4 -- 0.0
Generation 23
0.42 -- -0.015
Generation 24
0.42 -- -0.045
Generation 25
0.405 -- 0.0
Generation 26
0.395 -- 0.01
Generation 27
0.34 -- -0.03
Generation 28
0.5 -- -0.04
Generation 29
0.43 -- -0.035
Generation 30
0.395 -- -0.015
Generation 31
0.385 -- -0.035
Generation 32
0.405 -- 0.04
Generation 33
0.475 -- -0.035
Generation 34
0.385 -- -0.03
Generation 35
0.355 -- -0.025
G

In [247]:
def print_player(player, first_player = True):
    matrix = player.reshape(N_CARDS, N_ACTIONS)
    
    if(first_player):
        print("Card \t   First Action \tSecond Action")
        print(" \t    R    Ch   F \t   Ca   F")
        for card in range(N_CARDS):
            print(" ",card,\
                  "  |  ",\
                  np.array2string(matrix[card,:3]/np.sum(matrix[card,:3]), precision=2), \
                  "  |  " ,\
                  np.array2string(matrix[card,3:]/np.sum(matrix[card,3:]), precision=2))
    else:
        print("Card \t    If Raise \t \tIf Check")
        print(" \t  Ca   F  \t      Ch    R    F")
        for card in range(N_CARDS):
            print(" ",card,\
                  "  |",\
                  np.array2string(matrix[card,:2]/np.sum(matrix[card,:2]), precision=2), \
                  "\t|  " ,\
                  np.array2string(matrix[card,2:]/np.sum(matrix[card,2:]), precision=2))
            
        

## Best player 1 strategy

In [225]:
print_player(p1[np.argmax(f1)], first_player = True)

Card 	   First Action 	Second Action
 	    R    Ch   F 	   Ca   F
  0   |   [0.34 0.27 0.39]   |   [0.48 0.52]
  1   |   [0.34 0.62 0.04]   |   [0.36 0.64]
  2   |   [0.85 0.12 0.02]   |   [0.97 0.03]


## Best player 2 strategy

In [248]:
print_player(p2[np.argmax(f2)], first_player= False)

Card 	    If Raise 	 	If Check
 	  Ca   F  	      Ch    R    F
  0   | [0.5 0.5] 	|   [0.41 0.43 0.16]
  1   | [0.32 0.68] 	|   [0.1  0.2  0.69]
  2   | [0.88 0.12] 	|   [0.07 0.92 0.01]


<img src="kuhns_poker_no_fold.png">

Let best players face off

In [260]:
for game in range(10):
    score = play(p1[np.argmax(f1)], p2[np.argmax(f2)], print_game = True)
    print("Score: ", score)
    print("")

----- NEW GAME -----
P1 Raises with card 2, P2 Calls with card 1
Score:  (2, -2)

----- NEW GAME -----
P1 Raises with card 1, P2 Folds with card 0
Score:  (1, -1)

----- NEW GAME -----
P1 Raises with card 2, P2 Folds with card 1
Score:  (1, -1)

----- NEW GAME -----
P1 Raises with card 0, P2 Calls with card 1
Score:  (-2, 2)

----- NEW GAME -----
P1 Fold with card 0, P2 had card 1
Score:  (-1, 1)

----- NEW GAME -----
P1 Raises with card 0, P2 Folds with card 1
Score:  (1, -1)

----- NEW GAME -----
P1 Fold with card 1, P2 had card 0
Score:  (-1, 1)

----- NEW GAME -----
P1 Raises with card 2, P2 Folds with card 1
Score:  (1, -1)

----- NEW GAME -----
P1 Fold with card 0, P2 had card 1
Score:  (-1, 1)

----- NEW GAME -----
P1 Raises with card 0, P2 Folds with card 1
Score:  (1, -1)



In [27]:
p1[np.argmax(f1)].reshape(3,2)

array([0.37809777, 0.15182482, 0.07488178, 0.36006961, 0.8102644 ,
       0.92412887])