In [10]:
import numpy as np

In [23]:
N_CARDS = 3
N_ACTIONS = 2
N_GAMES = N_CARDS*(N_CARDS-1)*5*2
POPULATION_SIZE = 60
TOURNAMENT_PROB = 0.6
MUTATION_PROB = 0.1/N_CARDS*N_ACTIONS
CREEP_RATE = 0.05
CARDS = np.arange(N_CARDS)
INDIVIDUALS = np.arange(POPULATION_SIZE)
MAX_SCORE = N_GAMES*POPULATION_SIZE*POPULATION_SIZE*2

In [24]:
def compare_cards(c1, c2, score = 1):
    if(c1 > c2): 
        return (score,-score)
    else:
        return (-score,score)

<img src="kuhns_poker_no_fold.png">

In [25]:
def play(p1, p2):

    p1_card, p2_card =  np.random.permutation(CARDS)[:2]
    p1_bets =  np.random.rand() < p1[2*p1_card]
   
    
    if(p1_bets):
        p2_bets = np.random.rand() < p2[2*p2_card]

        if(p2_bets):
            return compare_cards(p1_card, p2_card, score = 2)
        elif(not p2_bets):
            return (1,-1)
        
    elif(not p1_bets):
        p2_bets = np.random.rand() < p2[2*p2_card+1]
        
        if(p2_bets):
            p1_bets_again =  np.random.rand() < p1[2*p1_card+1]
            
            if(p1_bets_again):
                return compare_cards(p1_card, p2_card, score = 2)
            elif(not p1_bets_again):
                return(-1,1)
        
        elif(not p2_bets):
            return compare_cards(p1_card, p2_card, score = 1)


In [33]:
def evaluate(population_1, population_2):
    
    fitness_1 = np.zeros(POPULATION_SIZE)
    fitness_2 = np.zeros(POPULATION_SIZE) 

    for player in INDIVIDUALS:
        for opponent in INDIVIDUALS:
            for game in range(N_GAMES):
                player_score, opponent_score = play(population_1[player], population_2[opponent])
                fitness_1[player] += player_score
                fitness_2[opponent] += opponent_score
    

    return (fitness_1/MAX_SCORE*1000, fitness_2/MAX_SCORE*1000)

In [27]:
def tournament_selection(population, fitness):
    
    p = np.random.permutation(POPULATION_SIZE)
    
    i1 = p[0]
    i2 = p[1]
    
    if(fitness[i1] > fitness[i2]):
        i_max = i1
        i_min = i2
    else:
        i_max = i2
        i_min = i1
    
    if(np.random.rand() < TOURNAMENT_PROB):
        return i_max
    else:
        return i_min
            
def wheel_selection(population, fitness):
    temp = fitness + np.min(fitness)
    distribution = temp/np.sum(temp)
    return np.random.choice(np.arange(POPULATION_SIZE), p= distribution)
    

In [28]:
def crossover(individual_1, individual_2):
    
    crossover_point = np.random.randint(len(individual_1))

    child_1 = np.concatenate((individual_1[:crossover_point],individual_2[crossover_point:]))
    child_2 = np.concatenate((individual_2[:crossover_point],individual_1[crossover_point:]))
    
    return (child_1, child_2)

In [29]:
def mutate(individual):
    
    creeps = CREEP_RATE*np.random.randn(len(individual))
    mutations = (np.random.rand(len(individual)) < MUTATION_PROB)*creeps
    mutated_individual = individual + mutations

    return mutated_individual.clip(0,1)

In [30]:
def print_player(player):

    print("Card \t      a0 prob \t      a2 prob")
    for card in range(N_CARDS):
         print("  {:d}\t|\t{:.0f}%\t|\t{:.0f}%".format(card, player[card*2]*100, player[card*2+1]*100))

In [31]:
p1 = np.random.rand(POPULATION_SIZE, N_CARDS* N_ACTIONS)
p2 = np.random.rand(POPULATION_SIZE, N_CARDS* N_ACTIONS)
new_p1 = p1.copy()
new_p2 = p2.copy()

f1,f2 = evaluate(p1, p2)

In [32]:
for generation in range(1000):
    

    for i in range(0,POPULATION_SIZE,2):
        ind11 = tournament_selection(p1,f1)
        ind12 = tournament_selection(p1,f1)
        
        ind21 = tournament_selection(p2,f2)
        ind22 = tournament_selection(p2,f2)
        
        child_11, child_12 = crossover(p1[ind11], p2[ind12])
        child_21, child_22 = crossover(p1[ind21], p2[ind22])
        
        child_11 = mutate(child_11)
        child_12 = mutate(child_12)
        child_21 = mutate(child_21)
        child_22 = mutate(child_22)
        
        new_p1[i] = child_11
        new_p1[i+1] = child_12
        new_p2[i] = child_21
        new_p2[i+1] = child_22
        
    best_indivudal_1 = np.argmax(f1)
    best_indivudal_2 = np.argmax(f2)
    
    new_p1[0] = p1[best_indivudal_1]
    new_p2[0] = p2[best_indivudal_2]
    
    
    p1 = new_p1.copy()
    p2 = new_p2.copy()
    if(generation % 5 == 0):
        print("Generation: ", generation)
        print("Fitness: ", np.max(f1), np.max(f2))
        print("Strategy player 1: ")
        print_player(p1[np.argmax(f1)])
        print("Strategy player 2: ")
        print_player(p1[np.argmax(f2)])
        
        print("---------------------------------------")
    f1,f2 = evaluate(p1,p2)


Generation:  0
Fitness:  0.003840277777777778 0.0027222222222222222
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	33%	|	11%
  1	|	63%	|	55%
  2	|	14%	|	57%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	82%	|	63%
  1	|	31%	|	21%
  2	|	38%	|	75%
---------------------------------------
Generation:  5
Fitness:  0.0035092592592592593 0.0022546296296296294
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	13%	|	93%
  1	|	8%	|	23%
  2	|	21%	|	67%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	96%	|	15%
  1	|	98%	|	57%
  2	|	96%	|	85%
---------------------------------------
Generation:  10
Fitness:  0.00268287037037037 0.0018310185185185185
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	73%	|	45%
  1	|	33%	|	87%
  2	|	76%	|	54%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	96%	|	6%
  1	|	70%	|	49%
  2	|	89%	|	85%
---------------------------------------
Generation:  15
Fitness:  0.0019097222222222222 0.001537037

Generation:  135
Fitness:  0.0003449074074074074 0.0007731481481481481
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	0%	|	0%
  1	|	43%	|	4%
  2	|	100%	|	75%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	5%	|	11%
  1	|	43%	|	8%
  2	|	100%	|	60%
---------------------------------------
Generation:  140
Fitness:  0.00036342592592592595 0.0008078703703703704
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	19%	|	10%
  1	|	56%	|	1%
  2	|	90%	|	85%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	1%	|	2%
  1	|	42%	|	61%
  2	|	100%	|	80%
---------------------------------------
Generation:  145
Fitness:  0.0003171296296296296 0.0007453703703703704
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	2%	|	11%
  1	|	38%	|	3%
  2	|	92%	|	77%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	0%	|	11%
  1	|	29%	|	4%
  2	|	100%	|	85%
---------------------------------------
Generation:  150
Fitness:  0.00047685185185185184 0.0008

Generation:  270
Fitness:  0.0003449074074074074 0.0009398148148148148
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	35%	|	39%
  1	|	9%	|	22%
  2	|	99%	|	86%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	41%	|	8%
  1	|	25%	|	13%
  2	|	100%	|	74%
---------------------------------------
Generation:  275
Fitness:  0.00038888888888888887 0.0012407407407407408
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	14%	|	17%
  1	|	14%	|	66%
  2	|	93%	|	56%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	13%	|	56%
  1	|	13%	|	80%
  2	|	100%	|	72%
---------------------------------------
Generation:  280
Fitness:  0.000400462962962963 0.0012476851851851852
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	0%	|	17%
  1	|	7%	|	61%
  2	|	100%	|	74%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	8%	|	56%
  1	|	13%	|	66%
  2	|	95%	|	77%
---------------------------------------
Generation:  285
Fitness:  0.000275462962962963 0.0

Generation:  405
Fitness:  0.0010231481481481482 0.0007777777777777777
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	0%	|	2%
  1	|	22%	|	64%
  2	|	93%	|	100%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	0%	|	5%
  1	|	0%	|	56%
  2	|	93%	|	99%
---------------------------------------
Generation:  410
Fitness:  0.0010763888888888889 0.0006504629629629629
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	0%	|	0%
  1	|	0%	|	54%
  2	|	81%	|	91%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	57%	|	3%
  1	|	0%	|	30%
  2	|	92%	|	100%
---------------------------------------
Generation:  415
Fitness:  0.001201388888888889 0.000662037037037037
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	57%	|	3%
  1	|	3%	|	43%
  2	|	100%	|	100%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	66%	|	0%
  1	|	0%	|	71%
  2	|	100%	|	97%
---------------------------------------
Generation:  420
Fitness:  0.0013171296296296297 0.000706018

Generation:  540
Fitness:  0.0016203703703703703 0.0003171296296296296
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	18%	|	0%
  1	|	8%	|	41%
  2	|	95%	|	98%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	14%	|	8%
  1	|	2%	|	65%
  2	|	81%	|	98%
---------------------------------------
Generation:  545
Fitness:  0.0018449074074074073 0.0006319444444444444
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	16%	|	0%
  1	|	7%	|	54%
  2	|	94%	|	98%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	94%	|	13%
  1	|	0%	|	58%
  2	|	57%	|	98%
---------------------------------------
Generation:  550
Fitness:  0.0018310185185185185 0.00021064814814814815
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	91%	|	4%
  1	|	0%	|	41%
  2	|	95%	|	96%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	4%	|	4%
  1	|	1%	|	41%
  2	|	57%	|	98%
---------------------------------------
Generation:  555
Fitness:  0.0015787037037037037 0.000416666

Generation:  675
Fitness:  0.0013310185185185185 0.0009236111111111112
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	100%	|	15%
  1	|	0%	|	28%
  2	|	93%	|	89%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	92%	|	0%
  1	|	11%	|	57%
  2	|	99%	|	95%
---------------------------------------
Generation:  680
Fitness:  0.0012291666666666666 0.0010231481481481482
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	0%	|	27%
  1	|	3%	|	64%
  2	|	100%	|	98%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	83%	|	27%
  1	|	6%	|	9%
  2	|	100%	|	100%
---------------------------------------
Generation:  685
Fitness:  0.0012175925925925926 0.0013912037037037037
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	87%	|	16%
  1	|	8%	|	11%
  2	|	95%	|	100%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	87%	|	22%
  1	|	7%	|	25%
  2	|	100%	|	93%
---------------------------------------
Generation:  690
Fitness:  0.0010625 0.000756944444

Generation:  805
Fitness:  0.0010532407407407407 0.0019189814814814816
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	100%	|	63%
  1	|	27%	|	69%
  2	|	69%	|	100%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	1%	|	82%
  1	|	13%	|	69%
  2	|	100%	|	84%
---------------------------------------
Generation:  810
Fitness:  0.0006689814814814814 0.0020208333333333332
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	100%	|	63%
  1	|	26%	|	69%
  2	|	100%	|	84%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	17%	|	70%
  1	|	25%	|	69%
  2	|	92%	|	100%
---------------------------------------
Generation:  815
Fitness:  0.0009907407407407408 0.002013888888888889
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	100%	|	63%
  1	|	11%	|	69%
  2	|	98%	|	88%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	1%	|	53%
  1	|	17%	|	56%
  2	|	100%	|	94%
---------------------------------------
Generation:  820
Fitness:  0.001474537037037

Generation:  935
Fitness:  0.0010439814814814815 0.002025462962962963
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	95%	|	68%
  1	|	80%	|	62%
  2	|	100%	|	99%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	96%	|	66%
  1	|	75%	|	59%
  2	|	100%	|	97%
---------------------------------------
Generation:  940
Fitness:  0.00047222222222222224 0.0020324074074074073
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	93%	|	69%
  1	|	77%	|	89%
  2	|	100%	|	100%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	0%	|	72%
  1	|	18%	|	74%
  2	|	100%	|	99%
---------------------------------------
Generation:  945
Fitness:  0.0006157407407407407 0.0020625
Strategy player 1: 
Card 	      a0 Prob 	      a2 prob
  0	|	99%	|	58%
  1	|	18%	|	86%
  2	|	100%	|	99%
Strategy player 2: 
Card 	      a0 Prob 	      a2 prob
  0	|	0%	|	73%
  1	|	80%	|	73%
  2	|	97%	|	100%
---------------------------------------
Generation:  950
Fitness:  0.0012175925925925926 0.00199

## Best player 1 strategy

In [None]:
print_player(p1[np.argmax(f1)])

## Best player 2 strategy

In [None]:
print_player(p2[np.argmax(f2)])

<img src="kuhns_poker_no_fold.png">