In [353]:
import numpy as np
import matplotlib.pyplot as plt
import numba
from numba import jit

In [377]:
# --- Game definition --------------------------------------------------------
N_CARDS = 3      # deck size; play() deals two distinct cards from it
N_ACTIONS = 2    # decision points stored per card in a strategy (indices 0 and 1)
BET_SIZE = 1     # base stake; a showdown after a bet is worth 2 * BET_SIZE

# --- Strategy encoding ------------------------------------------------------
RESOLUTION = 10  # number of genes used to encode one probability

# --- Evaluation / genetic algorithm settings --------------------------------
N_GAMES = 5000           # games played per (player, opponent) pairing
POPULATION_SIZE = 30     # individuals in each of the two populations
TOURNAMENT_PROB = 0.7    # chance that the fitter of two contenders is selected
CROSSOVER_PROB = 0       # chance that a pair of parents is recombined
MUTATION_PROB = 0        # per-gene flip chance; alternative value: 0.1/RESOLUTION

# --- Derived values ---------------------------------------------------------
CARDS = np.arange(N_CARDS)
INDIVIDUALS = np.arange(POPULATION_SIZE)
# Not referenced anywhere else in the visible notebook.
MAX_SCORE = N_GAMES * POPULATION_SIZE * POPULATION_SIZE * 2
# Weights 2^-1, 2^-2, ..., 2^-RESOLUTION: a dot product with a gene vector
# turns it into the number those genes encode.
DECODING_VECTOR = 2.0 ** -np.arange(1, RESOLUTION + 1)

<img src="kuhns_poker_no_fold.png">

In [378]:
@jit(nopython=True)
def play(p1, p2):
    """Play one hand between two strategies and return the payoffs.

    Each strategy is indexed as ``strategy[card, decision, :]``; the gene
    vector found there is decoded with ``DECODING_VECTOR`` and used as the
    probability of betting at that decision point.

    Sequence of play:
      * two distinct cards are dealt from ``CARDS``;
      * player 1 decides with ``p1[card, 0]``;
      * if player 1 bet, player 2 decides with ``p2[card, 0]``; otherwise
        player 2 decides with ``p2[card, 1]`` and, if player 2 bet,
        player 1 decides again with ``p1[card, 1]``.

    Returns
    -------
    tuple
        ``(player_1_payoff, player_2_payoff)``; the two values always sum to
        zero. Showdowns go to the higher card.
    """
    dealt = np.random.permutation(CARDS)
    p1_card = dealt[0]
    p2_card = dealt[1]

    opening_bet = np.random.rand() < p1[p1_card, 0, :] @ DECODING_VECTOR

    if opening_bet:
        p2_answers = np.random.rand() < p2[p2_card, 0, :] @ DECODING_VECTOR
        if not p2_answers:
            # Player 2 declined to match the bet: player 1 takes the base stake.
            return (BET_SIZE, -BET_SIZE)
        # Both players bet: showdown for the doubled stake.
        if p1_card > p2_card:
            return (2 * BET_SIZE, -2 * BET_SIZE)
        return (-2 * BET_SIZE, 2 * BET_SIZE)

    # Player 1 did not bet; player 2 now uses its second decision point.
    p2_bets = np.random.rand() < p2[p2_card, 1, :] @ DECODING_VECTOR

    if not p2_bets:
        # Nobody bet: showdown for the base stake.
        if p1_card > p2_card:
            return (BET_SIZE, -BET_SIZE)
        return (-BET_SIZE, BET_SIZE)

    p1_answers = np.random.rand() < p1[p1_card, 1, :] @ DECODING_VECTOR
    if not p1_answers:
        # Player 1 declined to match player 2's bet.
        return (-BET_SIZE, BET_SIZE)

    # Player 1 matched the bet: showdown for the doubled stake.
    if p1_card > p2_card:
        return (2 * BET_SIZE, -2 * BET_SIZE)
    return (-2 * BET_SIZE, 2 * BET_SIZE)


In [379]:
@jit(nopython=True)
def evaluate(population_1, population_2, n_games=N_GAMES):
    """Score every individual of both populations by round-robin play.

    Every individual of ``population_1`` (always acting as player 1) meets
    every individual of ``population_2`` (always acting as player 2)
    ``n_games`` times via ``play``.

    Parameters
    ----------
    population_1, population_2 : ndarray
        Arrays whose first axis enumerates individuals; each slice
        ``population[k]`` is passed to ``play`` as a strategy.
    n_games : int, optional
        Games per (player, opponent) pairing. Defaults to ``N_GAMES`` so
        existing two-argument calls behave as before; the variance
        experiment further down passes it explicitly.

    Returns
    -------
    tuple of ndarray
        ``(fitness_1, fitness_2)``: average payoff per game for each
        individual of the respective population.
    """
    # Take the sizes from the arrays themselves rather than from the global
    # POPULATION_SIZE, so the two populations need not be the same size.
    n_players = population_1.shape[0]
    n_opponents = population_2.shape[0]

    fitness_1 = np.zeros(n_players)
    fitness_2 = np.zeros(n_opponents)

    for player in range(n_players):
        for opponent in range(n_opponents):
            for _ in range(n_games):
                player_score, opponent_score = play(population_1[player], population_2[opponent])
                fitness_1[player] += player_score
                fitness_2[opponent] += opponent_score

    # Each player-1 individual played n_games against every opponent, and
    # vice versa, so normalise by the respective number of games played.
    return (fitness_1 / (n_games * n_opponents), fitness_2 / (n_games * n_players))

In [380]:
@jit(nopython=True)
def tournament_selection(population, fitness):
    """Pick one individual index by a two-contender tournament.

    Two distinct indices are drawn from ``range(POPULATION_SIZE)``. With
    probability ``TOURNAMENT_PROB`` the one with the strictly higher fitness
    is returned, otherwise the other one. On equal fitness the second drawn
    index is treated as the "fitter" one.

    ``population`` is accepted for signature symmetry but is not read.

    Returns
    -------
    int
        Index of the selected individual.
    """
    order = np.random.permutation(POPULATION_SIZE)
    first = order[0]
    second = order[1]

    if fitness[first] > fitness[second]:
        fitter, weaker = first, second
    else:
        fitter, weaker = second, first

    if np.random.rand() < TOURNAMENT_PROB:
        return fitter
    return weaker
            
def wheel_selection(population, fitness):
    """Pick one individual index by roulette-wheel (fitness-proportionate) selection.

    Fitness values may be negative, so they are first shifted so that the
    worst individual has weight 0, then normalised into a probability
    distribution. If every individual has the same fitness (the shifted
    weights sum to zero) the choice is uniform.

    Parameters
    ----------
    population : unused
        Accepted for signature symmetry with ``tournament_selection``.
    fitness : array-like of float
        One fitness value per individual.

    Returns
    -------
    int
        Index of the selected individual, in ``range(len(fitness))``.
    """
    fitness = np.asarray(fitness, dtype=np.float64)
    n_individuals = fitness.size

    # Subtract (not add) the minimum: adding it makes negative fitness values
    # even more negative and produces invalid or inverted probabilities.
    weights = fitness - np.min(fitness)
    total = np.sum(weights)

    if total > 0:
        distribution = weights / total
    else:
        # All fitness values are equal: avoid 0/0 and sample uniformly.
        distribution = np.full(n_individuals, 1.0 / n_individuals)

    return np.random.choice(np.arange(n_individuals), p=distribution)
    

In [381]:
@jit(nopython=True)
def crossover(individual_1, individual_2):
    """Recombine two strategies with independent single-point crossover per gene vector.

    With probability ``CROSSOVER_PROB`` two new arrays are built: for every
    ``(card, action)`` pair a cut point is drawn from ``range(RESOLUTION)``
    and the gene vectors of the parents are swapped from that point on.
    Otherwise the two parents are returned unchanged. In that case the
    returned objects are the arguments themselves, not copies.

    Returns
    -------
    tuple
        ``(child_1, child_2)``.
    """
    if not (np.random.rand() < CROSSOVER_PROB):
        return (individual_1, individual_2)

    offspring_a = np.zeros_like(individual_1)
    offspring_b = np.zeros_like(individual_1)

    for card in range(N_CARDS):
        for action in range(N_ACTIONS):
            cut = np.random.randint(RESOLUTION)
            # Head from one parent, tail from the other.
            offspring_a[card, action, :cut] = individual_1[card, action, :cut]
            offspring_a[card, action, cut:] = individual_2[card, action, cut:]
            offspring_b[card, action, :cut] = individual_2[card, action, :cut]
            offspring_b[card, action, cut:] = individual_1[card, action, cut:]

    return (offspring_a, offspring_b)

In [382]:
# Not numba-compiled: the @jit(nopython=True) decorator was disabled in the original.
def mutate(individual, mutation_prob=None):
    """Return a mutated copy of a strategy, flipping each gene independently.

    Parameters
    ----------
    individual : ndarray
        Gene array of a single strategy; genes are expected to be 0/1 values
        so that ``1 - gene`` flips them.
    mutation_prob : float, optional
        Probability of flipping each gene. Defaults to the module-level
        ``MUTATION_PROB``.

    Returns
    -------
    ndarray
        A new array; ``individual`` itself is left untouched. This matters
        because ``crossover`` can hand back the parents themselves, which may
        be views into the population, and flipping genes in place would then
        silently rewrite the parent generation.
    """
    if mutation_prob is None:
        mutation_prob = MUTATION_PROB

    mutated_genes = np.random.rand(*individual.shape) < mutation_prob

    mutant = individual.copy()
    mutant[mutated_genes] = 1 - mutant[mutated_genes]
    return mutant

In [383]:
def print_player(player):
    """Print a strategy as a table of decoded probabilities, one row per card.

    For each card the two gene vectors ``player[card, 0, :]`` and
    ``player[card, 1, :]`` are decoded with ``DECODING_VECTOR`` and shown as
    whole-number percentages.
    """
    row_template = "  {:d}\t|\t{:.0f}%\t|\t{:.0f}%"

    print("Card \t      a0 prob \t      a2 prob")
    for card in range(N_CARDS):
        first_percent = player[card, 0, :] @ DECODING_VECTOR * 100
        second_percent = player[card, 1, :] @ DECODING_VECTOR * 100
        print(row_template.format(card, first_percent, second_percent))

In [384]:
# Two independent populations of random 0/1 genes, stored as floats so they
# can be multiplied with DECODING_VECTOR. p1 plays first, p2 plays second.
p1 = np.random.randint(low=0, high=2, size=(POPULATION_SIZE, N_CARDS, N_ACTIONS, RESOLUTION)).astype(float)
p2 = np.random.randint(low=0, high=2, size=(POPULATION_SIZE, N_CARDS, N_ACTIONS, RESOLUTION)).astype(float)

# Scratch buffers for building the next generation.
new_p1, new_p2 = p1.copy(), p2.copy()

# Initial fitness of both populations.
f1, f2 = evaluate(p1, p2)

In [385]:
# Main loop: re-evaluates both populations for up to 10000 generations and
# prints the best fitness of each population every 10th generation.
#
# `p1_turn` is only read inside the disabled block below; the active code
# never uses it.
p1_turn = False
for generation in range(10000):
    
    #if(generation % 200) : p1_turn = not p1_turn
    # NOTE: everything between the triple quotes is a bare string expression,
    # i.e. the selection / crossover / mutation / elitism step is disabled and
    # never executes. As written, the populations therefore do not change
    # between generations; only the (stochastic) evaluation is repeated.
    # NOTE(review): inside the disabled block, the elitism assignment and the
    # `p = new_p.copy()` replacement sit inside the per-pair `for i` loop, so
    # re-enabling it as-is would replace the population after every pair.
    '''
    for i in range(0,POPULATION_SIZE,2):
        if(p1_turn):
            ind11 = tournament_selection(p1,f1)
            ind12 = tournament_selection(p1,f1)
            
            child_11, child_12 = crossover(p1[ind11], p1[ind12])
            
            child_11 = mutate(child_11)
            child_12 = mutate(child_12)
            
            new_p1[i] = child_11
            new_p1[i+1] = child_12
            
            best_indivudal_1 = np.argmax(f1)
            
            new_p1[0] = p1[best_indivudal_1]
            
            p1 = new_p1.copy()
        
        else:
            
            ind21 = tournament_selection(p2,f2)
            ind22 = tournament_selection(p2,f2)
        
        
            child_21, child_22 = crossover(p2[ind21], p2[ind22])
        
        
            child_21 = mutate(child_21)
            child_22 = mutate(child_22)
        
        
            new_p2[i] = child_21
            new_p2[i+1] = child_22
        
    
            best_indivudal_2 = np.argmax(f2)

            new_p2[0] = p2[best_indivudal_2]

            p2 = new_p2.copy()
    '''
    # Re-score both populations; this overwrites the global f1 / f2.
    f1,f2 = evaluate(p1,p2)
    
    
    # Progress report every 10th generation: best average payoff per population.
    if(generation % 10 == 0):
        print("Generation: ", generation)
        print("Fitness: ", np.max(f1), np.max(f2))
        #print("Strategy player 1: ")
        #print_player(p1[np.argmax(f1)])
        #print("Strategy player 2: ")
        #print_player(p2[np.argmax(f2)])
        
        print("---------------------------------------")
    


Generation:  0
Fitness:  0.3538133333333333 0.13165333333333334
---------------------------------------
Generation:  10
Fitness:  0.3499333333333333 0.13375333333333334
---------------------------------------


KeyboardInterrupt: 

## Best player 1 strategy

In [None]:
# Strategy of the population-1 individual with the highest fitness
# (last index of the ascending fitness ordering).
print_player(p1[f1.argsort()[-1]])

## Best player 2 strategy

In [122]:
# Strategy of the population-2 individual with the highest fitness
# (last index of the ascending fitness ordering).
print_player(p2[f2.argsort()[-1]])

Card 	      a0 prob 	      a2 prob
  0	|	86%	|	47%
  1	|	95%	|	68%
  2	|	42%	|	58%


<img src="kuhns_poker_no_fold.png">

In [75]:
# Display the whole first population (every individual, card, decision and gene).
p1[:]

array([[[[0.54098347, 0.64915783, 0.73632378, ..., 0.82139975,
          0.87776996, 0.29976857],
         [0.85861205, 0.01670313, 0.84070599, ..., 0.71473305,
          0.88845362, 0.16734321]],

        [[0.94745242, 0.35378947, 0.7730972 , ..., 0.99903171,
          0.46024463, 0.75786959],
         [0.52933596, 0.13541305, 0.60659418, ..., 0.17522498,
          0.70446257, 0.44460968]],

        [[0.19773398, 0.68503469, 0.03658238, ..., 0.5244156 ,
          0.51371181, 0.8805567 ],
         [0.55469052, 0.62652457, 0.13840244, ..., 0.98243021,
          0.06388858, 0.42621035]]],


       [[[0.45901653, 0.64915783, 0.73632378, ..., 0.82139975,
          0.12223004, 0.29976857],
         [0.14138795, 0.01670313, 0.84070599, ..., 0.71473305,
          0.88845362, 0.83265679]],

        [[0.05254758, 0.64621053, 0.7730972 , ..., 0.99903171,
          0.53975537, 0.75786959],
         [0.52933596, 0.86458695, 0.39340582, ..., 0.82477502,
          0.70446257, 0.44460968]],

        

In [263]:
# Gene-wise average over the ten fittest individuals of population 1.
avg_player_1 = p1[np.argsort(f1)[-10:]].mean(axis=0)

In [264]:
# Show the averaged strategy computed in the previous cell. The original cell
# referenced `player_1`, a name that is never defined in this notebook.
print_player(avg_player_1)

Card 	      a0 prob 	      a2 prob
  0	|	51%	|	59%
  1	|	66%	|	53%
  2	|	40%	|	79%


In [320]:
# How noisy is a fitness estimate as a function of games per pairing?
# For each setting in `vec`, evaluate the same populations 100 times and record
# the variance of the fitness of population 2's first individual.
# Requires an `evaluate` that accepts the number of games as a third argument.
# Note: this overwrites the global f1 / f2 on every call.
vec = [10, 100, 500, 1000, 2000, 5000, 10000]
v = np.zeros(len(vec))

for j in range(len(vec)):
    n_games = vec[j]
    s = np.zeros(100)
    for i in range(s.size):
        f1, f2 = evaluate(p1, p2, n_games)
        s[i] = f2[0]
    v[j] = s.var()
    print(n_games, v[j])

10 0.00805362111111111
100 0.0007005308444444443
500 0.00010311463599999995


KeyboardInterrupt: 

In [306]:
# Mean of the fitness samples left over from the last setting the experiment ran.
s.mean()

0.10649636666666668

In [307]:
# Variance of the sampled fitness for each games-per-pairing setting in `vec`.
v

array([8.06321222e-03, 7.86377067e-04, 1.71218512e-04, 7.65482243e-05,
       3.14833134e-05, 1.72068132e-05, 7.47784869e-06])

In [308]:
# Variances divided by 0.1.
# NOTE(review): 0.1 is presumably the approximate mean fitness (np.mean(s) is
# about 0.106 above); confirm, and consider naming the constant.
np.divide(v, 0.1)

array([8.06321222e-02, 7.86377067e-03, 1.71218512e-03, 7.65482243e-04,
       3.14833134e-04, 1.72068132e-04, 7.47784869e-05])