In [58]:
import numpy as np

# Fictitious play in a two player game
action_nb = 2
player_nb = 2
loss_array = np.array([[3,2],[2,1]])
loss_array = np.array([[0.3, 1],[0,0.6]])

past_actions = np.zeros((player_nb, action_nb))

step_nb = 100
for step in range(step_nb):
    played_action = np.zeros(player_nb)
    if (step==0):
        for player_id in range(player_nb):
            played_action[player_id] = np.random.randint(0, action_nb)
    else:
        for player_id in range(player_nb):
            #actions = numpy.identity(action_nb)
            empirical_adversary = past_actions[player_id, :]/(float(step))
            if (player_id == 0):
                # player 0 wants to minimize his loss
                chosen_action_id = np.argmin(np.dot(loss_array, empirical_adversary))
            else:
                # player 1 wants to maximize his loss
                chosen_action_id = np.argmax(np.dot(loss_array, empirical_adversary))
            past_actions[player_id, chosen_action_id] += 1

print("Loss array for this game : ")
print(loss_array)
print("Frequency of play for each action")
for player_id in range(player_nb):
    print("For player " + str(player_id) + " : ")
    print(past_actions[player_id, :]/float(step_nb))

Loss array for this game : 
[[0.3 1. ]
 [0.  0.6]]
Frequency of play for each action
For player 0 : 
[0.01 0.98]
For player 1 : 
[0.99 0.  ]


In [98]:
def simulated_player(loss_array):
    action_nb = loss_array.shape[0]
    action_chosen = np.random.randint(0, action_nb)
    #action_chosen = 0
    return(action_chosen)

In [99]:
def is_square(x):
    return(int(np.sqrt(x)) == np.sqrt(x))

In [100]:
## Deterministic exploration-exploitation (cf p 222)
import numpy as np

# deterministic exploration-exploitation play in a two player game
action_nb = 2
player_nb = 2
loss_array = np.array([[0.3,0.5],[0.5,1]])
loss_array = np.array([[0.3, 1],[0,0.6]]) # Prisonner dilemna

# parameters
step_nb = 500

past_actions = np.zeros((player_nb, step_nb)).astype(int)
mu = np.zeros((action_nb, step_nb))
mu_chap = np.zeros(step_nb+1)

for step in range(step_nb):
    selected_action_J = simulated_player(loss_array)
    past_actions[1, step] = selected_action_J
    
    # On peut probablement le mettre en version calcul vectoriel. Utile ?
    for action in range(action_nb):
        indexes_action = np.where(past_actions[0, :step] == action)[0]
        if len(indexes_action) ==0:
            mu[action] = 1
        else:
            Jt = past_actions[1, indexes_action]
            mu[action, step] = np.mean(loss_array[action, Jt])
    
    # Exploration
    if (is_square(step)):
        selected_action_I = 0
    elif (is_square(step-1)):
        selected_action_I = 1
    
    # Exploitation
    else:
        selected_action_I = np.argmin(mu[:, step])
        
    past_actions[0, step] = selected_action_I
    mu_chap[step+1] = (step*mu_chap[step] + loss_array[selected_action_I, selected_action_J])/(step+1)
        
print("loss array for this game : ")
print(loss_array)
print("Frequency of play for each action")
print("For player " + "0" + " : ")
print(mu[:,-5:].transpose())
print("mu_chapeau")
print(mu_chap)

## Remarquer que limsup mu_chap <= limsup min(mu)

loss array for this game : 
[[0.3 1. ]
 [0.  0.6]]
Frequency of play for each action
For player 0 : 
[[0.72608696 0.30254237]
 [0.72608696 0.30190275]
 [0.72608696 0.30126582]
 [0.72608696 0.30063158]
 [0.72608696 0.3012605 ]]
mu_chapeau
[0.         1.         0.65       0.43333333 0.475      0.58
 0.58333333 0.5        0.5125     0.45555556 0.51       0.46363636
 0.425      0.39230769 0.40714286 0.38       0.39375    0.38823529
 0.36666667 0.34736842 0.36       0.34285714 0.35454545 0.36521739
 0.35       0.36       0.38461538 0.39259259 0.37857143 0.3862069
 0.39333333 0.38064516 0.3875     0.39393939 0.38235294 0.38857143
 0.37777778 0.37567568 0.36578947 0.37179487 0.3775     0.36829268
 0.35952381 0.36511628 0.37045455 0.36222222 0.3673913  0.35957447
 0.36458333 0.36938776 0.368      0.37254902 0.37692308 0.38113208
 0.38518519 0.38909091 0.39285714 0.38596491 0.37931034 0.37288136
 0.37666667 0.38032787 0.38387097 0.38730159 0.38125    0.38
 0.38333333 0.38656716 0.38088235 0.37