In [1]:
import numpy as np

In [44]:
# This matrix represents A's payoffs
# If A plays strategy 0, and B plays strategy 1
# the payoff for A is payoff[0][1]
# the payoff for B is 1 - payoff[0][1]

payoff_matrix = np.array( [[1/2,1/2,2/5,2/5,1/8,5/12,3/10,1/2],
                           [1/2,1/2,2/3,2/3,1/4,5/8,1/2,7/10],
                           [3/5,1/3,1/2,1/2,1/2,1/2,3/8,7/12],
                           [3/5,1/3,1/2,1/2,1/2,1/2,3/4,7/8],
                           [7/8,3/4,1/2,1/2,1/2,1/2,1/3,3/5],
                           [7/12,3/8,1/2,1/2,1/2,1/2,1/3,3/5],
                           [7/10,1/2,5/8,1/4,2/3,2/3,1/2,1/2],
                           [1/2,3/10,5/12,1/8,2/5,2/5,1/2,1/2]])



In [45]:
def pure(i : int) -> list[float]:
    arr = [0] * 8
    arr[i] = 1
    return arr


In [46]:
def g_a(alpha: list[float], beta: list[float]):
    assert(len(alpha) == 8)
    assert(len(beta) == 8)
    payoff = 0
    
    for i in range(len(alpha)):
        payoff += alpha[i] * sum(beta * payoff_matrix[i])
    return payoff

def g_b(alpha: list[float], beta: list[float]):
    assert(len(alpha) == 8)
    assert(len(beta) == 8)
    payoff = 0
    
    for i in range(len(beta)):
        payoff += beta[i] * sum((alpha * payoff_matrix[i]))
    return payoff

In [47]:
def chi(alpha: list[float], beta: list[float]):
    current_payoff = g_a(alpha, beta)
    new_alpha = []
    for i in range(len(alpha)):
        new_alpha.append(
            max(0, g_a(pure(i), beta) - current_payoff)
        )
    sum_new_alpha = sum(new_alpha)
    normalized_alpha = []
    
    for i in range(len(alpha)):
        normalized_alpha.append(
            (alpha[i] + new_alpha[i]) / (1 + sum_new_alpha))
    return normalized_alpha


In [48]:
def psi(alpha: list[float], beta: list[float]):
    current_payoff = g_b(alpha, beta)
    new_beta = []
    for i in range(len(beta)):
        new_beta.append(
            max(0, g_b(alpha, pure(i)) - current_payoff)
        )
    sum_new_beta = sum(new_beta)
    normalized_beta = []
    
    for i in range(len(beta)):
        normalized_beta.append(
            (beta[i] + new_beta[i]) / (1 + sum_new_beta))

    return normalized_beta

In [51]:
def f(alpha: list[float], beta: list[float]):
    
    alpha = np.array(alpha)
    beta = np.array(beta)
    
    prev_alpha = np.array([])
    prev_beta = np.array([])
    i = 0
    while True:
        prev_alpha = alpha
        prev_beta = beta
        
        alpha = chi(prev_alpha, prev_beta)
        beta = psi(prev_alpha, prev_alpha)
        
        flag = True
        for i in range(len(alpha)):
            if (round(alpha[i], 4) != round(prev_alpha[i], 4)) or (round(beta[i], 4) != round(prev_beta[i], 4)):
                flag = False
                break
        
        if flag:
            break
    
    return (alpha, beta)

for i in range(8):
    for j in range(8):
        a, b = f(pure(i), pure(j))
        a = np.round(a, decimals=2)
        b = np.round(b, decimals=2)
        print (a)
        print (b)
        


[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.47 0.5  0.   0.01 0.  ]
[0.   0.01 0.   0.47 0.5  0.   0.01 0.  ]
[0.   0.01 0.   0.53 0.44 0.   0.01 0.  ]
[0.   0.01 0.   0.53 0.44 0.   0.01 0.  ]
[0.   0.01 0.   0.44 0.53 0.   0.01 0.  ]
[0.   0.01 0.   0.44 0.53 0.   0.01 0.  ]
[0.   0.01 0.   0.47 0.51 0.   0.01 0.  ]
[0.   0.01 0.   0.47 0.51 0.   0.01 0.  ]
[0.01 0.   0.   0.54 0.44 0.   0.01 0.  ]
[0.01 0.   0.   0.54 0.44 0.   0.01 0.  ]
[0.   0.01 0.   0.44 0.54 0.   0.01 0.  ]
[0.   0.01 0.   0.44 0.54 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.01 0.  ]
[0.   0.01 0.   0.55 0.43 0.   0.0