In [1]:
import numpy as np
from random import randint
from scipy.optimize import linprog
import pickle
import matplotlib
import matplotlib.pyplot as plt
import math

In [2]:
def payoff(v,w):
    payoff=v[1]*w[0]-v[0]*w[1]
    return -payoff

In [3]:
def eval_matrix_1pop(popn):
    '''Input: 1 population of strategies
       Output: evaluation matrix of that population'''
    n = len(popn)
    matrix = np.zeros((n,n))
    for i in range(n):
        for j in range(n):
            v = popn[i]
            w = popn[j]
            matrix[i][j] = payoff(v,w)
    return matrix

In [4]:
def random_point(lb = -5, ub = 5, n = 2):
    point = np.zeros((n,))
    for i in range(n):
        point[i] = np.random.uniform(lb, ub)
    return point

In [27]:
a = (np.sqrt(3)/2)*np.array([np.cos(0),np.sin(0)])
b = (np.sqrt(3)/2)*np.array([np.cos(2*math.pi/3),np.sin(2*math.pi/3)])
c = (np.sqrt(3)/2)*np.array([np.cos(4*math.pi/3),np.sin(4*math.pi/3)])

popn = [a,b,c]
print(popn)

[array([0.8660254, 0.       ]), array([-0.4330127,  0.75     ]), array([-0.4330127, -0.75     ])]


In [30]:
a = random_point(lb = -5, ub = 5, n = 2)
b = random_point(lb = -5, ub = 5, n = 2)
c = random_point(lb = -5, ub = 5, n = 2)

popn = [a,b,c]
print(popn)

[array([ 0.82153555, -0.24475703]), array([-4.21623733,  2.15814447]), array([-1.40159748,  2.24128702])]


In [6]:
A=eval_matrix_1pop(popn)
print(A)

[[-0.          0.64951905 -0.64951905]
 [-0.64951905 -0.          0.64951905]
 [ 0.64951905 -0.64951905 -0.        ]]


In [7]:
def Nash_eq(A):
    '''Input: matrix A of a population
       Output: Nash equilibrium
       Note that A need to be antisymmetric for this function to generate the right Nash p'''
    n = A.shape[0]
    A_ub = -A.T
    b_ub = np.zeros((n,))
    A_eq = np.ones((1,n))
    b_eq = 1
    soln = linprog(c = np.zeros((n,)), A_ub = A_ub, b_ub = b_ub, A_eq = A_eq, b_eq = b_eq, bounds = (0,1),method='interior-point')
    return soln.x

In [8]:
Nash_eq(A)

array([0.33333333, 0.33333333, 0.33333333])

In [26]:
def oracle(popn,nash_p,A,eta):
    new_agents=[];
    for i in range(len(popn)):
        if(nash_p[i]>0):   
            curr_agent_v=popn[i];
            curr_agent_idx=i;
            next_agent_v=curr_agent_v+eta*gradient_v(popn,nash_p,A,curr_agent_idx,curr_agent_v)
            new_agents.append(next_agent_v)  
      #print(gradient_v(popn,nash_p,curr_agent_v))
    return new_agents

In [10]:
def gradient_v(popn,nash_p,A,curr_agent_idx,agent_v): #agent_p, agent_v are strategies of the current agent "v_t of popn B_t"
    sum_mat=np.zeros((2,));
    for j in range(len(popn)):
        if (A[curr_agent_idx][j]>=0):
            temp_agent_v=popn[j];
#         print(temp_agent_v)
            val_grad_mat=np.array([temp_agent_v[1],-temp_agent_v[0]])
#         print(val_grad_mat)
            sum_mat=sum_mat+nash_p[j]*val_grad_mat; #multiply w.r.t Nash p.        
    return sum_mat

In [28]:
def PSRO_N(popn, eta = 0.01, epoch = 5):
    strategies = popn
    n = len(strategies)
    for i in range(epoch):
        A = eval_matrix_1pop(strategies)
        nash_p = Nash_eq(A)
        print(nash_p)
        new_v_set = oracle(strategies,nash_p,A,eta)
        strategies.extend(new_v_set)
#         print(new_v)
        n = n + 1
        print("There are " + str(n) + " strategies in the population now.")
    return strategies

In [31]:
new_popn = PSRO_N(popn,eta=0.1,epoch=10)



[1.00000000e+00 2.12359215e-12 2.46365442e-13]
There are 4 strategies in the population now.
[3.24614971e-09 1.57036879e-11 1.69397697e-11 9.99999997e-01
 1.57036879e-11 1.69397697e-11]
There are 5 strategies in the population now.
[2.35689642e-02 3.84199925e-02 2.78813140e-04 2.36032667e-02
 3.84199925e-02 2.78813140e-04 2.35689642e-02 3.84199925e-02
 2.78813140e-04 7.74463623e-01 3.84199925e-02 2.78813140e-04]
There are 6 strategies in the population now.
[0.03551656 0.01798624 0.00146233 0.04299838 0.01798624 0.00146233
 0.03551656 0.01798624 0.00146233 0.10025472 0.01798624 0.00146233
 0.03415486 0.02083186 0.00099395 0.03611536 0.01932844 0.00099354
 0.03418135 0.01850765 0.00099314 0.52307849 0.01774824 0.00099273]
There are 7 strategies in the population now.
[0.02153666 0.00834497 0.00187074 0.02726996 0.00834497 0.00187074
 0.02153666 0.00834497 0.00187074 0.03488218 0.00834497 0.00187074
 0.01780583 0.0091458  0.00174974 0.02267503 0.00883398 0.00174965
 0.01790156 0.00853767

There are 11 strategies in the population now.
[2.21108653e-03 5.11910993e-04 1.82180109e-04 2.71878069e-03
 5.11911036e-04 1.82180109e-04 2.21108650e-03 5.11911045e-04
 1.82180110e-04 3.41015760e-03 5.11911020e-04 1.82180109e-04
 1.89706128e-03 5.48520514e-04 1.56577205e-04 2.31490153e-03
 5.34472753e-04 1.56558567e-04 1.90508313e-03 5.20841833e-04
 1.56539929e-04 4.17484381e-03 5.07612033e-04 1.56521293e-04
 1.92779998e-03 5.24092821e-04 1.57360722e-04 2.39009719e-03
 5.17803880e-04 1.57262960e-04 1.94024043e-03 5.11602102e-04
 1.57165200e-04 3.65979475e-03 5.05485868e-04 1.57067438e-04
 1.65642854e-03 5.83545458e-04 1.31619980e-04 2.03825849e-03
 5.61005668e-04 1.31535863e-04 1.67213723e-03 5.39782259e-04
 1.31451775e-04 5.12196079e-03 4.95351467e-04 1.31367713e-04
 1.94650575e-03 5.17573231e-04 1.59001586e-04 2.40676348e-03
 5.14685974e-04 1.58876490e-04 1.95415539e-03 5.11817323e-04
 1.58751393e-04 3.58578194e-03 5.08967097e-04 1.58626297e-04
 1.66358486e-03 5.77874099e-04 1.32652

There are 12 strategies in the population now.
[9.96678712e-04 2.49465372e-04 1.25325081e-04 ... 3.42711120e-01
 2.29794195e-04 5.39990609e-05]
There are 13 strategies in the population now.


In [33]:
new_popn[-10:]

[array([-1.12067799,  2.73936779]),
 array([1.07646274, 0.12562341]),
 array([-4.43607697,  1.90445969]),
 array([-1.11842877,  2.74053223]),
 array([1.10722025, 0.24181156]),
 array([-4.41187701,  1.95321248]),
 array([-1.11592195,  2.74181899]),
 array([ 0.52513473, -0.88133317]),
 array([-4.22973196,  2.3135126 ]),
 array([-1.11311273,  2.7432505 ])]

In [44]:
payoff(new_popn[2],new_popn[3])
new_popn[2]

IndexError: index 1 is out of bounds for axis 0 with size 1

In [54]:
np.array(1,2)

TypeError: data type not understood

In [21]:
A=eval_matrix_1pop(new_popn)
print(A)
print(Nash_eq(A))

[[-0.          0.64951905 -0.64951905 ... -0.06545595  0.77818224
  -0.71272629]
 [-0.64951905 -0.          0.64951905 ... -0.71272893 -0.0654546
   0.77818353]
 [ 0.64951905 -0.64951905 -0.         ...  0.77818488 -0.71272765
  -0.06545724]
 ...
 [ 0.06545595  0.71272893 -0.77818488 ... -0.          0.86050962
  -0.86050962]
 [-0.77818224  0.0654546   0.71272765 ... -0.86050962 -0.
   0.86050962]
 [ 0.71272629 -0.77818353  0.06545724 ...  0.86050962 -0.86050962
  -0.        ]]
[0.01041621 0.01041667 0.01041712 0.0104162  0.01041666 0.01041714
 0.01041619 0.01041667 0.01041714 0.01041619 0.01041665 0.01041716
 0.01041619 0.01041667 0.01041713 0.01041619 0.01041666 0.01041716
 0.01041618 0.01041668 0.01041715 0.01041618 0.01041664 0.01041718
 0.01041619 0.01041667 0.01041713 0.01041619 0.01041666 0.01041716
 0.01041618 0.01041668 0.01041715 0.01041618 0.01041665 0.01041718
 0.01041617 0.01041667 0.01041714 0.01041617 0.01041665 0.01041717
 0.01041617 0.01041668 0.01041716 0.01041617 0.0

