In [14]:
!pip install nashpy
!pip install cvxopt

import nashpy
import numpy as np
from cvxopt import matrix, solvers
from scipy.optimize import linprog





In [15]:

leduc_cards = ["J", "J", "Q", "Q", "K", "K"]
kuhn_cards = ["J", "Q", "K"]

def solve_lp_msne(A, E, F, e, f):
    # Get dimensions
    dim_E = E.shape
    dim_F = F.shape

    # Extend to cover both y and p
    e_new = np.concatenate((np.zeros(dim_F[1]), e))

    # Constraint changes for 2 variables
    H1 = np.hstack((-F, np.zeros((dim_F[0], dim_E[0]))))
    H2 = np.hstack((A, -E.T))
    H3 = np.zeros(dim_E[1])

    # Bounds for both
    lb = np.concatenate((np.zeros(dim_F[1]), -np.inf*np.ones(dim_E[0])))
    ub = np.concatenate((np.ones(dim_F[1]), np.inf*np.ones(dim_E[0])))

    # Solve LP problem
    res = linprog(e_new, A_ub=H2, b_ub=H3, A_eq=H1, b_eq=-f, bounds=list(zip(lb, ub)))

    # Extract solutions {x, y, p, q}
    yp = res.x
    y = yp[:dim_F[1]]
    p = yp[dim_F[1]:dim_F[1]+dim_E[0]]

    # Extend to cover both y and p
    f_new = np.concatenate((np.zeros(dim_E[1]), f))

    # Constraint changes for 2 variables
    H1 = np.hstack((-E, np.zeros((dim_E[0], dim_F[0]))))
    H2 = np.hstack((-A.T, -F.T))
    H3 = np.zeros(dim_F[1])

    # Bounds for both
    lb = np.concatenate((np.zeros(dim_E[1]), -np.inf*np.ones(dim_F[0])))
    ub = np.concatenate((np.ones(dim_E[1]), np.inf*np.ones(dim_F[0])))

    # Solve LP problem
    res = linprog(f_new, A_ub=H2, b_ub=H3, A_eq=H1, b_eq=-e, bounds=list(zip(lb, ub)))

    # Extract solutions {x, y, p, q}
    yp = res.x
    x = yp[:dim_E[1]]
    q = yp[dim_E[1]:dim_E[1]+dim_F[0]]

    return x, y, p, q



In [16]:

# Givens
A = np.array([
    [0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,0,0,2,1,0,0,2,1,0,0],
    [0,0,0,0,0,0,0,0,1,0,0,0,1],
    [0,0,0,0,0,0,0,2,0,0,0,2,0],
    [0,0,0,0,0,0,0,-1,0,0,0,-1,0],
    [0,-2,1,0,0,0,0,0,0,2,1,0,0],
    [0,0,0,0,-1,0,0,0,0,0,0,0,1],
    [0,0,0,-2,0,0,0,0,0,0,0,2,0],
    [0,0,0,-1,0,0,0,0,0,0,0,-1,0],
    [0,-2,1,0,0,-2,1,0,0,0,0,0,0],
    [0,0,0,0,-1,0,0,0,-1,0,0,0,0],
    [0,0,0,-2,0,0,0,-2,0,0,0,0,0],
    [0,0,0,-1,0,0,0,-1,0,0,0,0,0]
]) / 6.

F = np.array([
    [1,0,0,0,0,0,0,0,0,0,0,0,0],
    [-1,1,1,0,0,0,0,0,0,0,0,0,0],
    [-1,0,0,1,1,0,0,0,0,0,0,0,0],
    [-1,0,0,0,0,1,1,0,0,0,0,0,0],
    [-1,0,0,0,0,0,0,1,1,0,0,0,0],
    [-1,0,0,0,0,0,0,0,0,1,1,0,0],
    [-1,0,0,0,0,0,0,0,0,0,0,1,1]
])
f = np.array([1,0,0,0,0,0,0])

E = np.array([
    [1,0,0,0,0,0,0,0,0,0,0,0,0],
    [-1,1,1,0,0,0,0,0,0,0,0,0,0],
    [0,0,-1,1,1,0,0,0,0,0,0,0,0],
    [-1,0,0,0,0,1,1,0,0,0,0,0,0],
    [0,0,0,0,0,0,-1,1,1,0,0,0,0],
    [-1,0,0,0,0,0,0,0,0,1,1,0,0],
    [0,0,0,0,0,0,0,0,0,0,-1,1,1]
])
e = np.array([1,0,0,0,0,0,0])
x, y, p, q = solve_lp_msne(A, E, F, e, f)

print(x)
print(y)
print(p)
print(q)

[ 1.          1.         -0.          0.          0.          0.
  1.          0.66666667  0.33333333  0.33333333  0.66666667  0.
  0.66666667]
[ 1.          1.         -0.          1.         -0.          0.33333333
  0.66666667  0.          1.          0.          1.          0.33333333
  0.66666667]
[-0.05555556  0.38888889  0.11111111 -0.11111111 -0.22222222 -0.33333333
 -0.16666667]
[ 0.05555556  0.11111111  0.38888889 -0.22222222  0.11111111 -0.16666667
 -0.16666667]


In [17]:
# construct payoff matrix A for Leduc
J_faceoff = np.array([[0, 1, 1],
                      [-1, 0, -1],
                      [-1, 1, 0]])

Q_faceoff = np.array([[0, -1, -1],
                      [1, 0, 1],
                      [1, -1, 0]])

K_faceoff = np.array([[0, -1, -1],
                      [-1, 0, -1],
                      [1, 1, 0]])

zeros_3x3 = np.zeros((3, 3))

J_opp_fold = zeros_3x3 + 1
J_opp_fold[0][0] = 0

J_we_fold = zeros_3x3 - 1
J_we_fold[0][0] = 0

Q_opp_fold = zeros_3x3 + 1
Q_opp_fold[1][1] = 0

Q_we_fold = zeros_3x3 - 1
Q_we_fold[1][1] = 0

K_opp_fold = zeros_3x3 + 1
K_opp_fold[2][2] = 0

K_we_fold = zeros_3x3 - 1
K_we_fold[2][2] = 0


J_payoff1 = np.reshape([np.concatenate([2 * J_faceoff, J_opp_fold, zeros_3x3, zeros_3x3], axis=1),
       np.concatenate([zeros_3x3, zeros_3x3, zeros_3x3, J_faceoff], axis=1),
       np.concatenate([zeros_3x3, zeros_3x3, 2*J_faceoff, zeros_3x3], axis=1),
       np.concatenate([zeros_3x3, zeros_3x3, J_we_fold, zeros_3x3], axis=1)], (12, 12))

Q_payoff1 = np.reshape([np.concatenate([2 * Q_faceoff, Q_opp_fold, zeros_3x3, zeros_3x3], axis=1),
       np.concatenate([zeros_3x3, zeros_3x3, zeros_3x3, Q_faceoff], axis=1),
       np.concatenate([zeros_3x3, zeros_3x3, 2*Q_faceoff, zeros_3x3], axis=1),
       np.concatenate([zeros_3x3, zeros_3x3, Q_we_fold, zeros_3x3], axis=1)], (12, 12))

K_payoff1 = np.reshape([np.concatenate([2 * K_faceoff, K_opp_fold, zeros_3x3, zeros_3x3], axis=1),
       np.concatenate([zeros_3x3, zeros_3x3, zeros_3x3, K_faceoff], axis=1),
       np.concatenate([zeros_3x3, zeros_3x3, 2*K_faceoff, zeros_3x3], axis=1),
       np.concatenate([zeros_3x3, zeros_3x3, K_we_fold, zeros_3x3], axis=1)], (12, 12))

zeros_12x12 = np.zeros((12, 12))


payoff_leduc1 = np.concatenate([np.concatenate([J_payoff1, zeros_12x12, zeros_12x12], axis=1),
                      np.concatenate([zeros_12x12, Q_payoff1, zeros_12x12], axis=1),
                      np.concatenate([zeros_12x12, zeros_12x12, K_payoff1], axis=1)], axis=0)

A = np.concatenate([np.zeros((37, 1)), np.concatenate([np.zeros((1, 36)), payoff_leduc1], axis=0)], axis=1) / 30

print(J_payoff1)
print(Q_payoff1)
print(K_payoff1)

[[ 0.  2.  2.  0.  1.  1.  0.  0.  0.  0.  0.  0.]
 [-2.  0. -2.  1.  1.  1.  0.  0.  0.  0.  0.  0.]
 [-2.  2.  0.  1.  1.  1.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0. -1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  1.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  2.  2.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0. -2.  0. -2.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0. -2.  2.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0. -1. -1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0. -1. -1. -1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0. -1. -1. -1.  0.  0.  0.]]
[[ 0. -2. -2.  1.  1.  1.  0.  0.  0.  0.  0.  0.]
 [ 2.  0.  2.  1.  0.  1.  0.  0.  0.  0.  0.  0.]
 [ 2. -2.  0.  1.  1.  1.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -1. -1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1. -1.  0.]
 [ 0.  0.  0.  0.  0.  0.  0. -2. -2.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  2. 

In [18]:
# Construct E, e matrices for Leduc
E_first_col = np.array([[-1],
                        [-1],
                        [-1],
                        [0],
                        [0],
                        [0]])

same_open_E = np.array([[1, 0, 0,  1,  0,  0, 0, 0, 0, 0, 0, 0],
                        [0, 1, 0,  0,  1,  0, 0, 0, 0, 0, 0, 0],
                        [0, 0, 1,  0,  0,  1, 0, 0, 0, 0, 0, 0],
                        [0, 0, 0, -1,  0,  0, 1, 0, 0, 1, 0, 0],
                        [0, 0, 0,  0, -1,  0, 0, 1, 0, 0, 1, 0],
                        [0, 0, 0,  0,  0, -1, 0, 0, 1, 0, 0, 1]])

zeros_6x12 = np.zeros((6, 12))

J_E = np.concatenate([E_first_col, same_open_E, zeros_6x12, zeros_6x12], axis=1)
Q_E = np.concatenate([E_first_col, zeros_6x12, same_open_E, zeros_6x12], axis=1)
K_E = np.concatenate([E_first_col, zeros_6x12, zeros_6x12, same_open_E], axis=1)

# E - player 1
# E column: J|Jb, J|Qb, J|Kb, J|Jp, J|Qp, J|Kp, J|Jpb, J|Qpb, J|Kpb, J|Jpp, J|Qpp, J|Kpp, Q|...
# E rows: J|J, J|Q, J|K, J|Jp, J|JQp, J|Kp, Q|...

E = np.concatenate([np.array([[1 if i == 0 else 0 for i in range(37)]]), J_E, Q_E, K_E], axis=0)
e = np.array([0 if i != 0 else 1 for i in range(19)])

print(E)

[[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.]
 [-1.  1.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.]
 [-1.  0.  1.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.]
 [-1.  0.  0.  1.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.]
 [ 0.  0.  0.  0. -1.  0.  0.  1.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.]
 [ 0.  0.  0.  0.  0. -1.  0.  0.  1.  0.  0.  1.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.]
 [ 0.  0.  0.  0.  0.  0. -1.  0.  0.  1.  0.  0.  1.  0.  0.  0.  0. 

In [19]:
# Construct F, f matrices for Leduc
F_first_col = np.array([[-1],
                        [-1],
                        [-1],
                        [-1],
                        [-1],
                        [-1]])

same_open_F = np.array([[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                        [ 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
                        [ 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                        [ 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0],
                        [ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
                        [ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1]])

zeros_6x12 = np.zeros((6, 12))

J_F = np.concatenate([F_first_col, same_open_F, zeros_6x12, zeros_6x12], axis=1)
Q_F = np.concatenate([F_first_col, zeros_6x12, same_open_F, zeros_6x12], axis=1)
K_F = np.concatenate([F_first_col, zeros_6x12, zeros_6x12, same_open_F], axis=1)

# F - player 2
# F column: J|Jb(b), J|Qb(b), J|Kb(b), J|Jp(b), J|Qp(b), J|Kp(b), J|Jb(p), J|Qb(p), J|Kb(p), J|Jp(p), J|Qp(p), J|Kp(p), Q|...
# F rows: J|Jb, J|Qb, J|Kb, J|Jp, J|JQp, J|Kp, Q|...

F = np.concatenate([np.array([[1 if i == 0 else 0 for i in range(37)]]), J_F, Q_F, K_F], axis=0)
f = np.array([0 if i != 0 else 1 for i in range(19)])

print(F.shape)


(19, 37)


In [20]:
x, y, p, q = solve_lp_msne(A, E, F, e, f)

# E - player 1
# E column: J|Jb, J|Qb, J|Kb, J|Jp, J|Qp, J|Kp, J|Jpb, J|Qpb, J|Kpb, J|Jpp, J|Qpp, J|Kpp, Q|...
# E rows: J|J, J|Q, J|K, J|Jp, J|JQp, J|Kp, Q|...

p1_actions = ["b", "p", "pb", "pp"]

p1_strategies = ["{}|{}{}".format(open_card, player_card, action) for open_card in kuhn_cards for action in p1_actions for player_card in kuhn_cards]

x_labels = ["null"] + p1_strategies

# for s in p1_strategies:
#   print(s, end=' ')

# F - player 2
# F column: J|Jb(b), J|Qb(b), J|Kb(b), J|Jp(b), J|Qp(b), J|Kp(b), J|Jb(p), J|Qb(p), J|Kb(p), J|Jp(p), J|Qp(p), J|Kp(p), Q|...
# F rows: J|Jb, J|Qb, J|Kb, J|Jp, J|JQp, J|Kp, Q|...

p2_actions = ["b(b)", "p(b)", "b(p)", "p(p)"]

p2_strategies = ["{}|{}{}".format(open_card, player_card, action) for open_card in kuhn_cards for action in p2_actions for player_card in kuhn_cards]

y_labels = ["null"] + p2_strategies

for i in range(len(x)):
  print("{}:\t\t{:.2f}".format(x_labels[i], abs(x[i])))
# print(x)
for i in range(len(y)):
  print("{}:\t{:.2f}".format(y_labels[i], abs(y[i])))

print(y)
print(p)
print(q)

null:		1.00
J|Jb:		1.00
J|Qb:		0.25
J|Kb:		0.25
J|Jp:		0.00
J|Qp:		0.75
J|Kp:		0.75
J|Jpb:		0.00
J|Qpb:		0.00
J|Kpb:		0.75
J|Jpp:		0.00
J|Qpp:		0.75
J|Kpp:		0.00
Q|Jb:		0.25
Q|Qb:		1.00
Q|Kb:		0.25
Q|Jp:		0.75
Q|Qp:		0.00
Q|Kp:		0.75
Q|Jpb:		0.00
Q|Qpb:		0.00
Q|Kpb:		0.75
Q|Jpp:		0.75
Q|Qpp:		0.00
Q|Kpp:		0.00
K|Jb:		0.00
K|Qb:		0.00
K|Kb:		0.00
K|Jp:		1.00
K|Qp:		1.00
K|Kp:		1.00
K|Jpb:		0.00
K|Qpb:		0.00
K|Kpb:		1.00
K|Jpp:		1.00
K|Qpp:		1.00
K|Kpp:		0.00
null:	1.00
J|Jb(b):	1.00
J|Qb(b):	0.00
J|Kb(b):	0.75
J|Jp(b):	0.00
J|Qp(b):	1.00
J|Kp(b):	0.25
J|Jb(p):	1.00
J|Qb(p):	0.25
J|Kb(p):	0.25
J|Jp(p):	0.00
J|Qp(p):	0.75
J|Kp(p):	0.75
Q|Jb(b):	0.00
Q|Qb(b):	1.00
Q|Kb(b):	0.75
Q|Jp(b):	1.00
Q|Qp(b):	0.00
Q|Kp(b):	0.25
Q|Jb(p):	0.25
Q|Qb(p):	1.00
Q|Kb(p):	0.25
Q|Jp(p):	0.75
Q|Qp(p):	0.00
Q|Kp(p):	0.75
K|Jb(b):	0.67
K|Qb(b):	0.67
K|Kb(b):	1.00
K|Jp(b):	0.33
K|Qp(b):	0.33
K|Kp(b):	0.00
K|Jb(p):	0.67
K|Qb(p):	0.67
K|Kb(p):	0.00
K|Jp(p):	0.33
K|Qp(p):	0.33
K|Kp(p):	1.00
[ 1.          1.       

In [21]:
print(np.matmul(x.T, np.matmul(A, y)))

-0.08333333333333329
