In [None]:
#Code to find the Stackelberg Equilibria of a Pricing Game

In [1]:


import numpy as np
from scipy.optimize import linprog

from operator import add, neg



def precise_stackelberg_equilibrium(leader_payoff_matrix, follower_payoff_matrix):
  num_leader_actions = leader_payoff_matrix.shape[0]
  num_follower_actions = leader_payoff_matrix.shape[1]

  # Define the constraint matrix A_ub and the right-hand side b_ub
  A_ub = np.zeros((num_follower_actions + 2, num_leader_actions))
  b_ub = np.zeros(num_follower_actions + 2)
  bounds = [(0, 1) for _ in range(num_leader_actions)]
  best_leader_payoff = np.min(leader_payoff_matrix)
  best_leader_strategy = np.zeros(num_leader_actions)

  for benchmark_follower_action in range(num_follower_actions):
    row_index = 0
    c_leader = -leader_payoff_matrix[:, benchmark_follower_action]  # maximize leader's payoff when follower plays a particular action
    for i in range(num_follower_actions):
      A_ub[row_index] = follower_payoff_matrix.T[i] - follower_payoff_matrix.T[benchmark_follower_action]
      b_ub[row_index] = 0
      row_index = row_index + 1

    A_ub[row_index] = np.ones(num_leader_actions)
    b_ub[row_index] = 1
    row_index = row_index + 1
    A_ub[row_index] = -1 * np.ones(num_leader_actions)
    b_ub[row_index] = -1

    # Solve the linear programming problem
    result = linprog(c_leader, A_ub=A_ub, b_ub=b_ub, bounds=bounds, method='highs')
    print(result.status)

    # Extract the solution
    leader_optimal_strategy = result.x

    # compare different LPs for different optimizer actions
    follower_benchmark_distribution = np.zeros(num_follower_actions)
    follower_benchmark_distribution[benchmark_follower_action] = 1
    leader_payoff = evaluate_leader_payoff(leader_payoff_matrix, leader_optimal_strategy, follower_benchmark_distribution)
    if (leader_payoff >= best_leader_payoff):
      best_leader_payoff = leader_payoff
      best_leader_strategy = leader_optimal_strategy
      follower_response = benchmark_follower_action
  return best_leader_payoff, best_leader_strategy, follower_response


def precise_stackelberg_value(leader_payoff_matrix, follower_payoff_matrix):
    (val,_,_) = precise_stackelberg_equilibrium(leader_payoff_matrix, follower_payoff_matrix)
    return val

# code for computing mnse from https://github.com/sid230798/Game_Theory/blob/master/Problem3/analyse_equilibrium.py
def msne(a):
    a = a.T
    ## One zero array for later (z, x)
    ess = np.ones(a.shape[0]+1)
    ess[0] = 0

    c = -1*(1-ess)  ##[-1, 0 ,0 ,0] -1 coeff for z and 0 for x (Max z == min(-z))
    A_ub = np.concatenate((np.ones((1, a.shape[1])), -1*a), axis=0).T
    B_ub = np.zeros(a.shape[1])
    A_eq = np.expand_dims(ess, axis=0)
    B_eq = np.ones(1)
    bounds = [(None, None)] + [(0,1)]*a.shape[0]
    result = linprog(c, A_ub=A_ub, b_ub=B_ub, A_eq=A_eq, b_eq=B_eq, bounds=bounds)
    p1_val, p1_distribution = result.x[0], result.x[1:]

    ## For 2nd player distribution
    ess = np.ones(a.shape[1]+1)
    ess[0] = 0
    c = (1-ess)
    A_ub = np.concatenate((-1*np.ones((a.shape[0], 1)), a), axis=1)
    B_ub = np.zeros(a.shape[0])
    A_eq = np.expand_dims(ess, axis=0)
    A_eq = np.concatenate((A_eq, 1-A_eq), axis=0)
    B_eq = np.array([1, p1_val]) ## Dual Principle w* = z*
    bounds = [(None, None)] + [(0,1)]*a.shape[1]
    result = linprog(c, A_ub=A_ub, b_ub=B_ub, A_eq=A_eq, b_eq=B_eq, bounds=bounds)
    p2_val, p2_distribution = result.x[0], result.x[1:]

    print("MSNE are : {", tuple(p1_distribution), "," ,tuple(p2_distribution), "}")

def maxmin(a):
    a = a.T
    print(a)
    ## One zero array for later (z, x)
    ess = np.ones(a.shape[0]+1)
    ess[0] = 0

    c = -1*(1-ess)  ##[-1, 0 ,0 ,0] -1 coeff for z and 0 for x (Max z == min(-z))
    A_ub = np.concatenate((np.ones((1, a.shape[1])), -1*a), axis=0).T
    B_ub = np.zeros(a.shape[1])
    A_eq = np.expand_dims(ess, axis=0)
    B_eq = np.ones(1)
    bounds = [(None, None)] + [(0,1)]*a.shape[0]
    print("data:")
    print(c)
    print(A_ub)
    print(B_ub)
    print(A_eq)
    print(B_eq)
    print(bounds)
    result = linprog(c, A_ub=A_ub, b_ub=B_ub, A_eq=A_eq, b_eq=B_eq, bounds=bounds)
    p1_distribution = result.x[1:]
    return p1_distribution

def transform_game_matrix(game_matrix, mixed_strategies):
    """
    Transforms each game matrix based on the set of mixed strategies. Each mixed strategy
    becomes a new 'pure' strategy in the transformed games for the followers.
    """
    # Convert the distributions list into a NumPy array for easier manipulation
    distributions_array = np.array(mixed_strategies)
    transformed_matrix = np.dot(game_matrix.T, distributions_array.T).T
    return transformed_matrix


def evaluate_leader_payoff(game_matrix, leader_strategy, follower_strategy):
    """
    Evaluates the leader's expected payoff on a certain follower game given the leader's strategy and the follower's strategy.
    """
    leader_strategy = np.array(leader_strategy)
    follower_strategy = np.array(follower_strategy)
    expected_payoff = 0
    for leader_strategy, leader_prob in enumerate(leader_strategy):
        for follower_action in range(len(follower_strategy)):
          expected_payoff += leader_prob * follower_strategy[follower_action] * game_matrix[leader_strategy][follower_action]
    return expected_payoff








In [2]:
#Prisoners dilemma game matrix

leader_matrix = np.array([[3, 0],
              [5, 1]])
follower_matrix = np.array([[3, 5],
              [0, 1]])

best_leader_payoff, best_leader_strategy, follower_response = precise_stackelberg_equilibrium(leader_matrix, follower_matrix)

2


TypeError: iteration over a 0-d array