In [None]:
import cvxpy as cp
import numpy as np

def solve_stackelberg_game(q_values_data):
    """Solve for the defender's minimax mixed strategy with a linear program.

    The entries of ``q_values_data`` are scattered into a matrix
    ``Q[attacker, defender]``.  The LP then chooses a probability vector ``q``
    over defender actions that minimizes ``z`` subject to ``Q[i] @ q <= z``
    for every attacker action ``i``; in other words it minimizes the largest
    expected Q-value that any single attacker action can reach.

    Args:
        q_values_data: Iterable of dicts with integer ``'defender'`` and
            ``'attacker'`` action indices and a numeric ``'q_value'``.
            Index pairs that never appear keep the value 0; a repeated pair
            is overwritten by its last occurrence.

    Returns:
        dict with two keys: ``"Defender's Optimal Strategy"`` (the optimal
        ``q`` as returned by cvxpy) and ``"Minimum Expected Payoff for
        Defender"`` (the optimal objective value ``z``).

    Raises:
        ValueError: If ``q_values_data`` is empty.
        RuntimeError: If the solver does not report an optimal solution.
    """
    # Materialize once so generators work and the data is walked consistently.
    entries = list(q_values_data)
    if not entries:
        raise ValueError("q_values_data must not be empty")

    # Convert q_values_data to a dense (attacker x defender) matrix.
    num_defender_actions = max(entry['defender'] for entry in entries) + 1
    num_attacker_actions = max(entry['attacker'] for entry in entries) + 1
    Q_matrix = np.zeros((num_attacker_actions, num_defender_actions))
    for entry in entries:
        Q_matrix[entry['attacker'], entry['defender']] = entry['q_value']
    print(Q_matrix)

    # Variables
    q = cp.Variable(num_defender_actions, nonneg=True)  # Defender's mixed strategy
    z = cp.Variable()  # Upper bound on the expected Q-value of every attacker action

    # Objective: make that upper bound as small as possible.
    objective = cp.Minimize(z)

    constraints = [
        cp.sum(q) == 1,  # q is a probability distribution (non-negativity set on the variable)
        # Already implied by the constraints above (a convex combination of
        # Q-values cannot fall below the smallest entry); kept as an explicit bound.
        z >= np.min(Q_matrix),
        # One row per attacker action: its expected Q-value must not exceed z.
        Q_matrix @ q <= z,
    ]

    # Form and solve the problem
    prob = cp.Problem(objective, constraints)
    prob.solve()

    # Fail loudly instead of handing back None values when the solve did not succeed.
    if prob.status not in ("optimal", "optimal_inaccurate"):
        raise RuntimeError(f"Solver did not find an optimal solution (status: {prob.status})")

    return {
        "Defender's Optimal Strategy": q.value,
        "Minimum Expected Payoff for Defender": prob.value
    }


# Alternative example input with positive Q-values (left commented out for reference):
# q_values_data = [
#     {'defender': 0, 'attacker': 0, 'q_value': 2.875578551458811},
#     {'defender': 0, 'attacker': 1, 'q_value': 3.056369607957987},
#     {'defender': 0, 'attacker': 2, 'q_value': 4.0227094368930505},
#     {'defender': 1, 'attacker': 0, 'q_value': 2.8071575865839846},
#     {'defender': 1, 'attacker': 1, 'q_value': 3.1118154849842843},
#     {'defender': 1, 'attacker': 2, 'q_value': 4.59990844267451},
#     {'defender': 2, 'attacker': 0, 'q_value': 2.884321680025897},
#     {'defender': 2, 'attacker': 1, 'q_value': 3.586291563285632},
#     {'defender': 2, 'attacker': 2, 'q_value': 4.1127920519612555}
# ]

# Q-value samples listed as (defender action, attacker action, q_value) triples.
_q_value_triples = [
    (0, 0, -3.8756854565446464),
    (0, 1, -2.265702286453962),
    (0, 2, -3.467078694389465),
    (1, 0, -4.25523030477664),
    (1, 1, -3.444391083127282),
    (1, 2, -3.1855277873757535),
    (2, 0, -4.23960687623448),
    (2, 1, -2.063042116749623),
    (2, 2, -3.5771692712357543),
]

# Expand the triples into the list-of-dicts format the solver expects.
q_values_data = [
    {'defender': defender_action, 'attacker': attacker_action, 'q_value': q_value}
    for defender_action, attacker_action, q_value in _q_value_triples
]

# Solve the LP for this data set and show the resulting strategy and value.
results = solve_stackelberg_game(q_values_data)
print(results)


In [None]:
import jax
import jax.numpy as jnp
import optax

# Payoff matrix of the 3x3 game.  The entries are the rounded Q-values from the
# first cell's q_values_data, laid out there as Q_matrix[attacker][defender].
# NOTE(review): the payoff functions in this cell compute
# strategy_defender @ payoff_matrix @ strategy_attacker, i.e. they treat the
# rows as defender actions -- confirm whether a transpose is intended.
payoff_matrix = jnp.array(
    [
        [-3.87568546, -4.2552303, -4.23960688],
        [-2.26570229, -3.44439108, -2.06304212],
        [-3.46707869, -3.18552779, -3.57716927],
    ]
)

# Squared Euclidean distance between the two players, placed at (0, 0) and
# (2, 2).  lagrangian_inner reads it for its (constant) constraint term.
distance_squared = jnp.sum(
    jnp.square(jnp.array([0.0, 0.0]) - jnp.array([2.0, 2.0]))
)

# Lagrangian differentiated by the gradient functions defined below
def lagrangian_inner(strategy_defender, strategy_attacker, lam):
    """Return the bilinear expected payoff minus ``lam`` times the constraint term.

    The expected payoff is ``strategy_defender @ payoff_matrix @ strategy_attacker``.
    The constraint term ``distance_squared - 0.5**2`` is built only from
    module-level constants, so it depends on neither strategy and contributes
    only to the derivative with respect to ``lam``.
    """
    constraint = distance_squared - 0.5**2
    defender_weighted_payoffs = jnp.dot(strategy_defender, payoff_matrix)
    expected_payoff = jnp.dot(defender_weighted_payoffs, strategy_attacker)
    return expected_payoff - lam * constraint

# JIT-compiled partial derivatives of the Lagrangian, one per positional
# argument: 0 -> defender strategy, 1 -> attacker strategy, 2 -> multiplier.
grad_lagrangian_defender, grad_lagrangian_attacker, grad_lagrangian_lambda = (
    jax.jit(jax.grad(lagrangian_inner, argnums=position)) for position in range(3)
)

# One independent plain-SGD optimizer per optimized quantity, each with step size 0.001.
optimizer_defender, optimizer_attacker, optimizer_lambda = (
    optax.sgd(0.001) for _ in range(3)
)

def _clip_and_normalize(strategy):
    """Clip negative entries to zero, then rescale so the entries sum to one."""
    clipped = jnp.maximum(strategy, 0.0)
    return clipped / jnp.sum(clipped)


def nested_optimization(initial_strategy_defender, initial_strategy_attacker, initial_lambda,
                        num_outer_steps=100, num_inner_steps=100):
    """Alternate attacker/multiplier updates (inner loop) with defender updates (outer loop).

    Each outer step first runs ``num_inner_steps`` SGD updates of the attacker
    strategy and the multiplier while the defender is held fixed, then takes a
    single SGD step on the defender strategy using the gradient evaluated at
    the attacker's final response.

    Sign conventions: the attacker step uses the raw gradient of
    ``lagrangian_inner``, while the defender and multiplier gradients are
    negated before being handed to their optimizers, so those two move in the
    opposite direction along the Lagrangian from the attacker.

    Args:
        initial_strategy_defender: Starting mixed strategy of the defender.
        initial_strategy_attacker: Starting mixed strategy of the attacker.
        initial_lambda: Starting value of the multiplier.
        num_outer_steps: Number of defender updates (default 100).
        num_inner_steps: Attacker/multiplier updates per defender update
            (default 100).

    Returns:
        Tuple ``(strategy_defender, strategy_attacker)`` after the final update.
    """
    strategy_defender = initial_strategy_defender
    strategy_attacker = initial_strategy_attacker
    lam = initial_lambda

    # Initialise each optimizer on exactly the tree it will later update, so a
    # stateful optimizer would also see matching structures.
    opt_state_defender = optimizer_defender.init(strategy_defender)
    opt_state_attacker = optimizer_attacker.init(strategy_attacker)
    opt_state_lambda = optimizer_lambda.init(lam)

    # Outer loop: defender optimization
    for _outer in range(num_outer_steps):

        # Inner loop: attacker response (and multiplier) for the current defender
        for _inner in range(num_inner_steps):
            # Both gradients are taken at the same point, before either update.
            grad_attacker = grad_lagrangian_attacker(strategy_defender, strategy_attacker, lam)
            grad_lambda = -1 * grad_lagrangian_lambda(strategy_defender, strategy_attacker, lam)

            # Update attacker strategy
            updates, opt_state_attacker = optimizer_attacker.update(grad_attacker, opt_state_attacker)
            strategy_attacker = optax.apply_updates(strategy_attacker, updates)

            # Update lambda and keep it non-negative
            updates, opt_state_lambda = optimizer_lambda.update(grad_lambda, opt_state_lambda)
            lam = jnp.maximum(0, optax.apply_updates(lam, updates))

            # Map the attacker strategy back to a valid probability distribution
            strategy_attacker = _clip_and_normalize(strategy_attacker)

        # Defender step based on the attacker's response: the gradient is
        # evaluated once, after the inner loop, so it reflects the final
        # attacker strategy instead of a value from before the last inner update.
        grad_defender = -1 * grad_lagrangian_defender(strategy_defender, strategy_attacker, lam)
        updates, opt_state_defender = optimizer_defender.update(grad_defender, opt_state_defender)
        strategy_defender = _clip_and_normalize(optax.apply_updates(strategy_defender, updates))

    return strategy_defender, strategy_attacker

# Run the nested optimization from uniform mixed strategies and a unit multiplier.
initial_lambda = jnp.array(1.0)
initial_strategy_defender = jnp.array([1/3] * 3)
initial_strategy_attacker = jnp.array([1/3] * 3)
(optimized_strategy_defender,
 optimized_strategy_attacker) = nested_optimization(
    initial_strategy_defender=initial_strategy_defender,
    initial_strategy_attacker=initial_strategy_attacker,
    initial_lambda=initial_lambda,
)

def compute_expected_payoff(strategy_defender, strategy_attacker):
    """Return ``strategy_defender @ payoff_matrix @ strategy_attacker`` as a scalar."""
    defender_weighted_payoffs = jnp.dot(strategy_defender, payoff_matrix)
    return jnp.dot(defender_weighted_payoffs, strategy_attacker)



# Print both optimized strategies, one labelled line each.
for _label, _strategy in (
    ("Optimized Strategy (Defender):", optimized_strategy_defender),
    ("Optimized Strategy (Attacker):", optimized_strategy_attacker),
):
    print(_label, _strategy)

# Last expression of the cell: the expected payoff at the optimized strategies.
compute_expected_payoff(optimized_strategy_defender, optimized_strategy_attacker)
