This was just a first attempt at getting something working which sort of looked like what we wanted
to simulate for honeypot allocation.  The numbers are all arbitrarily selected and it is as simple as possible.

In [1]:
from gurobipy import *

In [20]:
# two services, two attackers,
NUM_ATTACKERS = 2
NUM_NODES = 10
NUM_SERVICES = 2
M = 99999
# reward[s][theta][a] is reward when service s is used for attacker theta action a
# first attacker has 2 actions for service 0 and 3 for service 1
defender_legit_rewards = [
    [
        [0, -5],
        [-1, -2, -3]
    ],
    [
        [-1, -4, -7],
        [-5, -2]
    ]
]
# defender rewards for actions on honeypots are negative of legit rewards for defenders
# attacker rewards for actions on legit servers are negative of legit rewards for defenders
# attacker rewards for actions on honeypots are the same as legit rewards for defenders

attacker_prob = [0.5, 0.5]

#required services (must have 2 of service 0 and 3 of service 1)
x = [0.2, 0.2]

m = Model('test2')

x_prime = []
# mixed strategy over 2 legit service options
# must have at least 2 of the first one and 3 of the second
x_prime.append(m.addVar(lb=0, vtype=GRB.CONTINUOUS, name='x0'))
x_prime.append(m.addVar(lb=0, vtype=GRB.CONTINUOUS, name='x1'))

# n: attacker pure strategy
# n[theta][s][a] is whether attack a for service s for attacker theta is selected
# attackers only choose one attack
n = [] 
for theta in range(NUM_ATTACKERS):
    n_t = []
    for s in range(NUM_SERVICES):
        n_s = []
        for a in range(len(defender_legit_rewards[s][theta])):
            n_s.append(m.addVar(vtype=GRB.BINARY, name='n_{0}_{1}_{2}'.format(theta, s ,a)))
        n_t.append(n_s)
    n.append(n_t)
    
v = []
for v_i in range(NUM_ATTACKERS):
    v.append(m.addVar(vtype=GRB.CONTINUOUS, name='v_{0}'.format(v_i)))

m.setObjective(sum(attacker_prob[theta] * n[theta][s][a] * (x[s] * defender_legit_rewards[s][theta][a] + \
                                                            x_prime[s] * -1 * defender_legit_rewards[s][theta][a]) \
                    for a in range(len(defender_legit_rewards[s][theta])) \
                   for theta in [0,1] \
                   for s in [0,1]), GRB.MAXIMIZE)

# how to rewrite constraint that requires the attacker to choose the best action available given the 
# defender's best action

# I think we can restrict n to binary (instead of number of attacks) because attacking multiple times will just
# give a linear increase in the reward.  Also, I think the attacker can always just choose a single attack instead
# of multiple as there is going to always be a dominant single attack that will net the most points and the attacker
# will only pick multiple attacks if they have the same max value


m.addConstr(sum(x_prime[s] + x[s] for s in range(NUM_SERVICES)) == 1, 'defender_strat')
m.addConstrs((sum(sum(n[theta][s]) for s in range(NUM_SERVICES)) == 1 for theta in range(NUM_ATTACKERS)),
               'attacker_strat')
m.addConstrs((0 <= v[theta] - sum(-1 * defender_legit_rewards[s][theta][a] * x[s] +
                                  defender_legit_rewards[s][theta][a] * x_prime[s] for s in range(NUM_SERVICES)) \
             for a in range(len(defender_legit_rewards[s][theta])) \
             for theta in range(NUM_ATTACKERS)),
             'attacker_best_strat_1')
m.addConstrs((v[theta] - sum(-1 * defender_legit_rewards[s][theta][a] * x[s] + 
                             defender_legit_rewards[s][theta][a] * x_prime[s] for s in range(NUM_SERVICES)) \
             <= (1 - n[theta][s][a]) * M
             for a in range(len(defender_legit_rewards[s][theta])) \
             for theta in range(NUM_ATTACKERS)),
             'attacker_best_strat_2')

m.optimize()



Optimize a model with 11 rows, 14 columns and 38 nonzeros
Model has 7 quadratic objective terms
Variable types: 4 continuous, 10 integer (10 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+05]
  Objective range  [1e-01, 5e-01]
  QObjective range [1e+00, 5e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [2e-01, 1e+05]
Presolve removed 4 rows and 5 columns
Presolve time: 0.01s
Presolved: 17 rows, 14 columns, 37 nonzeros
Variable types: 9 continuous, 5 integer (5 binary)

Root relaxation: objective 1.400000e+00, 6 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

*    0     0               0       1.4000000    1.40000  0.00%     -    0s

Explored 0 nodes (6 simplex iterations) in 0.05 seconds
Thread count was 4 (of 4 available processors)

Solution count 1: 1.4 

Optimal solution found (tolerance 1.00e-04)
Best objective 1.400000000000e+00,

In [22]:
for var in m.getVars():
    print(var.varName, var.x)
print('Obj:', m.objVal)

('x0', 0.6000000000000001)
('x1', 0.0)
('n_0_0_0', 0.0)
('n_0_0_1', 1.0)
('n_0_1_0', 0.0)
('n_0_1_1', 0.0)
('n_0_1_2', 0.0)
('n_1_0_0', 0.0)
('n_1_0_1', 1.0)
('n_1_0_2', 0.0)
('n_1_1_0', 0.0)
('n_1_1_1', 0.0)
('v_0', 0.2)
('v_1', 0.6)
('Obj:', 1.4000000000000004)
