In [195]:
import torch
import itertools

In [196]:
# deviations are tuples, first index of deviation must always specify player (at least for now)

In [311]:
class InverseCorrelatedEquilibriumProblem:
    
    def __init__(self,
                 K,
                 player_action_dims,
                 observed_strategy,
                 payoff_features,
                 deviations_dim,
                 get_deviation_iter,
                 apply_deviation):
        self.num_players = len(player_action_dims)
        self.player_action_dims = player_action_dims
        self.observed_strategy = observed_strategy
        self.payoff_features_fn = payoff_features
        self.deviations_dim = deviations_dim
        self.get_deviation_iter = get_deviation_iter
        self.apply_deviation_fn = apply_deviation
        self.memoize_regrets_dict = {}
        assert self.deviations_dim[0] == self.num_players
        self.K = K
    
    def enumerate_joint_actions(self):
        return itertools.product(*[range(d) for d in self.player_action_dims])
    
    def predicted_strategy(self, theta):
        unnormalized_dist = torch.zeros(*self.player_action_dims)
        # dot product of each regret feat with each theta
        for joint_action in self.enumerate_joint_actions():
            action_regret_feats = self.compute_phi_regrets_for_action(torch.tensor(list(joint_action)))
            action_regret_scalars = torch.sum(action_regret_feats * theta, dim=len(theta.shape)-1)
            unnormalized_dist[joint_action] = torch.exp(-torch.sum(action_regret_scalars))
        Z = torch.sum(unnormalized_dist)
        return unnormalized_dist / Z

    def compute_phi_regrets_for_action(self, action_tens):
        key = tuple(action_tens.numpy())
        if key in self.memoize_regrets_dict:
            return self.memoize_regrets_dict[key]
        
        
    # these are the instantaneous regrets for all the specific deviations
        regret_feats = torch.zeros(*self.deviations_dim, self.K, requires_grad=False)
        dev_iter = self.get_deviation_iter(self.player_action_dims)
        for deviation in dev_iter():
            deviation_applied = self.apply_deviation_fn(action_tens, deviation)
            # get regrets for specific player only (player is specified by 0 of deviation)
            regret_feats[deviation] = self.payoff_features_fn(deviation_applied)[deviation[0]] - self.payoff_features_fn(action_tens)[deviation[0]]
        self.memoize_regrets_dict[key] = regret_feats
        return regret_feats
    
    def compute_expected_regret_feats(self, action_dist):
        total_regret_feats = torch.zeros(*self.deviations_dim, self.K, requires_grad=False)
        n = 0
        for joint_action in self.enumerate_joint_actions():
            n += 1
            total_regret_feats += action_dist[joint_action] * self.compute_phi_regrets_for_action(torch.tensor(list(joint_action)))
        return total_regret_feats / n
    
    
    def analytic_gradient(self, theta):
        dev_iter = self.get_deviation_iter(self.player_action_dims)
        regret_feats_observed = self.compute_expected_regret_feats(self.observed_strategy)
        regret_feats_predicted = self.compute_expected_regret_feats(self.predicted_strategy(theta))
        g = torch.zeros_like(theta, requires_grad=False)
        for deviation in dev_iter():
            this_deviation_theta = theta[deviation].view(*[1 for _ in deviation],-1) # unsqueeze to broadcast
            # sorry that is a really hacky way to do it, but i think it does what we want
            # i.e. add one empty dim for all dims of deviations, then -1 for the dim that is size K
            little_scalar_regrets = torch.sum(regret_feats_observed * this_deviation_theta, dim=len(theta.shape)-1)
            #  now argmax
            
            fstar_ind = little_scalar_regrets.argmax()
            
            g[deviation] = regret_feats_observed.view(-1,self.K)[fstar_ind] - regret_feats_predicted[deviation]
        return g
            

    def maxent_dual_objective(self, theta):
        bigZ = torch.tensor(0.0, requires_grad=True)
    
        # for each joint action in A
        for joint_action in self.enumerate_joint_actions():
            little_r_a_feats = self.compute_phi_regrets_for_action(torch.tensor(list(joint_action)))
            # scalar features for all deviations f with their own theta_fs
            little_r_a_scalar = torch.sum(little_r_a_feats * theta, dim=len(theta.shape)-1)
            # sum up, exp, add to Z
            bigZ = bigZ + torch.exp( -torch.sum(little_r_a_scalar))
        obj = torch.log(bigZ)
        # computing expected big regret for theta_f is max over phi_f of r_f(predicted | theta_f)
        # phi_f here is just the whole phi
        expected_er_feats = self.compute_expected_regret_feats(self.observed_strategy)

        # for each deviation
        dev_iter = self.get_deviation_iter(self.player_action_dims)
        for deviation in dev_iter():
            this_deviation_theta = theta[deviation].view(*[1 for _ in deviation],-1) # unsqueeze to broadcast
            # sorry that is a really hacky way to do it, but i think it does what we want
            # i.e. add one empty dim for all dims of deviations, then -1 for the dim that is size K
            little_scalar_regrets = torch.sum(expected_er_feats * this_deviation_theta, dim=len(theta.shape)-1)
            # little_scalar_regrets contains the regret for theta_f for all the different fs
            big_Regret = torch.max(little_scalar_regrets)
            obj = obj + big_Regret
        return obj

In [312]:
def rps_feats(action_tuple):
    # 0 is rock, 1 is paper, 2 is scissors
    p1, p2 = action_tuple
    # feat_vecs has shape N, K
    if p1 == 0:
        if p2 == 0:
            return torch.tensor([[0.0,0.0],[0.0,0.0]])
        if p2 == 1:
            return torch.tensor([[0.0,1.0],[0.0,1.0]])
        if p2 == 2:
            return torch.tensor([[1.0,0.0],[1.0,0.0]])
    elif p1 == 1:
        if p2 == 0:
            return torch.tensor([[1.0,0.0],[1.0,0.0]])
        if p2 == 1:
            return torch.tensor([[0.0,0.0],[0.0,0.0]])
        if p2 == 2:
            return torch.tensor([[0.0,1.0],[0.0,1.0]])
    elif p1 == 2:
        if p2 == 0:
            return torch.tensor([[0.0,1.0],[0.0,1.0]])
        if p2 == 1:
            return torch.tensor([[1.0,0.0],[1.0,0.0]])
        if p2 == 2:
            return torch.tensor([[0.0,0.0],[0.0,0.0]])

In [313]:
def external_enumerator(player_action_dims):
    def e():
        for i in range(len(player_action_dims)):
            for j in range(player_action_dims[i]):
                yield (i, j)
    return e

In [314]:
def switch_enumerator(player_action_dims):
    def e():
        for i in range(len(player_action_dims)):
            for j in range(player_action_dims[i]):
                for k in range(player_action_dims[i]):
                    yield (i, j, k)
    return e

In [315]:
def apply_external_deviation(action_tens, deviation):
    new_action_tens = torch.clone(action_tens)
    player, action = deviation
    new_action_tens[player] = action
    return new_action_tens

In [316]:
def apply_switch_deviation(action_tens, deviation):
    new_action_tens = torch.clone(action_tens)
    player, actionx, actiony = deviation
    if new_action_tens[player] == actionx:
        new_action_tens[player] = actiony
    return new_action_tens

In [290]:
from torch import optim
import math

In [291]:
def optimize_problem(prob_obj, theta, epochs=100, lr=0.1):
    optimizer = optim.SGD([theta], lr=lr)
    for i in range(epochs):
        optimizer.zero_grad()
        loss = prob_obj.maxent_dual_objective(theta)
        print(loss.item())
        loss.backward()
        optimizer.step()

In [325]:
def optimize_analytic(prob_obj, theta, epochs=100, lr=0.1):
    for i in range(epochs):
        g = prob_obj.analytic_gradient(theta)
        theta -= lr*g
    return theta

In [318]:
def optimize_with_projections(prob_obj, theta, epochs=100, lr=0.1):
    optimizer = optim.Adam([theta], lr=lr)
    for i in range(epochs):
        l2_norms = torch.norm(theta, 2, dim=len(theta.shape)-1)
        scalings = torch.min(torch.ones_like(l2_norms), 1.0/l2_norms)
        theta.data = theta.data * scalings.unsqueeze(len(theta.shape)-1)
        
        optimizer.zero_grad()
        loss = prob_obj.maxent_dual_objective(theta)
        loss.backward()
        optimizer.step()


In [319]:
nash_eq_rps = torch.tensor([1/3 + 0.001,1/3,1/3 - 0.001]).view(-1,1) @ torch.tensor([1/3,1/3 - 0.01,1/3 + 0.01]).view(1,-1)

In [320]:
nash_eq_rps

tensor([[0.1114, 0.1081, 0.1148],
        [0.1111, 0.1078, 0.1144],
        [0.1108, 0.1075, 0.1141]])

In [321]:
my_obj = InverseCorrelatedEquilibriumProblem(2, (3, 3), nash_eq_rps, rps_feats, (2,3), external_enumerator, apply_external_deviation)

In [322]:
my_obj.compute_phi_regrets_for_action(torch.tensor([0,1]))

tensor([[[ 0.,  0.],
         [ 0., -1.],
         [ 1., -1.]],

        [[ 0., -1.],
         [ 0.,  0.],
         [ 1., -1.]]])

In [298]:
my_obj.predicted_strategy(ext_theta)

tensor([[0.0113, 0.1564, 0.1413],
        [0.3235, 0.0068, 0.0828],
        [0.1357, 0.1396, 0.0026]], grad_fn=<DivBackward0>)

In [328]:
analytic_theta = torch.rand(2,3,2)

In [334]:
optimize_analytic(my_obj, analytic_theta, epochs=10000)

tensor([[[-6.9537e-05,  1.3165e-04],
         [ 9.4938e-06,  5.7676e-05],
         [ 6.0048e-05,  2.7983e-06]],

        [[-6.5082e-05,  2.8552e-05],
         [-9.8819e-05,  7.7228e-05],
         [-8.0911e-05,  5.7632e-05]]])

In [335]:
my_obj.predicted_strategy(analytic_theta)

tensor([[0.1111, 0.1111, 0.1111],
        [0.1111, 0.1111, 0.1112],
        [0.1112, 0.1111, 0.1111]])

In [336]:
switch_obj = InverseCorrelatedEquilibriumProblem(2, (3,3), nash_eq_rps, rps_feats, (2,3,3), switch_enumerator, apply_switch_deviation)

In [337]:
switch_obj.compute_phi_regrets_for_action(torch.tensor([0,1]))

tensor([[[[ 0.,  0.],
          [ 0., -1.],
          [ 1., -1.]],

         [[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]],

         [[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]]],


        [[[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]],

         [[ 0., -1.],
          [ 0.,  0.],
          [ 1., -1.]],

         [[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]]]])

In [344]:
analytic_theta = torch.rand(2,3,3,2, requires_grad=False)

In [345]:
switch_obj.predicted_strategy(analytic_theta)

tensor([[0.0074, 0.1662, 0.1147],
        [0.2262, 0.0089, 0.0635],
        [0.2303, 0.1796, 0.0031]])

In [346]:
optimize_analytic(switch_obj, analytic_theta, epochs=10000)

tensor([[[[ 2.0746e-01,  1.0376e-01],
          [ 2.7552e-05, -2.6473e-05],
          [ 5.2302e-02, -2.9471e-02]],

         [[ 2.1693e-02, -2.6166e-02],
          [ 9.3484e-02,  1.8532e-01],
          [-5.3041e-02,  2.2770e-02]],

         [[ 5.7437e-02,  9.8091e-02],
          [ 2.5953e-02, -2.5751e-02],
          [ 2.2412e-01,  4.4416e-01]]],


        [[[-6.7583e-05,  3.3810e-05],
          [ 5.2689e-02,  2.6368e-02],
          [ 1.8386e-05, -7.7079e-06]],

         [[ 9.4466e-02,  1.8715e-01],
          [-2.8446e-05,  5.6285e-05],
          [ 1.0382e-01,  1.8895e-02]],

         [[-5.2737e-06, -2.2211e-05],
          [-2.2360e-02,  2.2373e-02],
          [ 2.6455e-01,  1.3234e-01]]]])

In [347]:
switch_obj.predicted_strategy(analytic_theta)

tensor([[0.1114, 0.1134, 0.1119],
        [0.1109, 0.1074, 0.1108],
        [0.1133, 0.1107, 0.1101]])

In [189]:
int_theta = torch.rand(2,3,3,2, requires_grad=True)

In [190]:
switch_obj.predicted_strategy(int_theta)

tensor([[0.0088, 0.1171, 0.2765],
        [0.0904, 0.0144, 0.1407],
        [0.1124, 0.2356, 0.0041]], grad_fn=<DivBackward0>)

In [191]:
avg_theta = optimize_problem(switch_obj, int_theta)

In [192]:
switch_obj.predicted_strategy(int_theta)

tensor([[0.1111, 0.1123, 0.1110],
        [0.1121, 0.1130, 0.1113],
        [0.1108, 0.1115, 0.1067]], grad_fn=<DivBackward0>)

In [194]:
switch_obj.predicted_strategy(avg_theta)

tensor([[0.0049, 0.1204, 0.2885],
        [0.0852, 0.0097, 0.1399],
        [0.1066, 0.2432, 0.0016]])

In [348]:
def chicken_feats(action_tuple):
    p1, p2 = action_tuple
    # 0 is drive, 1 is swerve
    # for utility vectors first dim is crash, second dim is look cool, third dim is look like a wimp
    if p1 == 0:
        if p2 == 0:
            return torch.tensor([[1.0,0.0,0.0], [1.0,0.0,0.0]])
        if p2 == 1:
            return torch.tensor([[0.0,1.0,0.0], [0.0,0.0,1.0]])
    elif p1 == 1:
        if p2 == 0:
            return torch.tensor([[0.0,0.0,1.0], [0.0,1.0,0.0]])
        if p2 == 1:
            return torch.tensor([[0.0,0.0,1.0], [0.0,0.0,1.0]])

# pure nash equilibrium

In [349]:
# explicit payoffs for util vector [-5.0, 1.0, 0.0]
chicken_payoffs = torch.tensor([
    [[-5.0, 1.0],[0.0,0.0]],
    [[-5.0, 0.0],[1.0,0.0]]
])

In [350]:
mixed_nash_chicken = torch.tensor([0.1667,.8333]).view(-1,1) @ torch.tensor([.1667, .8333]).view(1,-1)

In [351]:
mixed_nash_chicken

tensor([[0.0278, 0.1389],
        [0.1389, 0.6944]])

In [352]:
chicken_obj_ext = InverseCorrelatedEquilibriumProblem(3, (2,2), mixed_nash_chicken, chicken_feats, (2,2), external_enumerator, apply_external_deviation)

In [353]:
chicken_analytic =  torch.zeros(2,2,3)

In [354]:
chicken_obj_ext.predicted_strategy(chicken_analytic)

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])

In [355]:
optimize_analytic(chicken_obj_ext,  chicken_analytic, epochs=1000,  lr=0.1)

tensor([[[ 0.3303, -0.2071, -0.1233],
         [-0.6548, -0.1720,  0.8267]],

        [[ 0.3303, -0.2071, -0.1233],
         [-0.6548, -0.1720,  0.8267]]])

In [356]:
chicken_obj_ext.predicted_strategy(chicken_analytic)

tensor([[0.0303, 0.1375],
        [0.1375, 0.6947]])

In [55]:
chicken_theta = torch.zeros(2,2,3, requires_grad=True)

In [56]:
chicken_obj_ext.predicted_strategy(chicken_theta)

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]], grad_fn=<DivBackward0>)

In [57]:
optimize_problem(chicken_obj_ext, chicken_theta, epochs=1000, lr=0.1)

In [58]:
chicken_obj_ext.predicted_strategy(chicken_theta)

tensor([[0.1389, 0.1945],
        [0.1945, 0.4722]], grad_fn=<DivBackward0>)

# correlated equilibrium

In [357]:
# this is another correlated equilibrium per John's slides. sometimes both people are cowardly in this one.
corr_chicken = torch.tensor([[0.0,0.4],[0.4,0.2]])

In [358]:
correq_theta = torch.zeros(2,2,2,3,requires_grad=True)

In [368]:
correq_analytic  = torch.zeros(2,2,2,3, requires_grad=False)

In [369]:
chicken_obj_int = InverseCorrelatedEquilibriumProblem(3, (2,2), corr_chicken, chicken_feats, (2,2,2), switch_enumerator, apply_switch_deviation)

In [370]:
chicken_obj_int.predicted_strategy(correq_analytic)

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])

In [373]:
optimize_analytic(chicken_obj_int, correq_analytic,  epochs=10000)

tensor([[[[ 0.0000,  0.0000,  0.0000],
          [-1.8815,  0.8529,  1.0287]],

         [[-0.2229,  0.2892, -0.0663],
          [ 0.0000,  0.0000,  0.0000]]],


        [[[ 0.0000,  0.0000,  0.0000],
          [-1.8815,  0.8529,  1.0287]],

         [[-0.2229,  0.2892, -0.0663],
          [ 0.0000,  0.0000,  0.0000]]]])

In [374]:
chicken_obj_int.predicted_strategy(correq_analytic)

tensor([[0.0012, 0.3994],
        [0.3994, 0.2000]])

In [243]:
correq_theta

tensor([[[[-4.0000e-02,  4.0000e-02,  8.1956e-10],
          [-6.4989e+00, -6.1021e+00,  1.2601e+01]],

         [[ 6.0921e+00,  6.3569e+00, -1.2449e+01],
          [-4.0000e-02,  4.0000e-02,  8.1956e-10]]],


        [[[-4.0000e-02,  4.0000e-02,  8.1956e-10],
          [-6.4989e+00, -6.1021e+00,  1.2601e+01]],

         [[ 6.0921e+00,  6.3569e+00, -1.2449e+01],
          [-4.0000e-02,  4.0000e-02, -1.8626e-09]]]], requires_grad=True)

In [244]:
chicken_obj_int.predicted_strategy(correq_theta)

tensor([[0.1250, 0.3250],
        [0.3250, 0.2250]], grad_fn=<DivBackward0>)

In [384]:
# this is a social-welfare-maximizing correlated equilibrium. Computed by Kevin's code, or it just makes sense.
corr_chicken = torch.tensor([[0.0,0.5], [0.5,0.0]])
corr_chicken_approx = torch.tensor([[0.0,0.46], [0.54,0.0]])

correq_theta = torch.ones(2,2,2,3)
chicken_obj_int = InverseCorrelatedEquilibriumProblem(3, (2,2), corr_chicken, chicken_feats, (2,2,2), switch_enumerator, apply_switch_deviation)

In [385]:
chicken_obj_int.predicted_strategy(correq_theta)

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])

In [386]:
optimize_analytic(chicken_obj_int,  correq_theta, epochs=10000)

tensor([[[[ 1.0000e+00,  1.0000e+00,  1.0000e+00],
          [-9.7536e-01,  1.9754e+00,  2.0000e+00]],

         [[ 2.4636e-02,  2.9754e+00,  1.5948e-06],
          [ 1.0000e+00,  1.0000e+00,  1.0000e+00]]],


        [[[ 1.0000e+00,  1.0000e+00,  1.0000e+00],
          [-9.7536e-01,  1.9754e+00,  2.0000e+00]],

         [[ 2.4636e-02,  2.9754e+00,  1.5948e-06],
          [ 1.0000e+00,  1.0000e+00,  1.0000e+00]]]])

In [387]:
chicken_obj_int.predicted_strategy(correq_theta)

tensor([[0.0014, 0.4986],
        [0.4986, 0.0014]])

In [261]:
chicken_obj_int.predicted_strategy(correq_theta)

tensor([[0.1250, 0.3750],
        [0.3750, 0.1250]], grad_fn=<DivBackward0>)

In [232]:
correq_theta[0,0,1]
correq_theta[0,1,0]
correq_theta[1,0,1]
correq_theta[1,1,0]

tensor([0.9931, 1.0069, 1.0000], grad_fn=<SelectBackward>)