In [1]:
import torch

In [9]:
# deviations are tuples, first index of deviation must always specify player (at least for now)

In [136]:
class InverseCorrelatedEquilibriumProblem:
    
    def __init__(self,
                 K,
                 player_action_dims,
                 observed_strategy,
                 payoff_features,
                 deviations_dim,
                 get_deviation_iter,
                 apply_deviation):
        self.num_players = len(player_action_dims)
        self.player_action_dims = player_action_dims
        self.observed_strategy = observed_strategy
        self.payoff_features_fn = payoff_features
        self.deviations_dim = deviations_dim
        self.get_deviation_iter = get_deviation_iter
        self.apply_deviation_fn = apply_deviation
        assert self.deviations_dim[0] == self.num_players
        self.K = K
    
    def enumerate_joint_actions(self):
        return itertools.product(*[range(d) for d in self.player_action_dims])
    
    def predicted_strategy(self, theta):
        unnormalized_dist = torch.zeros(*self.player_action_dims)
        # dot product of each regret feat with each theta
        for joint_action in self.enumerate_joint_actions():
            action_regret_feats = self.compute_phi_regrets_for_action(torch.tensor(list(joint_action)))
            action_regret_scalars = torch.sum(action_regret_feats * theta, dim=len(theta.shape)-1)
            unnormalized_dist[joint_action] = torch.exp(-torch.sum(action_regret_scalars))
        Z = torch.sum(unnormalized_dist)
        return unnormalized_dist / Z

    def compute_phi_regrets_for_action(self, action_tens):
    # these are the instantaneous regrets for all the specific deviations
        regret_feats = torch.zeros(*self.deviations_dim, self.K)
        dev_iter = self.get_deviation_iter(self.player_action_dims)
        for deviation in dev_iter():
            deviation_applied = self.apply_deviation_fn(action_tens, deviation)
            # get regrets for specific player only (player is specified by 0 of deviation)
            regret_feats[deviation] = self.payoff_features_fn(deviation_applied)[deviation[0]] - self.payoff_features_fn(action_tens)[deviation[0]]
        return regret_feats
    
    def compute_expected_regret_feats(self, action_dist):
        # this can probably be run once and cached
        total_regret_feats = torch.zeros(*self.deviations_dim, self.K)
        n = 0
        for joint_action in self.enumerate_joint_actions():
            n += 1
            total_regret_feats += action_dist[joint_action] * self.compute_phi_regrets_for_action(torch.tensor(list(joint_action)))
        return total_regret_feats / n
    
    
    def maxent_dual_objective(self, theta):
        bigZ = torch.tensor(0.0)
    
        # for each joint action in A
        for joint_action in self.enumerate_joint_actions():

                little_r_a_feats = self.compute_phi_regrets_for_action(torch.tensor(list(joint_action)))
                # scalar features for all deviations f with their own theta_fs
                little_r_a_scalar = torch.sum(little_r_a_feats * theta, dim=len(theta.shape)-1)
                # sum up, exp, add to Z
                bigZ += torch.exp( -torch.sum(little_r_a_scalar))
        obj = torch.log(bigZ)
        # computing expected big regret for theta_f is max over phi_f of r_f(predicted | theta_f)
        # phi_f here is just the whole phi
        expected_er_feats = self.compute_expected_regret_feats(self.observed_strategy)

        # for each deviation
        dev_iter = self.get_deviation_iter(self.player_action_dims)
        for deviation in dev_iter():
            this_deviation_theta = theta[deviation].view(*[1 for _ in deviation],-1) # unsqueeze to broadcast
            # sorry that is a really hacky way to do it, but i think it does what we want
            # i.e. add one empty dim for all dims of deviations, then -1 for the dim that is size K
            little_scalar_regrets = torch.sum(expected_er_feats * this_deviation_theta, dim=len(theta.shape)-1)
            # little_scalar_regrets contains the regret for theta_f for all the different fs
            big_Regret = torch.max(little_scalar_regrets)
            obj += big_Regret
        return obj

In [69]:
def rps_feats(action_tuple):
    # 0 is rock, 1 is paper, 2 is scissors
    p1, p2 = action_tuple
    # feat_vecs has shape N, K
    if p1 == 0:
        if p2 == 0:
            return torch.tensor([[0.0,0.0],[0.0,0.0]])
        if p2 == 1:
            return torch.tensor([[0.0,1.0],[0.0,1.0]])
        if p2 == 2:
            return torch.tensor([[1.0,0.0],[1.0,0.0]])
    elif p1 == 1:
        if p2 == 0:
            return torch.tensor([[1.0,0.0],[1.0,0.0]])
        if p2 == 1:
            return torch.tensor([[0.0,0.0],[0.0,0.0]])
        if p2 == 2:
            return torch.tensor([[0.0,1.0],[0.0,1.0]])
    elif p1 == 2:
        if p2 == 0:
            return torch.tensor([[0.0,1.0],[0.0,1.0]])
        if p2 == 1:
            return torch.tensor([[1.0,0.0],[1.0,0.0]])
        if p2 == 2:
            return torch.tensor([[0.0,0.0],[0.0,0.0]])

In [86]:
def external_enumerator(player_action_dims):
    def e():
        for i in range(len(player_action_dims)):
            for j in range(player_action_dims[i]):
                yield (i, j)
    return e

In [110]:
def switch_enumerator(player_action_dims):
    def e():
        for i in range(len(player_action_dims)):
            for j in range(player_action_dims[i]):
                for k in range(player_action_dims[i]):
                    yield (i, j, k)
    return e

In [88]:
def apply_external_deviation(action_tens, deviation):
    new_action_tens = torch.clone(action_tens)
    player, action = deviation
    new_action_tens[player] = action
    return new_action_tens

In [89]:
def apply_switch_deviation(action_tens, deviation):
    new_action_tens = torch.clone(action_tens)
    player, actionx, actiony = deviation
    if new_action_tens[player] == actionx:
        new_action_tens[player] = actiony
    return new_action_tens

In [146]:
from torch import optim

In [156]:
def optimize(prob_obj, theta, epochs=100):
    optimizer = optim.Adam([theta], lr=0.1)
    for i in range(epochs):
        optimizer.zero_grad()
        loss = prob_obj.maxent_dual_objective(theta)
        loss.backward()
        optimizer.step()

In [148]:
nash_eq_rps = torch.tensor([1/3 + 0.001,1/3,1/3 - 0.001]).view(-1,1) @ torch.tensor([1/3,1/3 - 0.01,1/3 + 0.01]).view(1,-1)

In [149]:
my_obj = InverseCorrelatedEquilibriumProblem(2, (3, 3), nash_eq_rps, rps_feats, (2,3), external_enumerator, apply_external_deviation)

In [150]:
my_obj.compute_phi_regrets_for_action(torch.tensor([0,1]))

tensor([[[ 0.,  0.],
         [ 0., -1.],
         [ 1., -1.]],

        [[ 0., -1.],
         [ 0.,  0.],
         [ 1., -1.]]])

In [164]:
ext_theta = torch.rand(2,3,2).requires_grad_(True)

In [165]:
my_obj.predicted_strategy(ext_theta)

tensor([[0.0074, 0.2031, 0.0657],
        [0.0554, 0.0029, 0.1663],
        [0.4267, 0.0654, 0.0071]], grad_fn=<DivBackward0>)

In [166]:
optimize(my_obj, ext_theta)

In [167]:
my_obj.predicted_strategy(ext_theta)

tensor([[0.1112, 0.1109, 0.1113],
        [0.1103, 0.1102, 0.1109],
        [0.1119, 0.1112, 0.1121]], grad_fn=<DivBackward0>)

In [168]:
switch_obj = InverseCorrelatedEquilibriumProblem(2, (3,3), nash_eq_rps, rps_feats, (2,3,3), switch_enumerator, apply_switch_deviation)

In [169]:
switch_obj.compute_phi_regrets_for_action(torch.tensor([0,1]))

tensor([[[[ 0.,  0.],
          [ 0., -1.],
          [ 1., -1.]],

         [[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]],

         [[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]]],


        [[[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]],

         [[ 0., -1.],
          [ 0.,  0.],
          [ 1., -1.]],

         [[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]]]])

In [171]:
int_theta = torch.rand(2,3,3,2, requires_grad=True)

In [172]:
switch_obj.predicted_strategy(int_theta)

tensor([[0.0102, 0.1721, 0.1508],
        [0.1628, 0.0041, 0.2825],
        [0.1304, 0.0829, 0.0041]], grad_fn=<DivBackward0>)

In [173]:
optimize(switch_obj, int_theta)

In [174]:
switch_obj.predicted_strategy(int_theta)

tensor([[0.1113, 0.1106, 0.1112],
        [0.1115, 0.1108, 0.1111],
        [0.1115, 0.1110, 0.1108]], grad_fn=<DivBackward0>)