In [1]:
import itertools

import torch

In [9]:
# Deviations are tuples; the first element of a deviation must always specify the player (at least for now).

In [508]:
class InverseCorrelatedEquilibriumProblem:
    """Maximum-entropy dual for fitting regret weights ``theta`` to an observed joint strategy.

    The game is described by callables rather than payoff tables:

    * ``payoff_features(action)`` returns per-player feature vectors; entry
      ``[player]`` must be a length-``K`` vector.
    * ``get_deviation_iter(player_action_dims)`` returns a zero-argument
      callable that yields deviation tuples. Each tuple is used directly as an
      index into a tensor of shape ``(*deviations_dim, K)`` and its first
      element is the deviating player.
    * ``apply_deviation(action_tens, deviation)`` returns the joint action that
      results from applying the deviation.

    ``theta`` is expected to broadcast against ``(*deviations_dim, K)``; in the
    dual objective it is additionally indexed with deviation tuples, so there
    it needs the full ``(*deviations_dim, K)`` shape.
    """

    def __init__(self,
                 K,
                 player_action_dims,
                 observed_strategy,
                 payoff_features,
                 deviations_dim,
                 get_deviation_iter,
                 apply_deviation):
        """Store the game description.

        Args:
            K: length of each payoff feature vector.
            player_action_dims: number of actions per player, one entry per player.
            observed_strategy: joint distribution indexed by joint-action tuples.
            payoff_features: callable mapping a joint action to per-player features.
            deviations_dim: shape of the deviation index space; its first entry
                must equal the number of players.
            get_deviation_iter: factory for the deviation enumerator.
            apply_deviation: callable applying one deviation to a joint action.
        """
        self.num_players = len(player_action_dims)
        self.player_action_dims = player_action_dims
        self.observed_strategy = observed_strategy
        self.payoff_features_fn = payoff_features
        self.deviations_dim = deviations_dim
        self.get_deviation_iter = get_deviation_iter
        self.apply_deviation_fn = apply_deviation
        assert self.deviations_dim[0] == self.num_players, \
            "deviations_dim[0] must equal the number of players"
        self.K = K

    def enumerate_joint_actions(self):
        """Iterate over every joint action as a tuple, last player's action varying fastest."""
        return itertools.product(*[range(d) for d in self.player_action_dims])

    def _negative_energies(self, theta):
        """Return a 1-D tensor holding ``-sum(regret_feats(a) * theta)`` for each joint action.

        Entries follow the order of ``enumerate_joint_actions`` so the result
        can be reshaped to ``player_action_dims``.
        """
        return torch.stack([
            -torch.sum(
                self.compute_phi_regrets_for_action(torch.tensor(list(joint_action))) * theta
            )
            for joint_action in self.enumerate_joint_actions()
        ])

    def predicted_strategy(self, theta):
        """Return the joint distribution proportional to ``exp(-sum(regret_feats * theta))``.

        The normalisation is done with a softmax so that large-magnitude
        ``theta`` does not overflow ``exp`` and produce inf/nan.
        """
        log_weights = self._negative_energies(theta)
        return torch.softmax(log_weights, dim=0).reshape(*self.player_action_dims)

    def compute_phi_regrets_for_action(self, action_tens):
        """Return the instantaneous regret features of every deviation at one joint action.

        The result has shape ``(*deviations_dim, K)``. Entry ``[deviation]`` is
        the deviating player's features after the deviation minus that player's
        features at ``action_tens``. Index positions the enumerator never
        yields stay zero.
        """
        regret_feats = torch.zeros(*self.deviations_dim, self.K)
        # The undeviated payoff does not depend on the deviation: evaluate it once.
        baseline_feats = self.payoff_features_fn(action_tens)
        for deviation in self.get_deviation_iter(self.player_action_dims)():
            player = deviation[0]  # by convention the first index names the player
            deviated_feats = self.payoff_features_fn(
                self.apply_deviation_fn(action_tens, deviation))
            regret_feats[deviation] = deviated_feats[player] - baseline_feats[player]
        return regret_feats

    def compute_expected_regret_feats(self, action_dist):
        """Return the ``action_dist``-weighted sum of regret features, divided by the number of joint actions.

        This only depends on ``action_dist`` and could be computed once and cached.
        """
        total_regret_feats = torch.zeros(*self.deviations_dim, self.K)
        num_joint_actions = 0
        for joint_action in self.enumerate_joint_actions():
            num_joint_actions += 1
            total_regret_feats += action_dist[joint_action] * self.compute_phi_regrets_for_action(
                torch.tensor(list(joint_action)))
        # NOTE(review): the sum above is already weighted by action_dist, so this
        # extra division rescales the expectation by 1/|A|. Kept to preserve the
        # existing numerical behaviour -- confirm whether it is intended.
        return total_regret_feats / num_joint_actions

    def maxent_dual_objective(self, theta, l1_coeff=0.1):
        """Return ``log Z(theta)`` plus the observed-regret terms plus an L1 penalty on ``theta``.

        Args:
            theta: tensor of shape ``(*deviations_dim, K)``.
            l1_coeff: multiplier of the L1 penalty.

        Returns:
            Scalar tensor, differentiable with respect to ``theta``.
        """
        # log-partition function, computed stably as a log-sum-exp
        obj = torch.logsumexp(self._negative_energies(theta), dim=0)

        expected_er_feats = self.compute_expected_regret_feats(self.observed_strategy)

        for deviation in self.get_deviation_iter(self.player_action_dims)():
            # theta[deviation] has shape (K,) and broadcasts over all deviation indices,
            # giving the scalar regret of every deviation under this deviation's weights.
            little_scalar_regrets = torch.sum(expected_er_feats * theta[deviation], dim=-1)
            # the "big" regret for this theta_f is the max over all deviations
            obj = obj + torch.max(little_scalar_regrets)

        obj = obj + l1_coeff * theta.abs().sum()
        return obj

In [282]:
def rps_feats(action_tuple):
    """Payoff features for rock-paper-scissors (0 = rock, 1 = paper, 2 = scissors).

    Returns a tensor of shape (N, K) = (2, 2) in which both players receive the
    same feature row, or None when either action is outside 0..2.
    """
    p1, p2 = action_tuple
    no_feat, first_feat, second_feat = [0.0, 0.0], [1.0, 0.0], [0.0, 1.0]
    # feature_table[a1][a2] is the row shared by both players at joint action (a1, a2)
    feature_table = (
        (no_feat, second_feat, first_feat),
        (first_feat, no_feat, second_feat),
        (second_feat, first_feat, no_feat),
    )
    for a1, table_row in enumerate(feature_table):
        if p1 == a1:
            for a2, feat in enumerate(table_row):
                if p2 == a2:
                    return torch.tensor([feat, feat])
            return None
    return None

In [283]:
def external_enumerator(player_action_dims):
    """Build an enumerator of external deviations.

    The returned zero-argument callable yields ``(player, action)`` for every
    player and every action available to that player, players in order.
    """
    def iterate_deviations():
        for player, num_actions in enumerate(player_action_dims):
            yield from ((player, action) for action in range(num_actions))
    return iterate_deviations

In [284]:
def switch_enumerator(player_action_dims):
    """Build an enumerator of switch deviations.

    The returned zero-argument callable yields ``(player, from_action, to_action)``
    for every player and every ordered pair of that player's actions
    (including pairs where both actions are the same).
    """
    def iterate_deviations():
        for player, num_actions in enumerate(player_action_dims):
            for from_action in range(num_actions):
                yield from (
                    (player, from_action, to_action) for to_action in range(num_actions)
                )
    return iterate_deviations

In [285]:
def apply_external_deviation(action_tens, deviation):
    """Return a copy of ``action_tens`` in which the deviating player plays a fixed action.

    ``deviation`` is a ``(player, action)`` pair; the input tensor is not modified.
    """
    deviated = torch.clone(action_tens)
    player, replacement_action = deviation
    deviated[player] = replacement_action
    return deviated

In [286]:
def apply_switch_deviation(action_tens, deviation):
    """Return a copy of ``action_tens`` with a conditional action switch applied.

    ``deviation`` is ``(player, from_action, to_action)``: the player's action is
    replaced by ``to_action`` only when it currently equals ``from_action``;
    otherwise the copy is returned unchanged. The input tensor is not modified.
    """
    switched = torch.clone(action_tens)
    player, from_action, to_action = deviation
    if switched[player] == from_action:
        switched[player] = to_action
    return switched

In [287]:
from torch import optim

In [288]:
import ipdb

In [489]:
def optimize_problem(prob_obj, theta, epochs=100, lr=0.1):
    """Minimise ``prob_obj.maxent_dual_objective(theta)`` with Adam.

    Args:
        prob_obj: object exposing ``maxent_dual_objective(theta)`` that returns
            a scalar tensor.
        theta: tensor with ``requires_grad=True``; updated in place.
        epochs: number of gradient steps.
        lr: Adam learning rate.

    Returns nothing; the fitted weights are left in ``theta``.
    """
    adam = optim.Adam([theta], lr=lr)
    for _ in range(epochs):
        adam.zero_grad()
        objective_value = prob_obj.maxent_dual_objective(theta)
        objective_value.backward()
        adam.step()

In [469]:
# Observed joint strategy for rock-paper-scissors: the outer product of two
# independent mixed strategies (rows = player 1's action, columns = player 2's),
# each a uniform mix perturbed by +/-0.001 and +/-0.01 respectively.
nash_eq_rps = torch.outer(
    torch.tensor([1/3 + 0.001, 1/3, 1/3 - 0.001]),
    torch.tensor([1/3, 1/3 - 0.01, 1/3 + 0.01]),
)

In [470]:
# RPS problem with external deviations: K=2 payoff features, 3 actions per
# player, and one deviation per (player, action) pair.
my_obj = InverseCorrelatedEquilibriumProblem(
    K=2,
    player_action_dims=(3, 3),
    observed_strategy=nash_eq_rps,
    payoff_features=rps_feats,
    deviations_dim=(2, 3),
    get_deviation_iter=external_enumerator,
    apply_deviation=apply_external_deviation,
)

In [471]:
# Sanity check: regret features of every external deviation at the joint
# action (player 1 = rock, player 2 = paper); result is indexed [player, action, feature].
my_obj.compute_phi_regrets_for_action(torch.tensor([0,1]))

tensor([[[ 0.,  0.],
         [ 0., -1.],
         [ 1., -1.]],

        [[ 0., -1.],
         [ 0.,  0.],
         [ 1., -1.]]])

In [307]:
# Random initial weights in [0, 1), one K=2 vector per external deviation (player, action).
ext_theta = torch.rand((2, 3, 2), requires_grad=True)

In [308]:
# Predicted joint strategy under the randomly initialised ext_theta (before fitting).
my_obj.predicted_strategy(ext_theta)

tensor([[0.0027, 0.2093, 0.1895],
        [0.1628, 0.0033, 0.0772],
        [0.0880, 0.2656, 0.0016]], grad_fn=<DivBackward0>)

In [309]:
# Fit ext_theta in place using optimize_problem's default epochs and learning rate.
optimize_problem(prob_obj=my_obj, theta=ext_theta)

In [310]:
# Predicted joint strategy after ext_theta has been fitted.
my_obj.predicted_strategy(ext_theta)

tensor([[0.1118, 0.1084, 0.1087],
        [0.1144, 0.1125, 0.1103],
        [0.1121, 0.1112, 0.1106]], grad_fn=<DivBackward0>)

In [311]:
# Same RPS game and observed strategy, now with switch deviations indexed by
# (player, from_action, to_action).
switch_obj = InverseCorrelatedEquilibriumProblem(
    K=2,
    player_action_dims=(3, 3),
    observed_strategy=nash_eq_rps,
    payoff_features=rps_feats,
    deviations_dim=(2, 3, 3),
    get_deviation_iter=switch_enumerator,
    apply_deviation=apply_switch_deviation,
)

In [312]:
# Sanity check at (rock, paper) with switch deviations: a deviation whose
# from_action differs from the action actually played leaves the joint action
# unchanged, so its regret features are zero.
switch_obj.compute_phi_regrets_for_action(torch.tensor([0,1]))

tensor([[[[ 0.,  0.],
          [ 0., -1.],
          [ 1., -1.]],

         [[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]],

         [[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]]],


        [[[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]],

         [[ 0., -1.],
          [ 0.,  0.],
          [ 1., -1.]],

         [[ 0.,  0.],
          [ 0.,  0.],
          [ 0.,  0.]]]])

In [313]:
# Random initial weights in [0, 1), one K=2 vector per switch deviation
# (player, from_action, to_action).
int_theta = torch.rand((2, 3, 3, 2), requires_grad=True)

In [314]:
# Predicted joint strategy under the randomly initialised int_theta (before fitting).
switch_obj.predicted_strategy(int_theta)

tensor([[0.0192, 0.1568, 0.0960],
        [0.2954, 0.0089, 0.0973],
        [0.1191, 0.1933, 0.0139]], grad_fn=<DivBackward0>)

In [315]:
# Fit int_theta in place using optimize_problem's default epochs and learning rate.
optimize_problem(prob_obj=switch_obj, theta=int_theta)

In [316]:
# Predicted joint strategy after int_theta has been fitted.
switch_obj.predicted_strategy(int_theta)

tensor([[0.1068, 0.1145, 0.1099],
        [0.1116, 0.1142, 0.1077],
        [0.1088, 0.1157, 0.1107]], grad_fn=<DivBackward0>)

In [318]:
def chicken_feats(action_tuple):
    """Payoff features for the game of chicken (0 = drive, 1 = swerve).

    Each player's feature vector is one-hot over (crash, look cool, look like a
    wimp). Returns a (2, 3) tensor with one row per player, or None when either
    action is outside 0..1.
    """
    p1, p2 = action_tuple
    crash, cool, wimp = [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]
    # outcome_table[a1][a2] = (player 1 features, player 2 features)
    outcome_table = (
        ((crash, crash), (cool, wimp)),
        ((wimp, cool), (wimp, wimp)),
    )
    for a1, table_row in enumerate(outcome_table):
        if p1 == a1:
            for a2, (p1_feat, p2_feat) in enumerate(table_row):
                if p2 == a2:
                    return torch.tensor([p1_feat, p2_feat])
            return None
    return None

In [519]:
# Observed joint strategy for chicken: both players independently choose
# action 0 (drive) with probability 0.8 and action 1 (swerve) with probability 0.2.
pure_nash_chicken_mixed = torch.outer(
    torch.tensor([0.8, 0.2]),
    torch.tensor([0.8, 0.2]),
)

In [520]:
# Chicken problem with external deviations: K=3 payoff features, 2 actions per player.
chicken_obj_ext = InverseCorrelatedEquilibriumProblem(
    K=3,
    player_action_dims=(2, 2),
    observed_strategy=pure_nash_chicken_mixed,
    payoff_features=chicken_feats,
    deviations_dim=(2, 2),
    get_deviation_iter=external_enumerator,
    apply_deviation=apply_external_deviation,
)

In [521]:
# Zero-initialised weights, one K=3 vector per external deviation (player, action).
chicken_theta = torch.zeros((2, 2, 3), requires_grad=True)

In [522]:
# With all-zero weights every joint action gets the same unnormalised weight,
# so the predicted strategy is uniform.
chicken_obj_ext.predicted_strategy(chicken_theta)

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]], grad_fn=<DivBackward0>)

In [523]:
# Fit chicken_theta in place with 100 Adam steps at learning rate 0.1.
optimize_problem(
    prob_obj=chicken_obj_ext,
    theta=chicken_theta,
    epochs=100,
    lr=0.1,
)

In [524]:
# Predicted joint strategy after chicken_theta has been fitted.
chicken_obj_ext.predicted_strategy(chicken_theta)

tensor([[0.4430, 0.2023],
        [0.2023, 0.1524]], grad_fn=<DivBackward0>)

In [515]:
# Observed strategy given to chicken_obj_ext, displayed for comparison with the prediction.
# NOTE(review): this cell's execution count is lower than the cells before it,
# so it was run out of order -- re-run top to bottom to confirm the outputs line up.
pure_nash_chicken_mixed

tensor([[0.6400, 0.1600],
        [0.1600, 0.0400]])

In [518]:
# Inspect the weights held in chicken_theta.
# NOTE(review): this cell's execution count is lower than that of the cell that
# zero-initialises chicken_theta, so the stored output may come from an earlier
# run -- re-run top to bottom to confirm.
chicken_theta

tensor([[[ 3.5033,  6.0951, -7.9300],
         [-3.4692, -6.3111, 10.0223]],

        [[ 3.5033,  6.0951, -7.9300],
         [-3.4692, -6.3111, 10.0223]]], requires_grad=True)

In [526]:
# Observed joint strategy for chicken given directly as a joint table
# (rows = player 1's action, columns = player 2's); (swerve, swerve) has probability 0.
corr_chicken = torch.tensor([
    [0.2, 0.4],
    [0.4, 0.0],
])

In [534]:
# Zero-initialised weights, one K=3 vector per switch deviation
# (player, from_action, to_action).
correq_theta = torch.zeros((2, 2, 2, 3), requires_grad=True)

In [535]:
# Chicken problem with switch deviations, fitted against the corr_chicken joint table.
chicken_obj_int = InverseCorrelatedEquilibriumProblem(
    K=3,
    player_action_dims=(2, 2),
    observed_strategy=corr_chicken,
    payoff_features=chicken_feats,
    deviations_dim=(2, 2, 2),
    get_deviation_iter=switch_enumerator,
    apply_deviation=apply_switch_deviation,
)

In [536]:
# With all-zero weights every joint action gets the same unnormalised weight,
# so the predicted strategy is uniform.
chicken_obj_int.predicted_strategy(correq_theta)

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]], grad_fn=<DivBackward0>)

In [537]:
# Fit correq_theta in place with 100 Adam steps at learning rate 0.1.
optimize_problem(
    prob_obj=chicken_obj_int,
    theta=correq_theta,
    epochs=100,
    lr=0.1,
)

In [538]:
# Inspect the fitted switch-deviation weights, indexed [player, from_action, to_action, feature].
correq_theta

tensor([[[[-0.0185,  0.0220, -0.0309],
          [-4.6689, -4.2408,  9.3307]],

         [[ 5.4053,  5.5481, -8.6846],
          [-0.0185,  0.0220, -0.0309]]],


        [[[-0.0185,  0.0220, -0.0309],
          [-4.6689, -4.2408,  9.3307]],

         [[ 5.4053,  5.5481, -8.6846],
          [-0.0185,  0.0220, -0.0309]]]], requires_grad=True)

In [539]:
# Predicted joint strategy after correq_theta has been fitted.
chicken_obj_int.predicted_strategy(correq_theta)

tensor([[0.2257, 0.3164],
        [0.3164, 0.1416]], grad_fn=<DivBackward0>)

In [540]:
# Observed strategy given to chicken_obj_int, displayed for comparison with the prediction.
corr_chicken

tensor([[0.2000, 0.4000],
        [0.4000, 0.0000]])