In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from einops import einsum, rearrange

In [12]:
def generate_P_matrix(p):
    """
    Build the joint observation tensor for the two agents.

    P[k, i, j] = P(Agent 0 observes i, Agent 1 observes j | Currently in state k)

    Each agent has its own 4x4 table of observation probabilities given the
    true state (rows = true state, columns = observed state).  An agent sees
    the true state with probability 1-p and one specific other state with
    probability p.  The joint tensor is the per-state outer product of the
    two tables, so the result has shape (4, 4, 4) and dtype float32.
    """
    keep = 1 - p

    # Agent 0 confuses states within the pairs (0, 1) and (2, 3).
    obs_agent0 = torch.tensor(
        [
            [keep, p, 0, 0],
            [p, keep, 0, 0],
            [0, 0, keep, p],
            [0, 0, p, keep],
        ],
        dtype=torch.float32,
    )

    # Agent 1 confuses states within the pairs (0, 2) and (1, 3).
    obs_agent1 = torch.tensor(
        [
            [keep, 0, p, 0],
            [0, keep, 0, p],
            [p, 0, keep, 0],
            [0, p, 0, keep],
        ],
        dtype=torch.float32,
    )

    # Batched (4, 1) @ (1, 4) product per true state k -> (4, 4, 4).
    columns = obs_agent0.unsqueeze(-1)
    rows = obs_agent1.unsqueeze(-2)
    return torch.matmul(columns, rows)

In [13]:
def generate_Q_matrix(policy_1, policy_2):
    """
    Build the transition tensor induced by the two policies.

    Q[l, i, j] = Pr(Going to state l | Agent 0 sees i, Agent 1 sees j)

    For 1-D policies of length n the result has shape (4, n, n).  The four
    slices are stacked in the order
        l = 0: (1 - policy_1[i]) * (1 - policy_2[j])
        l = 1: (1 - policy_1[i]) *      policy_2[j]
        l = 2:      policy_1[i]  * (1 - policy_2[j])
        l = 3:      policy_1[i]  *      policy_2[j]
    so the four slices sum to one at every (i, j).
    """
    # Agent 0 varies along rows (column vector), Agent 1 along columns (row vector).
    p0_one = policy_1.unsqueeze(-1)
    p1_one = policy_2.unsqueeze(-1).T
    p0_zero = 1 - p0_one
    p1_zero = 1 - p1_one

    slices = [
        p0_zero @ p1_zero,
        p0_zero @ p1_one,
        p0_one @ p1_zero,
        p0_one @ p1_one,
    ]
    return torch.stack(slices)


In [17]:
# Example policies (one probability per observed state) and observation-noise level.
p_i = torch.tensor([0.1, 0.2, 0.3, 0.4], dtype=torch.float32)
p_j = torch.tensor([0.5, 0.6, 0.7, 0.8], dtype=torch.float32)
p = 0.01

P = generate_P_matrix(p)
Q = generate_Q_matrix(p_i, p_j)

# Both tensors should be float32 so they can be contracted together.
print(P.dtype)
print(Q.dtype)

# Sum out the observation axes: PQ[k, l] = sum_{i,j} P[k, i, j] * Q[l, i, j],
# i.e. the state-to-state transition matrix.
PQ = torch.einsum("kij, lij -> kl", P, Q)
print(PQ)


torch.float32
torch.float32
tensor([[0.4477, 0.4513, 0.0503, 0.0507],
        [0.3188, 0.4822, 0.0792, 0.1198],
        [0.2111, 0.4879, 0.0909, 0.2101],
        [0.1214, 0.4796, 0.0806, 0.3184]])


In [18]:
class NoisyIPD:
    """
    Two-agent repeated game in which each agent observes the current state
    through a noisy channel and acts according to a per-observation policy.

    The observation tensor P (from ``generate_P_matrix``) and the policy
    tensor Q (from ``generate_Q_matrix``) are contracted into a state-to-state
    transition matrix, from which discounted values are obtained by solving a
    linear system (see ``find_values``).
    """

    def __init__(self, game, p_i, p_j, p, gamma, optim_index=0):
        """
        Parameters
        ----------
        game : torch.Tensor
            Payoff matrix.  Note: self.game should be 4x2 (one row per state,
            one column per agent).
        p_i, p_j : torch.Tensor
            Policies of agent 0 and agent 1, passed to ``generate_Q_matrix``.
        p : float
            Observation-noise level passed to ``generate_P_matrix``.
        gamma : float
            Discount factor used in ``find_values``.
        optim_index : int, optional
            Stored on the instance but not otherwise used by this class;
            ``optimize_pi`` always optimises ``p_i``.  Defaults to 0 so the
            argument may be omitted.
        """
        self.game = game.float()
        self.p_i = torch.nn.Parameter(p_i.float(), requires_grad=True)
        self.p_j = p_j.float()
        self.p = p
        self.gamma = gamma
        self.optim_index = optim_index
        self.P = generate_P_matrix(self.p)
        self.Q = generate_Q_matrix(self.p_i, self.p_j)
        self.values_0, self.values_1 = self.find_values()

    def find_values(self):
        """
        Solve ``(I - gamma * PQ) @ V = game`` for V and return ``V.T``.

        ``PQ[k, l] = sum_{i,j} P[k, i, j] * Q[l, i, j]`` is the state
        transition matrix.  With a 4x2 ``game`` the result is 2x4, so it
        unpacks into one length-4 value vector per agent.

        Side effect: stores ``I - gamma * PQ`` in ``self.subtracted_matrix``.
        """
        # sum over ij of P_kij * Q_lij
        PQ = torch.einsum("kij, lij -> kl", self.P, self.Q)

        identity = torch.eye(PQ.shape[0], dtype=PQ.dtype, device=PQ.device)
        self.subtracted_matrix = identity - self.gamma * PQ

        # Solve against the payoffs directly instead of forming the explicit
        # inverse and multiplying: one factorisation, better conditioning.
        values = torch.linalg.solve(self.subtracted_matrix, self.game)

        return values.T

    def optimize_pi(self, num_iterations, learning_rate=0.05):
        """
        Gradient-ascent on agent 0's total value with respect to ``p_i``.

        The policy is parameterised by its logits so that the probabilities
        stay inside (0, 1); Adam minimises ``-values_0.sum()``.

        Parameters
        ----------
        num_iterations : int
            Number of Adam steps.
        learning_rate : float, optional
            Adam learning rate.

        Returns
        -------
        dict
            ``storage[i]`` holds detached snapshots of ``p_i``, ``values_0``,
            ``total_value_0`` and ``loss`` as evaluated at iteration ``i``
            (i.e. before that iteration's optimiser step).
        """
        # eps clamps the input to [eps, 1 - eps] so that probabilities of
        # exactly 0 or 1 do not produce infinite logits.
        logit_p_i = (
            torch.logit(self.p_i.detach(), eps=1e-6).clone().requires_grad_(True)
        )

        optimizer = torch.optim.Adam([logit_p_i], lr=learning_rate)
        storage = {}
        for i in range(num_iterations):
            optimizer.zero_grad()

            self.p_i = torch.sigmoid(logit_p_i)
            self.Q = generate_Q_matrix(self.p_i, self.p_j)
            self.values_0, self.values_1 = self.find_values()

            # We want to maximize self.values_0, so we negate it for minimization
            loss = -self.values_0.sum()
            loss.backward()

            # Store detached copies so the history does not keep every
            # iteration's autograd graph alive.
            storage[i] = {
                "p_i": self.p_i.detach().clone(),
                "values_0": self.values_0.detach().clone(),
                "total_value_0": self.values_0.detach().sum(),
                "loss": loss.detach().clone(),
            }

            optimizer.step()

        if storage:
            # The last optimizer.step() moved the logits after the attributes
            # were computed; refresh them so the object reflects the final policy.
            with torch.no_grad():
                self.p_i = torch.sigmoid(logit_p_i)
                self.Q = generate_Q_matrix(self.p_i, self.p_j)
                self.values_0, self.values_1 = self.find_values()

        return storage

#            with torch.no_grad():  # We don't want these operations to be tracked in the computational graph
#                eps = 1e-7
#                logit_p_i = torch.log((self.p_i + eps) / (1 - self.p_i + eps)).clone().detach().requires_grad_(True)



In [29]:

# Payoff tables, one 2x2 table per agent.
# NOTE(review): assumed layout is [agent, action of agent 0, action of agent 1] — confirm.
prisoners_dilemma = torch.tensor([[[3, 0],
                                   [4, 1]],

                                  [[3, 4],
                                   [0, 1]]])

# NoisyIPD needs a (4 states x 2 agents) payoff matrix.  generate_Q_matrix
# orders states as l = 2 * a0 + a1, which matches a row-major flatten of each
# agent's 2x2 table, so flatten per agent and put agents on the columns.
game_payoffs = prisoners_dilemma.reshape(2, 4).T

# These were not defined anywhere in the visible notebook; values are
# placeholders chosen so the cell runs on a fresh kernel — TODO confirm.
gamma = 0.9
num_iterations = 100

game_1 = NoisyIPD(game = game_payoffs,
                        p_i = torch.tensor([0.1, 0.2, 0.3, 0.4]).float(),
                        p_j = torch.tensor([0.5, 0.6, 0.7, 0.8]).float(),
                        p = p,
                        gamma = gamma,
                        optim_index = 0)

# Optimise the object that was just built (the original referenced an
# undefined `game_obj` and omitted the required iteration count).
storage_vals = game_1.optimize_pi(num_iterations)

torch.Size([4])
torch.Size([1, 4])
