#### Experimenting with having two actors/players whoose states and actions affect each other

In [1]:
# Importing relevant libraries.
import numpy as np
import copy

In [2]:
# Global settings and setup of game.
n_players = 2

In [3]:
# Checking that no probabilities are negative.
def mkSimpleProb(pairs: list[tuple[str, float]]) -> dict[str, float]:
    dist: dict[str, float] = {}
    for (st, pr) in pairs:
        if pr >= 0:
            dist[st] = pr
    return dist

In [4]:
class Player:
    def __init__(self, name: str, probs: dict[str, float], states: list[str], actions):
        self.name = name
        self.states = states
        self.pC_Confess = probs["pC_Confess"]
        self.pR_Confess = probs["pR_Confess"]
        self.pC_Refuse = probs["pC_Refuse"]
        self.pR_Refuse = probs["pR_Refuse"]
        self.actions = actions
        self.last_action = None
        self.score = 0.0

    def choose_action(self, x:str) -> str:
        """For now, an action is chosen at random."""
        action = np.random.choice(self.actions(x))
        self.last_action = action
        return action

In [5]:
class Game:
    def __init__(self, players: list[Player], states: list[str]):
        self.players = players
        self.states = states
        self.current_state = "Start"
        self.zero = 0.0 # Default value of zero-length policy sequences.

    def nextFunc(self, t: int, x: str, ys: dict[str, str]) -> dict[str, float]:
        a1 = ys[self.players[0].name]
        a2 = ys[self.players[1].name]

        if a1 == "Confess" and a2 == "Confess":
            return mkSimpleProb([
                ("CC", self.players[0].pC_Confess * self.players[1].pC_Confess), #np.prod(player.pC_Confess for player in self.players)), 
                ("CR", self.players[0].pC_Confess * self.players[1].pR_Confess),
                ("RC", self.players[0].pR_Confess * self.players[1].pC_Confess),
                ("RR", self.players[0].pR_Confess * self.players[1].pR_Confess)
            ])
        elif a1 == "Confess" and a2 == "Refuse":
            return mkSimpleProb([
                ("CC", self.players[0].pC_Confess * self.players[1].pC_Refuse),
                ("CR", self.players[0].pC_Confess * self.players[1].pR_Refuse),
                ("RC", self.players[0].pR_Confess * self.players[1].pC_Refuse),
                ("RR", self.players[0].pR_Confess * self.players[1].pR_Refuse)
            ])
        elif a1 == "Refuse" and a2 == "Confess":
            return mkSimpleProb([
                ("CC", self.players[0].pC_Refuse * self.players[1].pC_Confess),
                ("CR", self.players[0].pC_Refuse * self.players[1].pR_Confess),
                ("RC", self.players[0].pR_Refuse * self.players[1].pC_Confess),
                ("RR", self.players[0].pR_Refuse * self.players[1].pR_Confess)
            ])
        elif a1 == "Refuse" and a2 == "Refuse":
            return mkSimpleProb([
                ("CC", self.players[0].pC_Refuse * self.players[1].pC_Refuse),
                ("CR", self.players[0].pC_Refuse * self.players[1].pR_Refuse),
                ("RC", self.players[0].pR_Refuse * self.players[1].pC_Refuse),
                ("RR", self.players[0].pR_Refuse * self.players[1].pR_Refuse)
            ])
        else:
            raise ValueError("Invalid action combination.")
    
    def reward(self, t: str, state: str, actions: dict[str, str], next_state: str) -> dict[str, float]:        
        rewards = {}
    
        if next_state == "CC":
            rewards[self.players[0].name] = 1
            rewards[self.players[1].name] = 1
        elif next_state == "CR":
            rewards[self.players[0].name] = 5
            rewards[self.players[1].name] = 0
        elif next_state == "RC":
            rewards[self.players[0].name] = 0
            rewards[self.players[1].name] = 5
        elif next_state == "RR":
            rewards[self.players[0].name] = 3
            rewards[self.players[1].name] = 3
        else:
            raise ValueError("Invalid next state.")
        
        return rewards
    
    # Function defining how to add rewards together.
    def add(self, a: float, b: float) -> dict[str, float]:
        if type(a) != dict or type(b) != dict:
            raise TypeError(f"Inputs must be of type 'dict', not '{type(a).__name__}' and '{type(b).__name__}'.")
        sum = {}
        for player in self.players:
            sum[player.name] = a[player.name] + b[player.name]
        print("sum:", sum)
        return sum

    # Function for measuring a certain value.
    def meas(self, values: dict, pr: float) -> dict[str, float]:
        if type(values) != dict or type(pr) != float:
            raise TypeError(f"Inputs must be of type 'dict' and 'float', not '{type(values).__name__}' and '{type(pr).__name__}'.")
        print("values:", values)
        print("pr:", pr)
        measured = {}
        for player in self.players:
            measured[player.name] = values[player.name] * pr
        return measured # Returns the expected value.

    # Computing the total expected value from a policy sequence when starting at time t in state x.
    def val(self, t: int, ps: dict[list[dict[str, str]]], x: str) -> dict[str, float]:
        if t < 0 or type(t) != int:
            raise ValueError(f"Invalid time step: '{t}' (must be positive integer).")
        if type(ps) != dict:
            raise TypeError(f"Invalid policy list, must be list of dictionaries (or empty list).")
        if x not in self.states:
            raise ValueError(f"Invalid state: '{x}'")
        
        values = {player.name: self.zero for player in self.players}
        print("Values so far:", values)
        ys = {}
        for player in self.players:
            if len(ps[player.name]) == 0:
                return values
            ys[player.name] = ps[player.name][0][x]
        m_next = self.nextFunc(t, x, ys)
        print("m_next items:", m_next.items())
        for x_prim, pr in m_next.items():
            new_vals = self.meas(
                self.add(
                    self.reward(t, x, ys, x_prim), 
                    self.val(t+1, {p: ps[p][1:] for p in ps}, x_prim)
                    ),
                    pr)
            for player in self.players:
                values[player.name] += new_vals[player.name]
            

        return values

In [9]:
def main():
    # Define probabilities.
    probs = {"pC_Confess": 1.0, "pR_Confess": 0.0, "pC_Refuse": 0.0, "pR_Refuse": 1.0}
    states = ["Start", "CC", "CR", "RC", "RR"]
    def actions(x):
        if x in states:
            return ["Confess", "Refuse"]
        else:
            raise ValueError(f"Invalid State: '{x}'.")
    
    # Create two players.
    player1 = Player("Player1", probs, states, actions)
    player2 = Player("Player2", probs, states, actions)
    players = [player1, player2]
    
    # Create the game instance.
    game = Game(players, states)
    
    # PLACEHOLDER.
    actions_chosen = {}
    for player in players:
        action = player.choose_action("Start")
        actions_chosen[player.name] = action
    print("Actions chosen:", actions_chosen)
    
    # Determine the outcome of the transition.
    outcome_distribution = game.nextFunc(0, game.current_state, actions_chosen)
    print("Outcome distribution:", outcome_distribution)
    
    # Compute rewards.
    rewards = game.reward(0, "Start", actions_chosen, "CC")
    print("Rewards:", rewards)

    ps1 = {
        "Player1": [{"CC": "Confess", "CR": "Confess", "RC": "Confess", "RR": "Confess"}, 
                    {"CC": "Confess", "CR": "Confess", "RC": "Confess", "RR": "Confess"}], 
        "Player2": [{"CC": "Confess", "CR": "Confess", "RC": "Confess", "RR": "Confess"}, 
                    {"CC": "Confess", "CR": "Confess", "RC": "Confess", "RR": "Confess"}]
        }
    
    ps2 = {
        "Player1": [{"CC": "Refuse", "CR": "Refuse", "RC": "Refuse", "RR": "Refuse"}, 
                    {"CC": "Refuse", "CR": "Refuse", "RC": "Refuse", "RR": "Refuse"}], 
        "Player2": [{"CC": "Refuse", "CR": "Refuse", "RC": "Refuse", "RR": "Refuse"}, 
                    {"CC": "Refuse", "CR": "Refuse", "RC": "Refuse", "RR": "Refuse"}]
        }

    values_ps1 = game.val(0, ps1, "CC")
    print("Values from ps1:", values_ps1)

    values_ps2 = game.val(0, ps2, "CC")
    print("Values from ps1:", values_ps2)
if __name__ == "__main__":
    main()


Actions chosen: {'Player1': 'Confess', 'Player2': 'Refuse'}
Outcome distribution: {'CC': 0.0, 'CR': 1.0, 'RC': 0.0, 'RR': 0.0}
Rewards: {'Player1': 1, 'Player2': 1}
Values so far: {'Player1': 0.0, 'Player2': 0.0}
m_next items: dict_items([('CC', 1.0), ('CR', 0.0), ('RC', 0.0), ('RR', 0.0)])
Values so far: {'Player1': 0.0, 'Player2': 0.0}
m_next items: dict_items([('CC', 1.0), ('CR', 0.0), ('RC', 0.0), ('RR', 0.0)])
Values so far: {'Player1': 0.0, 'Player2': 0.0}
sum: {'Player1': 1.0, 'Player2': 1.0}
values: {'Player1': 1.0, 'Player2': 1.0}
pr: 1.0
Values so far: {'Player1': 0.0, 'Player2': 0.0}
sum: {'Player1': 5.0, 'Player2': 0.0}
values: {'Player1': 5.0, 'Player2': 0.0}
pr: 0.0
Values so far: {'Player1': 0.0, 'Player2': 0.0}
sum: {'Player1': 0.0, 'Player2': 5.0}
values: {'Player1': 0.0, 'Player2': 5.0}
pr: 0.0
Values so far: {'Player1': 0.0, 'Player2': 0.0}
sum: {'Player1': 3.0, 'Player2': 3.0}
values: {'Player1': 3.0, 'Player2': 3.0}
pr: 0.0
sum: {'Player1': 2.0, 'Player2': 2.0}
val

In [7]:
    ps = {
        "Player1": [{"CC": "Confess", "RR": "Confess"}, {"CC": "Confess", "RR": "Confess"}], 
        "Player2": [{"CC": "Confess", "RR": "Confess"}, {"CC": "Confess", "RR": "Confess"}]
        }
ps = {p: ps[p][1:] for p in ps}
print(ps)
ps = {p: ps[p][1:] for p in ps}
print(ps)
print(len(ps["Player1"]))

{'Player1': [{'CC': 'Confess', 'RR': 'Confess'}], 'Player2': [{'CC': 'Confess', 'RR': 'Confess'}]}
{'Player1': [], 'Player2': []}
0


In [92]:
players = {"Player1": 1, "Player2": 1}

measured = {}
for player in players:
    measured[player] = players["Player1"] * 2
print(measured) # Returns the expected value.

{'Player1': 2, 'Player2': 2}


In [94]:
ls = [1,2,3,4]

print(np.prod(ls))

24


In [104]:
d1 = {"Player1": 1}
d2 = {"Player2": 1}

print(d1+d2)

TypeError: unsupported operand type(s) for +: 'dict' and 'dict'