#### Experimenting with having two actors/players whoose states and actions affect each other

In [1]:
# Importing relevant libraries.
import numpy as np
import copy

In [2]:
# Global settings and setup of game.
n_players = 2

In [3]:
# Checking that no probabilities are negative.
def mkSimpleProb(pairs: list[tuple[str, float]]) -> dict[str, float]:
    dist: dict[str, float] = {}
    for (st, pr) in pairs:
        if pr >= 0:
            dist[st] = pr
    return dist

In [4]:
class Player:
    def __init__(self, name: str, probs: dict[str, float], states: list[str], actions):
        self.name = name
        self.states = states
        self.pC_Confess = probs["pC_Confess"]
        self.pR_Confess = probs["pR_Confess"]
        self.pC_Refuse = probs["pC_Refuse"]
        self.pR_Refuse = probs["pR_Refuse"]
        self.actions = actions
        self.last_action = None
        self.score = 0.0

    def choose_action(self, x:str) -> str:
        """For now, an action is chosen at random."""
        action = np.random.choice(self.actions(x))
        self.last_action = action
        return action

In [13]:
class Game:
    def __init__(self, players: list[Player], states: list[str]):
        self.players = players
        self.states = states
        self.current_state = "Start"

    def nextFunc(self, t: int, x: str, ys: dict[str, str]) -> dict[str, float]:
        a1 = ys[self.players[0].name]
        a2 = ys[self.players[1].name]

        if a1 == "Confess" and a2 == "Confess":
            return mkSimpleProb([
                ("(1,1)", self.players[0].pC_Confess * self.players[1].pC_Confess),
                ("(5,0)", self.players[0].pC_Confess * self.players[1].pR_Confess),
                ("(0,5)", self.players[0].pR_Confess * self.players[1].pC_Confess),
                ("(3,3)", self.players[0].pR_Confess * self.players[1].pR_Confess)
            ])
        elif a1 == "Confess" and a2 == "Refuse":
            return mkSimpleProb([
                ("(1,1)", self.players[0].pC_Confess * self.players[1].pC_Refuse),
                ("(5,0)", self.players[0].pC_Confess * self.players[1].pR_Refuse),
                ("(0,5)", self.players[0].pR_Confess * self.players[1].pC_Refuse),
                ("(3,3)", self.players[0].pR_Confess * self.players[1].pR_Refuse)
            ])
        elif a1 == "Refuse" and a2 == "Confess":
            return mkSimpleProb([
                ("(1,1)", self.players[0].pC_Refuse * self.players[1].pC_Confess),
                ("(5,0)", self.players[0].pC_Refuse * self.players[1].pR_Confess),
                ("(0,5)", self.players[0].pR_Refuse * self.players[1].pC_Confess),
                ("(3,3)", self.players[0].pR_Refuse * self.players[1].pR_Confess)
            ])
        elif a1 == "Refuse" and a2 == "Refuse":
            return mkSimpleProb([
                ("(1,1)", self.players[0].pC_Refuse * self.players[1].pC_Refuse),
                ("(5,0)", self.players[0].pC_Refuse * self.players[1].pR_Refuse),
                ("(0,5)", self.players[0].pR_Refuse * self.players[1].pC_Refuse),
                ("(3,3)", self.players[0].pR_Refuse * self.players[1].pR_Refuse)
            ])
        else:
            raise ValueError("Invalid action combination.")
    
    def reward(self, t: str, state: str, actions: dict[str, str], next_state: str) -> dict[str, float]:        
        rewards = {}
    
        if next_state == "CC":
            rewards[self.players[0].name] = 1
            rewards[self.players[1].name] = 1
        elif next_state == "CR":
            rewards[self.players[0].name] = 5
            rewards[self.players[1].name] = 0
        elif next_state == "RC":
            rewards[self.players[0].name] = 0
            rewards[self.players[1].name] = 5
        elif next_state == "RR":
            rewards[self.players[0].name] = 3
            rewards[self.players[1].name] = 3
        else:
            raise ValueError("Invalid next state.")
        
        return rewards

In [14]:
def main():
    # Define probabilities.
    probs = {"pC_Confess": 1.0, "pR_Confess": 0.0, "pC_Refuse": 0.0, "pR_Refuse": 1.0}
    states = ["Start", "CC", "CR", "RC", "RR"]
    def actions(x):
        if x in states:
            return ["Confess", "Refuse"]
        else:
            raise ValueError(f"Invalid State: '{x}'.")
    
    # Create two players.
    player1 = Player("Player1", probs, states, actions)
    player2 = Player("Player2", probs, states, actions)
    players = [player1, player2]
    
    # Create the game instance.
    game = Game(players, states)
    
    # PLACEHOLDER: Both players randomly choose an action in the "Start" state.
    actions_chosen = {}
    for player in players:
        action = player.choose_action("Start")
        actions_chosen[player.name] = action
    print("Actions chosen:", actions_chosen)
    
    # Determine the outcome of the transition.
    outcome_distribution = game.nextFunc(0, game.current_state, actions_chosen)
    print("Outcome distribution:", outcome_distribution)
    
    # Compute rewards.
    rewards = game.reward(0, "Start", actions_chosen, "CC")
    print("Rewards:", rewards)

if __name__ == "__main__":
    main()


Actions chosen: {'Player1': 'Confess', 'Player2': 'Refuse'}
Outcome distribution: {'(1,1)': 0.0, '(5,0)': 1.0, '(0,5)': 0.0, '(3,3)': 0.0}
Rewards: {'Player1': 1, 'Player2': 1}
