In [69]:
import itertools
from typing import Callable, Dict, List, Optional, Tuple

import numpy as np

class SignalingGame:
    """Finite sender-receiver signaling game with pure-strategy PBE utilities.

    The sender has a private type distributed according to ``priors`` and
    picks a message; the receiver sees only the message and picks an action.
    Both payoff callables take ``(type, message, action)`` and return a number.
    """

    # Payoff or probability gaps no larger than this are treated as ties, so an
    # indifferent player is never reported as deviating and floating-point
    # round-off cannot flip a verdict.
    _TOLERANCE = 1e-9

    def __init__(
        self,
        types: List[str],
        priors: Dict[str, float],
        messages: List[str],
        actions: List[str],
        u_sender: Callable[[str, str, str], float],
        u_receiver: Callable[[str, str, str], float]
    ):
        """Store the primitives of the game.

        Args:
            types: Sender types, in the order used by every enumeration.
            priors: Prior probability of each type, keyed by type.
            messages: Messages available to the sender.
            actions: Actions available to the receiver.
            u_sender: Sender payoff ``u(type, message, action)``.
            u_receiver: Receiver payoff ``u(type, message, action)``.
        """
        self.types = types
        self.priors = priors
        self.messages = messages
        self.actions = actions
        self.u_sender = u_sender
        self.u_receiver = u_receiver

    def _uniform_belief(self) -> Dict[str, float]:
        """Return the belief that puts equal weight on every type."""
        return {t: 1 / len(self.types) for t in self.types}

    def _receiver_expected_utility(
        self, belief: Dict[str, float], message: str, action: str
    ) -> float:
        """Return the receiver's expected payoff from ``action`` under ``belief``."""
        return sum(
            belief[t] * self.u_receiver(t, message, action) for t in self.types
        )

    def receiver_best_response(self, belief: Dict[str, float], message: str) -> str:
        """Return the receiver's expected-utility-maximising action.

        Args:
            belief: Probability of each type, keyed by type.
            message: The observed message.

        Returns:
            The first action (in ``self.actions`` order) that attains the
            highest expected utility, or ``None`` when no action beats
            negative infinity (e.g. ``self.actions`` is empty).
        """
        best_action = None
        best_utility = -float('inf')
        for a in self.actions:
            expected_u = self._receiver_expected_utility(belief, message, a)
            # Strict comparison: on ties the earliest action is kept.
            if expected_u > best_utility:
                best_utility = expected_u
                best_action = a
        return best_action

    def sender_best_response(self, type_t: str, receiver_policy: Dict[str, str]) -> str:
        """Return the payoff-maximising message for one sender type.

        Args:
            type_t: The sender type whose incentives are evaluated.
            receiver_policy: Action the receiver plays after each message.

        Returns:
            The first message (in ``self.messages`` order) with the highest
            sender payoff. Messages for which ``receiver_policy`` gives no
            action are skipped; ``None`` is returned if every message is
            skipped.
        """
        best_msg = None
        best_u = -float('inf')
        for m in self.messages:
            a = receiver_policy.get(m)
            if a is None:
                continue
            u = self.u_sender(type_t, m, a)
            if u > best_u:
                best_u = u
                best_msg = m
        return best_msg

    def bayes_update(self, strategy: Dict[str, str], msg: str) -> Dict[str, float]:
        """Return the posterior over types after observing ``msg``.

        Args:
            strategy: Pure sender strategy mapping each type to a message.
            msg: The observed message.

        Returns:
            The Bayes posterior keyed by type. When the message has zero
            probability under ``strategy`` (off the equilibrium path) the
            uniform distribution is returned as a default.
        """
        numerators = {
            t: self.priors[t] if strategy[t] == msg else 0 for t in self.types
        }
        total = sum(numerators.values())
        if total == 0:
            # Bayes' rule is undefined off path; fall back to uniform beliefs.
            return self._uniform_belief()
        return {t: v / total for t, v in numerators.items()}

    def verify_PBE(
        self,
        strategy: Dict[str, str],
        belief_by_msg: Dict[str, Dict[str, float]],
        receiver_policy: Dict[str, str]
    ) -> Tuple[bool, List[str]]:
        """Check whether a profile is a pure-strategy Perfect Bayesian Equilibrium.

        Three conditions are checked:

        1. Receiver sequential rationality: after every message that has a
           belief, the prescribed action is optimal given that belief.
        2. Sender sequential rationality: no type gains by switching message.
        3. Belief consistency: every supplied belief at a message sent with
           positive probability equals the Bayes posterior. Beliefs at
           zero-probability messages are unrestricted.

        Optimality is judged on payoffs (up to ``_TOLERANCE``), not on which
        maximiser happens to come first, so indifferent players pass.

        Args:
            strategy: Pure sender strategy mapping each type to a message.
            belief_by_msg: Receiver belief over types after each message;
                messages without an entry are not checked.
            receiver_policy: Action the receiver plays after each message.

        Returns:
            ``(is_PBE, list_of_failure_reasons)``.
        """
        reasons = []
        tol = self._TOLERANCE

        # Sequential Rationality: Receiver
        for m in self.messages:
            if m not in belief_by_msg:
                continue
            belief = belief_by_msg[m]
            a_star = receiver_policy.get(m)
            best_a = self.receiver_best_response(belief, m)
            if a_star == best_a:
                continue
            if a_star in self.actions and best_a is not None:
                # A different action is still fine if it is payoff-equivalent.
                shortfall = (
                    self._receiver_expected_utility(belief, m, best_a)
                    - self._receiver_expected_utility(belief, m, a_star)
                )
                if shortfall <= tol:
                    continue
            reasons.append(f"Receiver not optimal at msg {m}: chose {a_star}, but best is {best_a}")

        # Sequential Rationality: Sender
        for t in self.types:
            best_m = self.sender_best_response(t, receiver_policy)
            chosen_m = strategy[t]
            if chosen_m == best_m:
                continue
            chosen_a = receiver_policy.get(chosen_m)
            if chosen_m in self.messages and chosen_a is not None and best_m is not None:
                # Only a strict gain from deviating breaks the equilibrium.
                gain = (
                    self.u_sender(t, best_m, receiver_policy[best_m])
                    - self.u_sender(t, chosen_m, chosen_a)
                )
                if gain <= tol:
                    continue
            reasons.append(f"Sender of type {t} prefers {best_m} over {strategy[t]}")

        # Belief Consistency: Bayes' rule wherever it applies
        for m in self.messages:
            if m not in belief_by_msg:
                continue
            reach_probability = sum(
                self.priors[t] for t in self.types if strategy[t] == m
            )
            if reach_probability <= 0:
                continue  # off path: any belief is admissible
            posterior = self.bayes_update(strategy, m)
            held = belief_by_msg[m]
            if any(abs(held[t] - posterior[t]) > tol for t in self.types):
                reasons.append(
                    f"Belief at msg {m} is inconsistent with Bayes' rule: "
                    f"got {held}, expected {posterior}"
                )

        return len(reasons) == 0, reasons

    def off_path_belief_range(
        self,
        msg: str,
        receiver_pref: str,
        alt_action: str,
        resolution: int = 101
    ) -> Tuple[Optional[float], Optional[float]]:
        """Find the beliefs under which the receiver weakly prefers one action.

        The belief ``mu`` is the probability placed on ``self.types[0]``; the
        remaining ``1 - mu`` goes to ``self.types[1]``. ``mu`` is scanned over
        an evenly spaced grid on [0, 1], so the bounds are accurate to one
        grid step.

        Args:
            msg: The (off-path) message being evaluated.
            receiver_pref: Action that should be weakly preferred.
            alt_action: Action it is compared against.
            resolution: Number of grid points on [0, 1] (default 101, i.e.
                steps of 0.01).

        Returns:
            ``(lower_bound, upper_bound)`` of ``mu`` as plain floats, or
            ``(None, None)`` when ``receiver_pref`` is never weakly preferred
            on the grid.

        Raises:
            ValueError: If the game does not have exactly two types (a single
                scalar belief cannot describe more), or ``resolution < 2``.
        """
        if len(self.types) != 2:
            raise ValueError(
                "off_path_belief_range requires exactly two types, "
                f"got {len(self.types)}"
            )
        if resolution < 2:
            raise ValueError("resolution must be at least 2")

        first, second = self.types
        # The four payoffs do not depend on mu, so evaluate them once.
        pref_first = self.u_receiver(first, msg, receiver_pref)
        pref_second = self.u_receiver(second, msg, receiver_pref)
        alt_first = self.u_receiver(first, msg, alt_action)
        alt_second = self.u_receiver(second, msg, alt_action)

        valid_range = [
            mu for mu in np.linspace(0, 1, resolution)
            if mu * pref_first + (1 - mu) * pref_second
            >= mu * alt_first + (1 - mu) * alt_second
        ]
        if not valid_range:
            return (None, None)
        # The grid is ascending, so the ends of the list are the bounds.
        return (float(valid_range[0]), float(valid_range[-1]))

    def generate_pure_strategies(self) -> Tuple[List[Tuple[str, ...]], List[Tuple[str, ...]]]:
        """Enumerate every pure strategy of both players.

        Returns:
            ``(sender_strategies, receiver_strategies)``. Each sender strategy
            is a tuple of messages aligned with ``self.types``; each receiver
            strategy is a tuple of actions aligned with ``self.messages``.
        """
        sender_strategies = list(itertools.product(self.messages, repeat=len(self.types)))
        receiver_strategies = list(itertools.product(self.actions, repeat=len(self.messages)))
        return sender_strategies, receiver_strategies

    def find_all_pure_PBE(self):
        """Enumerate pure-strategy PBE supported by uniform off-path beliefs.

        Every pair of pure sender and receiver strategies is checked with
        ``verify_PBE``. Beliefs come from ``bayes_update``: the Bayes
        posterior on the equilibrium path and the uniform distribution off
        it. Equilibria that can only be sustained by a non-uniform off-path
        belief are therefore not reported.

        Returns:
            A list of ``(sender_strategy, receiver_policy, belief_by_msg)``
            tuples of independent dicts, in enumeration order.
        """
        sender_strategies, receiver_strategies = self.generate_pure_strategies()
        all_PBEs = []

        for s_strategy in sender_strategies:
            strategy_dict = dict(zip(self.types, s_strategy))
            # Beliefs depend only on the sender strategy, so compute them once
            # instead of once per receiver strategy.
            belief_by_msg = {
                m: self.bayes_update(strategy_dict, m) for m in self.messages
            }

            for r_strategy in receiver_strategies:
                receiver_policy = dict(zip(self.messages, r_strategy))
                valid, _ = self.verify_PBE(strategy_dict, belief_by_msg, receiver_policy)
                if valid:
                    all_PBEs.append((
                        dict(strategy_dict),
                        receiver_policy,
                        {m: dict(b) for m, b in belief_by_msg.items()},
                    ))

        return all_PBEs


In [70]:
# Barbie payoff u_sender(type, message, action)
def u_sender(t, m, a):
    """Return the sender's payoff for a (type, message, action) outcome.

    Any combination that is not listed in the table pays 0.
    """
    outcome = (t, m, a)
    table = {
        # type GI
        ('GI', 'T', 'L'): 2,
        ('GI', 'T', 'R'): 0,
        ('GI', 'B', 'L'): 0,
        ('GI', 'B', 'R'): 1,
        # type GII
        ('GII', 'T', 'L'): 0,
        ('GII', 'T', 'R'): 3,
        ('GII', 'B', 'L'): 2,
        ('GII', 'B', 'R'): 0,
    }
    if outcome in table:
        return table[outcome]
    return 0

# Ken payoff u_receiver(type, message, action)
def u_receiver(t, m, a):
    """Return the receiver's payoff for a (type, message, action) outcome.

    Any combination that is not listed in the table pays 0.
    """
    table = {
        # after message T
        ('GI', 'T', 'L'): 0,
        ('GI', 'T', 'R'): 3,
        ('GII', 'T', 'L'): 3,
        ('GII', 'T', 'R'): 1,
        # after message B
        ('GI', 'B', 'L'): 4,
        ('GI', 'B', 'R'): 0,
        ('GII', 'B', 'L'): 0,
        ('GII', 'B', 'R'): 1,
    }
    try:
        return table[(t, m, a)]
    except KeyError:
        # Unlisted outcome: default payoff.
        return 0

# Two equally likely sender types, messages T/B, receiver actions L/R.
game = SignalingGame(
    u_sender=u_sender,
    u_receiver=u_receiver,
    types=['GI', 'GII'],
    messages=['T', 'B'],
    actions=['L', 'R'],
    priors={'GI': 0.5, 'GII': 0.5},
)

pbes = game.find_all_pure_PBE()

# Print every pure-strategy PBE that was found
for number, (sender_strategy, receiver_strategy, beliefs) in enumerate(pbes, start=1):
    print(f"\nPBE #{number}")
    print("Sender strategy:", sender_strategy)
    print("Receiver strategy:", receiver_strategy)
    print("Beliefs:")
    for message, posterior in beliefs.items():
        print(f"  {message}: {posterior}")

# Beliefs on type GI after message B for which the receiver weakly prefers L to R.
interval = game.off_path_belief_range(msg='B', receiver_pref='L', alt_action='R')
print("Off-path belief ν ∈", interval)



PBE #1
Sender strategy: {'GI': 'T', 'GII': 'T'}
Receiver strategy: {'T': 'R', 'B': 'L'}
Beliefs:
  T: {'GI': 0.5, 'GII': 0.5}
  B: {'GI': 0.5, 'GII': 0.5}
Off-path belief ν ∈ (0.2, 1.0)


In [71]:
# Caesar payoff: u_sender(type, message, action)
def u_sender(t, m, a):
    """Return the sender's payoff: a drink bonus minus a fight cost.

    The bonus is 1 when a brave type sends 'B' or a coward type sends 'O';
    the cost is 2 whenever the receiver's action is 'F'.
    """
    favourite_drink = (('brave', 'B'), ('coward', 'O'))
    if (t, m) in favourite_drink:
        payoff = 1
    else:
        payoff = 0
    if a == 'F':
        payoff += -2
    return payoff

# Brutus payoff: u_receiver(type, message, action)
def u_receiver(t, m, a):
    """Return the receiver's payoff: +1 for the right call, -1 otherwise.

    Action 'F' is right against a coward; any other action is right against
    a brave type. The message does not affect the payoff.
    """
    rewarded_type = 'coward' if a == 'F' else 'brave'
    if t == rewarded_type:
        return 1
    return -1

# Sender is brave with probability 0.9; messages O/B, receiver actions F/A.
game = SignalingGame(
    u_sender=u_sender,
    u_receiver=u_receiver,
    types=['brave', 'coward'],
    messages=['O', 'B'],
    actions=['F', 'A'],
    priors={'brave': 0.9, 'coward': 0.1},
)

pbes = game.find_all_pure_PBE()

# Print every pure-strategy PBE that was found
for number, (sender_strategy, receiver_strategy, beliefs) in enumerate(pbes, start=1):
    print(f"\nPBE #{number}")
    print("Sender strategy:", sender_strategy)
    print("Receiver strategy:", receiver_strategy)
    print("Beliefs:")
    for message, posterior in beliefs.items():
        print(f"  {message}: {posterior}")

# Beliefs on the brave type for which F is weakly preferred to A, per message.
interval = game.off_path_belief_range(msg='B', receiver_pref='F', alt_action='A')
print("Brutus should believe ν ∈", interval)

interval = game.off_path_belief_range(msg='O', receiver_pref='F', alt_action='A')
print("Brutus should believe μ ∈", interval)



PBE #1
Sender strategy: {'brave': 'O', 'coward': 'O'}
Receiver strategy: {'O': 'A', 'B': 'F'}
Beliefs:
  O: {'brave': 0.9, 'coward': 0.1}
  B: {'brave': 0.5, 'coward': 0.5}

PBE #2
Sender strategy: {'brave': 'B', 'coward': 'B'}
Receiver strategy: {'O': 'F', 'B': 'A'}
Beliefs:
  O: {'brave': 0.5, 'coward': 0.5}
  B: {'brave': 0.9, 'coward': 0.1}
Brutus should believe ν ∈ (0.0, 0.5)
Brutus should believe μ ∈ (0.0, 0.5)
