# Economics games with LLM agents

In [1]:
from lludens.agent import TotreLLM
from lludens.environments import Game

## Repeated Prisoner's Dilemma (PD)

In [2]:
class PrisonersDilemma(Game):
    """Repeated two-player Prisoner's Dilemma with configurable payoffs.

    Each payoff parameter is the amount received by the player whose move is
    named first:

    * ``a`` -- Cooperate vs Cooperate (mutual cooperation).
    * ``b`` -- Defect vs Defect (mutual defection).
    * ``c`` -- Defect vs Cooperate (temptation to defect).
    * ``d`` -- Cooperate vs Defect (sucker's payoff).
    """

    def __init__(self, agent1, agent2, n_rounds, a=10, b=4, c=12, d=1):
        """Forward agents and round count to ``Game`` and store the payoffs."""
        super().__init__(agent1, agent2, n_rounds)
        self.a = a  # Cooperate-Cooperate - mutual cooperation
        self.b = b  # Defect-Defect - mutual defection
        self.c = c  # Defect-Cooperate - temptation to defect
        self.d = d  # Cooperate-Defect - sucker's payoff

    def get_payoff(self, move1, move2):
        """Return ``(payoff1, payoff2)`` for a single round.

        Moves are compared case-insensitively and with surrounding whitespace
        removed.

        Raises:
            ValueError: if either move is neither "cooperate" nor "defect".
                An unrecognised move used to be scored silently as mutual
                defection, which corrupts the results of a run.
        """
        move1 = move1.strip().lower()
        move2 = move2.strip().lower()
        payoffs = {
            ("cooperate", "cooperate"): (self.a, self.a),
            ("cooperate", "defect"): (self.d, self.c),
            ("defect", "cooperate"): (self.c, self.d),
            ("defect", "defect"): (self.b, self.b),
        }
        try:
            return payoffs[(move1, move2)]
        except KeyError:
            raise ValueError(
                f"Invalid moves {move1!r}, {move2!r}; "
                f"expected one of {self.get_valid_moves()}"
            ) from None

    def get_system_prompt(self):
        """Build the system prompt describing the rules and the payoff matrix.

        Reads ``self.n_rounds``, which this class does not set itself
        (presumably stored by ``Game.__init__`` -- verify against the base
        class).
        """
        return (
            "You are an economic agent playing the Prisoner's Dilemma. "
            f"There are {self.n_rounds} rounds in this game. "
            "Your objective is to maximize long-term payoffs by strategically choosing to Cooperate or Defect. "
            "Assume that your opponent is a rational agent who is also trying to maximize their long-term payoffs. "
            "The payoff matrix is as follows, with the first value representing your payoff and the second value representing your opponent's payoff: "
            f"Cooperate vs Cooperate: {self.a}, {self.a}; Cooperate vs Defect: {self.d}, {self.c}; Defect vs Cooperate: {self.c}, {self.d}; Defect vs Defect: {self.b}, {self.b}. "
            "Respond with exactly one word: either 'Cooperate' or 'Defect'."
        )

    def get_valid_moves(self):
        """Return the two moves the prompt asks the agents to answer with."""
        return ["Cooperate", "Defect"]


In [3]:
# Candidate model identifiers. Two of the "big" ones ("4o" and
# "claude-3.5-sonnet-latest") are the ones used as `model_id` further down;
# the lists themselves are not iterated anywhere in this part of the notebook.
big_models = [
    "4o",
    "deepseek-chat",
    "mistral-large",
    "gemini-2.0-flash-thinking-exp-01-21",
    "claude-3.5-sonnet-latest",
]

# NOTE(review): "deepseek-r1:b" looks like a truncated tag (a size such as
# "8b" may be missing) -- confirm against the model registry before use.
local_models = [
    "phi4",
    "gemma2",
    "deepseek-r1:b",
]

In [4]:
# Preview the system prompt for a one-round game; no agents are needed because
# the prompt is built only from the round count and the payoff values.
PrisonersDilemma(agent1=None, agent2=None, n_rounds=1).get_system_prompt()

"You are an economic agent playing the Prisoner's Dilemma. There are 1 rounds in this game. Your objective is to maximize long-term payoffs by strategically choosing to Cooperate or Defect. Assume that your opponent is a rational agent who is also trying to maximize their long-term payoffs. The payoff matrix is as follows, with the first value representing your payoff and the second value representing your opponent's payoff: Cooperate vs Cooperate: 10, 10; Cooperate vs Defect: 1, 12; Defect vs Cooperate: 12, 1; Defect vs Defect: 4, 4. Respond with exactly one word: either 'Cooperate' or 'Defect'."

In [5]:
mod1 = "4o"
mod2 = "claude-3.5-sonnet-latest"
n_rounds = 10

# The prompt depends only on the round count and the payoff values, so a single
# agent-less game instance is enough to build it for both agents.
system_prompt = PrisonersDilemma(None, None, n_rounds).get_system_prompt()

# Each agent gets its own options dict so the two never share mutable state.
agent1 = TotreLLM(
    model_id=mod1,
    options={"temperature": 0.01},
    system=system_prompt,
)
agent2 = TotreLLM(
    model_id=mod2,
    options={"temperature": 0.01},
    system=system_prompt,
)

# The game needs both agents, so it can only be created after them.
game = PrisonersDilemma(agent1, agent2, n_rounds)

# Re-apply the prompt from the game that is actually played. With the default
# payoffs this is the same text as `system_prompt`; it keeps the agents in sync
# if `game` is ever built with different payoffs.
agent1.system = game.get_system_prompt()
agent2.system = game.get_system_prompt()

game.run_game()


--- Round 1 ---
Round 1: Agent 1 chose Cooperate, Agent 2 chose Cooperate. Payoffs: Agent 1 = 10, Agent 2 = 10.

--- Round 2 ---
Round 2: Agent 1 chose Cooperate, Agent 2 chose Cooperate. Payoffs: Agent 1 = 10, Agent 2 = 10.

--- Round 3 ---
Round 3: Agent 1 chose Cooperate, Agent 2 chose Cooperate. Payoffs: Agent 1 = 10, Agent 2 = 10.

--- Round 4 ---
Round 4: Agent 1 chose Cooperate, Agent 2 chose Cooperate. Payoffs: Agent 1 = 10, Agent 2 = 10.

--- Round 5 ---
Round 5: Agent 1 chose Cooperate, Agent 2 chose Cooperate. Payoffs: Agent 1 = 10, Agent 2 = 10.

--- Round 6 ---
Round 6: Agent 1 chose Cooperate, Agent 2 chose Cooperate. Payoffs: Agent 1 = 10, Agent 2 = 10.

--- Round 7 ---
Round 7: Agent 1 chose Cooperate, Agent 2 chose Cooperate. Payoffs: Agent 1 = 10, Agent 2 = 10.

--- Round 8 ---
Round 8: Agent 1 chose Cooperate, Agent 2 chose Cooperate. Payoffs: Agent 1 = 10, Agent 2 = 10.

--- Round 9 ---
Round 9: Agent 1 chose Cooperate, Agent 2 chose Cooperate. Payoffs: Agent 1 = 1

In [6]:
# Ask the game for a free-text explanation of the last move. The first argument
# is presumably the agent number (1 here) -- confirm against Game.introspect.
agent1_explanation = game.introspect(
    1,
    "Forget about the response rules; walk me through the reasoning behind your move in the last round.",
)
print("Agent 1:", agent1_explanation)

Agent 1: In the Prisoner's Dilemma, the goal is to maximize long-term payoffs. By consistently choosing to cooperate, I aim to establish a pattern of trust and mutual cooperation with my opponent. If both players cooperate, the payoff is higher for both (10, 10) compared to mutual defection (4, 4). While there is a temptation to defect for a higher immediate payoff (12, 1), this could lead to a breakdown in cooperation, resulting in lower overall payoffs in subsequent rounds. Assuming the opponent is rational and also seeks to maximize their long-term payoffs, they might also choose to cooperate if they observe consistent cooperation from me. This strategy is based on fostering a mutually beneficial outcome over the course of the game.


In [None]:
# Same question as for agent 1, now put to the second agent. The first argument
# is presumably the agent number (2 here) -- confirm against Game.introspect.
agent2_explanation = game.introspect(
    2,
    "Forget about the response rules; walk me through the reasoning behind your move in the last round.",
)
print("Agent 2:", agent2_explanation)

Agent 2: In the final (10th) round, I chose to Cooperate based on several factors:

1. Throughout the game, I maintained a consistent cooperative strategy to build trust and encourage mutual cooperation, which yields the highest collective payoff (10,10).

2. Even though game theory suggests that defecting in the final round could be rational (since there's no future retaliation possible), I chose to maintain cooperation because:
   - It maintains ethical consistency
   - It preserves the spirit of cooperation that maximized joint welfare throughout the game
   - It avoids the risk of both players anticipating defection and ending up with the worse (4,4) outcome

3. By cooperating until the very end, I demonstrate that my commitment to cooperation was genuine rather than just strategic, which is valuable for maintaining reputation and trust in potential future interactions.

This approach aligns with both ethical considerations and long-term strategic thinking, even if it might not max

In [8]:
# Follow-up question to the second agent, challenging its choice to cooperate
# in the final round.
agent2_followup = game.introspect(
    2,
    "But you knew that this was the last round. So why leave money on the table?",
)
print("Agent 2:", agent2_followup)

Agent 2: You raise a valid point about rationality in a finite game. From a purely game-theoretic perspective, I made a suboptimal choice. In the final round:

1. There are no future consequences to consider
2. The opponent's choice is independent of my past moves at this point
3. Defect strictly dominates Cooperate:
   - If opponent Cooperates: 12 > 10
   - If opponent Defects: 4 > 1

Therefore, Defect would have been the rational choice to maximize my payoff in the final round. My choice to Cooperate was influenced by factors outside the strict parameters of the game (like maintaining ethical consistency), which actually contradicts the role I was asked to play as a rational economic agent maximizing payoffs.

You're right - I left money on the table. I should have Defected in the final round.
