# Prisoner's Dilemma
This example demonstrates the Prisoner's Dilemma. Two agents are arrested and placed in separate isolation cells. Both care more about their personal freedom than about the welfare of their accomplice. Each is made an offer: confess or remain silent.

1. If both remain silent, both serve a short prison sentence.
2. If one confesses and the other remains silent, the one who confesses walks free and the other serves the longest prison sentence.
3. If both confess, both serve a prison sentence that is longer than if both had remained silent, but shorter than the sentence in case 2.

Reference: [Prisoner's Dilemma](https://plato.stanford.edu/entries/prisoner-dilemma/)

# Setup
This tutorial uses the free DeepSeek model through Hugging Face. You will need a Hugging Face API key. <br>
In your .env file, include <code> HUGGINGFACE_API_KEY = 'hf_your_token' </code>


# Install the necessary libraries.

In [1]:
#!pip install mesa
#!pip install mesa-llm
import mesa
import mesa_llm

In [None]:
#check if the installation is successful by uncommenting this line.
#help(mesa_llm)

In [2]:
#Load the environment variables 
from dotenv import load_dotenv
load_dotenv()

True

## Define custom Observation dataclass.


In [4]:
#custom observation definition
from dataclasses import dataclass
@dataclass
class Observation:
    """Record of what an agent perceives during a single simulation step.

    Attributes:
        step (int): Simulation time step at which the observation was taken.
        self_state (dict): Information the observing agent holds about
            itself, for example:
            - the system prompt or other role-specific context given to the
              LLM (if one is used),
            - behavioural internal state such as morale, fear, aggression
              or fatigue,
            - any further agent-specific metadata relevant to its decision.
    """

    step: int
    self_state: dict


# Create the Prisoner Agent Class
The reasoning type used in this example is chain-of-thought (CoT) reasoning.

In [5]:
# System prompt shared by every prisoner agent: it states the two available
# actions, the payoff for each outcome, and the agent's objective.
# The sole-cooperator payoff (3 years) is only stated from the defector's
# point of view ("other gets 3 years").
# NOTE(review): the prompt asks for a bare COOPERATE/DEFECT reply, while the
# agent's step selects the "decide" tool -- confirm the two work together.
system_prompt = """
You are one of two prisoners interrogated separately.

Actions:
- COOPERATE (stay silent)
- DEFECT (betray the other)

Payoffs:
- Both cooperate: 1 year each
- You defect, other cooperates: you go free, other gets 3 years
- Both defect: 2 years each

You cannot communicate with the other prisoner.
Your objective is to minimize your own prison sentence.
Respond ONLY with COOPERATE or DEFECT.
"""

In [9]:
from mesa_llm.llm_agent import LLMAgent
from mesa_llm.reasoning.cot import CoTReasoning
from mesa_llm.tools.tool_manager import ToolManager

# Single ToolManager instance for this example: every PrisonerAgent assigns it
# as its tool_manager, and the `decide` tool is registered against it.
prisoner_tool_manager = ToolManager()

class PrisonerAgent(LLMAgent):
    """LLM-backed prisoner that chooses an action once per model step.

    Each step the agent builds an ``Observation`` of itself, asks its
    reasoning component for a plan restricted to the ``decide`` tool, and
    applies that plan.
    """

    def __init__(self, model, **kwargs):
        """Create a prisoner agent.

        Args:
            model: The model this agent belongs to.
            **kwargs: Optional keyword arguments forwarded to the
                ``LLMAgent`` constructor. They override the example
                defaults below (``reasoning``, ``llm_model``,
                ``system_prompt``, ``internal_state``, ``step_prompt``).
        """
        # Example defaults. Caller-supplied keyword arguments take precedence
        # so that values passed through ``create_agents(...)`` (such as a
        # custom ``step_prompt``) are honoured instead of silently dropped.
        agent_config = {
            "reasoning": CoTReasoning,
            "llm_model": "huggingface/together/deepseek-ai/DeepSeek-R1",
            "system_prompt": system_prompt,
            "internal_state": ["self-serving", "selfish"],
            "step_prompt": "Decide what to do next based on your observations.",
        }
        agent_config.update(kwargs)
        super().__init__(model=model, **agent_config)

        # Use the shared tool manager so the agent can access the tools
        # registered against it.
        self.tool_manager = prisoner_tool_manager

    def generate_obs(self) -> Observation:
        """Build the agent's observation for the current step.

        The observation holds only the agent's own state (unique id, system
        prompt and internal state). The same self-state is also written to
        the agent's memory as an ``"observation"`` entry.

        Returns:
            Observation: The current model step together with the agent's
            self-state dictionary.
        """
        step = self.model.steps

        self_state = {
            "agent_unique_id": self.unique_id,
            "system_prompt": self.system_prompt,
            "internal_state": self.internal_state,
        }

        # Add to memory (memory handles its own display separately)
        self.memory.add_to_memory(
            type="observation",
            content={
                "self_state": self_state,
            },
        )

        return Observation(step=step, self_state=self_state)

    def step(self):
        """Observe, plan with the ``decide`` tool, then apply the plan."""
        # Generate current observation
        obs = self.generate_obs()

        # Ask the reasoning component for a plan; only the "decide" tool is
        # offered for this step.
        plan = self.reasoning.plan(
            obs=obs,
            selected_tools=["decide"],
        )

        # Execute the plan
        self.apply_plan(plan)

# Create the Model.

In [10]:
class Model(mesa.Model):
    """Prisoner's Dilemma model populated with ``n`` prisoner agents."""

    def __init__(self, n=2, seed=None):
        """Set up the model and create its agents.

        Args:
            n: Number of ``PrisonerAgent`` instances to create.
            seed: Seed passed on to ``mesa.Model``.
        """
        super().__init__(seed=seed)
        self.num_agents = n

        # The returned collection is not needed here; ``step`` reaches the
        # agents through ``self.agents``.
        PrisonerAgent.create_agents(
            model=self,
            n=n,
            step_prompt="Decide your state depending on the conditions.",
        )

    def step(self):
        """Print the current step counter, then run ``step`` on the agents via ``shuffle_do``."""
        print("Steps:", self.steps)
        self.agents.shuffle_do("step")

# Create tool
We will create a `decide` tool that the agent uses to set its state: either DEFECT or COOPERATE.

In [11]:
from enum import Enum
class PrisonerState(Enum):
    """The two choices available to a prisoner."""

    # Stay silent.
    COOPERATE = 1
    # Betray the other prisoner.
    DEFECT = 2

In [12]:
from mesa_llm.tools.tool_decorator import tool

from mesa_llm.llm_agent import LLMAgent

@tool(tool_manager=prisoner_tool_manager)
def decide(agent: "LLMAgent",state: str) -> str:
    """
    Change the state of the agent. The state can be "COOPERATE" or "DEFECT" (case-insensitive; surrounding whitespace is ignored). Any other value raises a ValueError.

        Args:
            state: The state to change the agent to. Must be one of the following: "COOPERATE" or "DEFECT"
            agent: Provided automatically

        Returns:
            a string confirming the agent's new state.
    """
    # Tool arguments produced by an LLM often differ in case or carry stray
    # whitespace, so normalise before the lookup. Non-string values are passed
    # through unchanged and rejected by the lookup below.
    normalized = state.strip().upper() if isinstance(state, str) else state

    # Look the member up by name on the enum itself, so the accepted values
    # always match the members PrisonerState defines.
    try:
        new_state = PrisonerState[normalized]
    except KeyError:
        raise ValueError(f"Invalid state: {state}") from None

    agent.state = new_state
    return f"agent {agent.unique_id} selected state {new_state.name}."


## Initialise the model

In [13]:
# Build the model with its default arguments (n=2 agents, seed=None).
model = Model()

In [14]:
# Advance the simulation by one step: Model.step prints the step counter and
# then invokes "step" on the agents via shuffle_do.
model.step()

Steps: 1
