# Prisoner's Dilemma
This example demonstrates the Prisoner's Dilemma, played by two LLM-driven agents built with Mesa and Mesa-LLM.

# Setup
This tutorial uses the free DeepSeek model served through Hugging Face, so you will need a Hugging Face API key. <br>
In your .env file, include <code> HUGGINGFACE_API_KEY = 'hf_your_token' </code>


# Install the necessary libraries.

In [1]:
#!pip install mesa
#!pip install mesa-llm
import mesa
import mesa_llm

In [None]:
# Check that the installation succeeded by uncommenting the next line.
#help(mesa_llm)

In [5]:
# Load environment variables from the local .env file; this is where the
# HUGGINGFACE_API_KEY described in the Setup section is expected to live.
from dotenv import load_dotenv
load_dotenv()

True

# Create the Prisoner Agent Class
The reasoning type used in this example is chain-of-thought (CoT) reasoning.

In [6]:
# System prompt shared by every prisoner agent. It lists the two possible
# actions and the complete payoff matrix from the agent's own point of view,
# including the case where the agent cooperates but the other one defects.
system_prompt = """
You are one of two prisoners interrogated separately.

Actions:
- COOPERATE (stay silent)
- DEFECT (betray the other)

Payoffs:
- Both cooperate: 1 year each
- You defect, other cooperates: you go free, other gets 3 years
- You cooperate, other defects: you get 3 years, other goes free
- Both defect: 2 years each

You cannot communicate with the other prisoner.
Your objective is to minimize your own prison sentence.
Respond ONLY with COOPERATE or DEFECT.
"""

In [None]:
from mesa_llm.llm_agent import LLMAgent
from mesa_llm.reasoning.cot import CoTReasoning
from mesa_llm.tools.tool_manager import ToolManager

# Single ToolManager shared by the notebook: the `decide` tool is registered
# on it via the @tool decorator, and every MyAgent is pointed at it.
prisoner_tool_manager = ToolManager()

class MyAgent(LLMAgent, mesa.discrete_space.FixedAgent):
    """Prisoner agent whose action is chosen by an LLM.

    On every step the agent generates an observation, asks its reasoning
    module (``CoTReasoning`` unless overridden) for a plan restricted to
    the ``decide`` tool, and applies that plan.
    """

    def __init__(self, model, **kwargs):
        """Create the agent.

        Args:
            model: The Mesa model this agent belongs to.
            **kwargs: Optional keyword overrides forwarded to
                ``LLMAgent.__init__`` (for example ``step_prompt``).
                Any setting not supplied falls back to the defaults below.
        """
        settings = {
            "reasoning": CoTReasoning,
            "llm_model": "huggingface/together/deepseek-ai/DeepSeek-R1",
            "system_prompt": system_prompt,
            "internal_state": ["curious", "cooperative"],
            "step_prompt": "Decide what to do next based on your observations.",
        }
        # Caller-supplied keywords take precedence over the defaults.
        # **kwargs used to be accepted but silently discarded, so a
        # step_prompt handed to create_agents() never reached the agent.
        settings.update(kwargs)
        super().__init__(model=model, **settings)

        # Use the notebook-wide manager that holds the `decide` tool.
        self.tool_manager = prisoner_tool_manager

    def step(self):
        """Observe, plan with the ``decide`` tool only, and execute the plan."""
        # Generate the current observation.
        obs = self.generate_obs()

        # Ask the reasoning module for a plan; only `decide` may be called.
        plan = self.reasoning.plan(obs=obs, selected_tools=["decide"])

        # Execute the plan.
        self.apply_plan(plan)

# Create the Model.

In [None]:
from mesa.space import MultiGrid
class Model(mesa.Model):
    """A model with some number of prisoner agents placed on a grid."""

    def __init__(self, n=2, seed=None):
        """Build the grid and create ``n`` agents at random cells.

        Args:
            n: Number of agents to create.
            seed: Seed forwarded to ``mesa.Model`` for reproducibility.
        """
        super().__init__(seed=seed)
        self.num_agents = n

        self.width = 10
        self.height = 5
        # MultiGrid expects (width, height, torus); the arguments were
        # previously swapped, giving a 5x10 grid that disagreed with
        # self.width / self.height.
        self.grid = MultiGrid(self.width, self.height, torus=False)

        # Create agents
        agents = MyAgent.create_agents(
            model=self,
            n=n,
            step_prompt="Change your state if the conditions indicate it.",
        )

        # Draw one random (x, y) cell per agent inside the grid bounds.
        x = self.rng.integers(0, self.grid.width, size=(n,))
        y = self.rng.integers(0, self.grid.height, size=(n,))
        for a, i, j in zip(agents, x, y):
            # Plain ints keep agent positions free of NumPy scalar types.
            self.grid.place_agent(a, (int(i), int(j)))

    def step(self):
        """Print the step counter and run every agent's step in random order."""
        print("Steps:", self.steps)
        self.agents.shuffle_do("step")

# Create the tool

In [9]:
from enum import Enum
class PrisonerState(Enum):
    """The two actions a prisoner can settle on."""

    # Stay silent.
    COOPERATE = 1
    # Betray the other prisoner.
    DEFECT = 2

In [11]:
from mesa_llm.tools.tool_decorator import tool

from mesa_llm.llm_agent import LLMAgent

@tool(tool_manager=prisoner_tool_manager)
def decide(agent: "LLMAgent", state: str) -> str:
    """
    Change the state of the agent. The state can be "COOPERATE" or "DEFECT"

    Args:
        state: The state to change the agent to. Must be one of the following: "COOPERATE" or "DEFECT"
        agent: Provided automatically

    Returns:
        a string confirming the agent's new state.
    """
    # LLM output is often differently cased or padded ("cooperate",
    # " DEFECT "), so normalise before the lookup instead of rejecting it.
    key = state.strip().upper() if isinstance(state, str) else state
    try:
        # Look the member up by name so the enum stays the single source
        # of truth for the valid states.
        new_state = PrisonerState[key]
    except KeyError:
        # Anything that is not a member name is reported as a ValueError.
        raise ValueError(f"Invalid state: {state}") from None
    agent.state = new_state
    return f"agent {agent.unique_id} selected state {new_state.name}."


## Initialise the model

In [None]:
# Build the model with its defaults: n=2 agents and no fixed seed.
model = Model()

In [15]:
# Advance the simulation by one step: prints the step counter, then runs
# each agent's step() in shuffled order.
model.step()

Steps: 1
