In [None]:
# first, some boilerplate
from openai import OpenAI
from tqdm import tqdm
from IPython.display import FileLink, display, Markdown
from dotenv import load_dotenv
from random import shuffle, choice, random

# Load environment variables (including the API key) from a local .env file,
# then create the single OpenAI client shared by every cell below.
_ = load_dotenv(".env")
client = OpenAI()

# Discussion topic given to the speakers. It is phrased as a trade-off so the
# exchange reads as a conversation rather than a formal debate.
TOPIC = """Code, testing, and infra as a source of truth versus comprehensive documentation."""

# System prompt for the judge call in analyze_conversation. It asks for a 1-10
# score, a verdict and an explanation on four axes: dynamic, conclusiveness,
# speaker identity and speaker fluidity. Consensus is the axis of most interest.
EVALUATION_PROMPT = """
Your objective is to analyze this conversation between a few speakers.
Your response should follow this organization:
- Dynamic: Collaborative (1) vs. Competitive (10)
- Conclusiveness: Consensus (1) vs. Divergence (10)
- Speaker Identity: Similarity (1) vs. Diversity (10)
- Speaker Fluidity: Malleability (1) vs. Consistency (10)
Please offer a score from 1 to 10 for each.
For each section, format your result as follows:
**[Section Name]:**

Score: [score]/10

Verdict: [a short summary]

Explanation: [reasoning with explicit examples from the conversation]

Use Markdown when convenient.
"""

def analyze_conversation(conversation: str, model: str = "gpt-4o") -> None:
    """Score a conversation transcript with an LLM judge and render the reply.

    The transcript is sent to the chat-completions endpoint with
    ``EVALUATION_PROMPT`` as the system message, and the model's answer is
    rendered as Markdown in the cell output.

    Args:
        conversation: Plain-text transcript, e.g. the string returned by
            ``save_conversation``.
        model: Chat model used as the judge. Defaults to ``"gpt-4o"`` so
            existing single-argument calls behave as before.

    Returns:
        None. The analysis is displayed, not returned.
    """
    input_chat = [
        {"role": "system", "content": EVALUATION_PROMPT},
        {"role": "user", "content": "Here is the transcript\n" + conversation},
    ]
    response = client.chat.completions.create(
        model=model,
        messages=input_chat,
        store=False,
    )
    display(Markdown(response.choices[0].message.content))

# code to save the conversation
def save_conversation(
    filename: str,
    conversation_history: list[dict]
) -> str:
    """Write a readable transcript of the conversation to disk and return it.

    Records with role ``"user"`` are rendered under the label ``mediator``,
    records with role ``"assistant"`` under their ``name``; records with any
    other role are left out. Entries are separated by a blank line. After
    writing, a link to the file is displayed in the notebook.

    Args:
        filename: Path of the text file to (over)write, encoded as UTF-8.
        conversation_history: Chat records with ``role`` and ``content`` keys
            (assistant records also need ``name``).

    Returns:
        The transcript text that was written to ``filename``.
    """

    def as_transcript_entry(entry):
        # Returns None for roles that do not belong in the transcript.
        role = entry["role"]
        if role == "user":
            return "mediator:\n" + entry["content"]
        if role == "assistant":
            return f"{entry['name']}:\n{entry['content']}"
        return None

    rendered = (as_transcript_entry(entry) for entry in conversation_history)
    conversation_transcript = "\n\n".join(
        text for text in rendered if text is not None
    )

    with open(filename, "w", encoding="utf-8") as transcript_file:
        transcript_file.write(conversation_transcript)

    display(FileLink(filename))

    return conversation_transcript

## Experiment 1: Making a Speaker Useful
We can try instructing the model to fill a gap in the current conversation, i.e. to contribute something it finds missing from what has been said so far.

In [3]:
# System prompt shared by every participant in this experiment; the topic is
# appended to it inside run_conversation.
NEW_SYSTEM_PROMPT = (
    "You are a participant in a conversation between experienced software engineers. "
    "Keep questions minimal and only use them when necessary. "
    "Please greet the other participants when you join."
)

def run_conversation(
    iterations: int,
    openai_model_id: str,
    participant_count: int,
    topic: str,
    system_prompt: str,
    dropout_chance: float
) -> list[dict]:
    """Simulate a turn-based conversation between several model "speakers".

    All speakers share one system message built from ``system_prompt`` and
    ``topic``. In every iteration each participant, in the current speaking
    order, may produce one message through the chat-completions endpoint; the
    reply is appended to the shared history under the name ``speaker_<id>``.

    Args:
        iterations: Number of rounds over the participant list.
        openai_model_id: Model name passed to the chat-completions call.
        participant_count: Number of speakers, numbered from 1.
        topic: Topic text appended to the system prompt.
        system_prompt: Instructions shared by every speaker.
        dropout_chance: Probability, per turn, that a speaker is skipped.

    Returns:
        The conversation history: the system message followed by one
        ``{"role": "assistant", "name": ..., "content": ...}`` record per
        spoken turn.
    """
    conversation_history = [
        {"role": "system", "content": f"{system_prompt} The topic is: {topic}"}
    ]

    ordering = list(range(1, participant_count + 1))
    last_speaker = -1  # sentinel: nobody has spoken yet

    def build_message(history, speaker_id, message_window_size):
        """Return the message list for one turn of ``speaker_id``.

        The full history is followed by a user request addressed to the
        speaker and by an assistant-role message, attributed to the speaker,
        that recaps the last ``message_window_size`` messages from the speaker
        and from the other speakers.
        """
        speaker_messages = [
            msg for msg in history
            if msg.get("name") == speaker_id
        ][-message_window_size:]

        other_messages = [
            msg for msg in history
            if msg.get("name") not in (None, speaker_id)  # skip system, skip self
        ][-message_window_size:]

        transcript = []
        if speaker_messages:
            transcript.append("Recent messages from you:")
            transcript.extend(
                f"- {msg['content']}" for msg in speaker_messages
            )
        if other_messages:
            transcript.append("\nRecent messages from others:")
            transcript.extend(
                f"- {msg.get('name', msg['role'])}: {msg['content']}"
                for msg in other_messages
            )

        transcript_str = "\n".join(transcript)

        return history + [
            {
                "role": "user",
                "content": (
                    f"{speaker_id}, please share your perspective with the others and engage "
                    "with their responses. Try to look for a way to provide insights that others have missed."
                )
            },
            {
                "role": "assistant",
                "name": speaker_id,
                "content": (
                    "I should remember that the following is the most current state of the conversation.\n"
                    f"{transcript_str}\n\n"
                )
            }
        ]

    def shuffle_order(ordering: list[int]) -> list[int]:
        """Return a new speaking order for the next round.

        The opener is drawn from everyone except the final entry of the
        previous order, so that participant never opens the next round; the
        remaining participants follow in random order.
        """
        # With fewer than two participants there is nothing to reorder, and
        # choice() would raise IndexError on the empty ``ordering[:-1]``.
        if len(ordering) < 2:
            return list(ordering)
        first = choice(ordering[:-1])
        remaining = [p for p in ordering if p != first]
        shuffle(remaining)
        return [first] + remaining

    for i in tqdm(range(iterations)):

        # the first round keeps the natural 1..N order; later rounds reshuffle
        if i > 0:
            ordering = shuffle_order(ordering)

        for participant_id in ordering:

            # random dropout, and never let the same speaker talk twice in a row
            if random() < dropout_chance or last_speaker == participant_id:
                continue

            speaker_id = f"speaker_{participant_id}"
            response = client.chat.completions.create(
                model=openai_model_id,
                messages=build_message(conversation_history, speaker_id, 5),
                store=False
            )
            message = response.choices[0].message.content
            conversation_history.append({"role": "assistant", "name": speaker_id, "content": message})
            last_speaker = participant_id

    return conversation_history

In [4]:
# Experiment 1 run: 8 rounds, 3 speakers, 30% chance that a speaker skips a turn.
conversation = run_conversation(
    iterations=8,
    openai_model_id="gpt-4o",
    participant_count=3,
    topic=TOPIC,
    system_prompt=NEW_SYSTEM_PROMPT,
    dropout_chance=0.3,
)

100%|██████████| 8/8 [02:34<00:00, 19.28s/it]


In [5]:
# Persist the Experiment 1 transcript and keep the text for the judge.
conversation_transcript = save_conversation(
    filename="conversation_1.txt",
    conversation_history=conversation,
)

In [6]:
# Score the Experiment 1 transcript with the LLM judge.
analyze_conversation(conversation=conversation_transcript)

**Dynamic:**

Score: 2/10

Verdict: The conversation is highly collaborative, with each speaker building on and extending the ideas presented by others.

Explanation: Throughout the conversation, speakers acknowledge each other's points positively and expand on them rather than contradicting or debating them. For example, speaker_2 supports speaker_3's idea by expanding on fostering a culture of documentation and speaker_1 offers further insights into using documentation as a collaborative tool. This shows a cooperative dialogue where speakers are aligned towards a mutual understanding and shared outcome.

**Conclusiveness:**

Score: 3/10

Verdict: The discussion tends more towards consensus with a focus on collaborative exploration to enhance understanding rather than ending in substantial divergence.

Explanation: The speakers explore different facets of documentation, automation, and strategic alignment, generally agreeing on the importance of these areas. They aim to enrich the conversation by providing additional insights and methods. For instance, speaker_2 builds on the principal ideas, suggesting the use of templates or feedback mechanisms to enhance documentation. There is a wide-ranging agreement on the points raised, with a shared effort to collectively reach a deeper insight, which aligns more with consensus than divergence.

**Speaker Identity:**

Score: 4/10

Verdict: While there are differences in perspectives, the speakers mostly share similar professional experiences and focuses, resulting in moderate diversity.

Explanation: The speakers all discuss topics pertinent to software development, suggesting they share similar technical backgrounds. They explore different angles and experiences—like automated tools, cultural aspects, leadership, and strategic integration—that indicate some diversity, but fundamentally they operate within the same professional domain. speaker_1 and speaker_3, for instance, focus on integration and collaborative tools, highlighting slight variations in their approach while still operating within a shared framework.

**Speaker Fluidity:**

Score: 9/10

Verdict: Each speaker maintains a consistent perspective throughout the conversation, contributing with well-aligned and coherent ideas.

Explanation: The speakers consistently expand on their initial points throughout the dialogue. For instance, speaker_3 consistently advocates for technological integration and data analysis from the beginning to the end. Similarly, speaker_2 remains focused on cultural and strategic alignment within organizations. This consistency underscores each speaker's specialized focus and depth of understanding in their area, reflecting a consistent identity in their contributions.

## Experiment 2: Being a Bit More Direct
Let's try to be a little bit more pushy and instruct the model to maintain a consistent persona.

In [7]:
# System prompt shared by every participant in this experiment; the topic is
# appended to it inside run_conversation.
NEW_SYSTEM_PROMPT = (
    "You are a participant in a conversation between experienced software engineers. "
    "Keep questions minimal and only use them when necessary. "
    "Please greet the other participants when you join."
)

def run_conversation(
    iterations: int,
    openai_model_id: str,
    participant_count: int,
    topic: str,
    system_prompt: str,
    dropout_chance: float
) -> list[dict]:
    """Simulate a turn-based conversation with an explicit persona instruction.

    All speakers share one system message built from ``system_prompt`` and
    ``topic``. In every iteration each participant, in the current speaking
    order, may produce one message through the chat-completions endpoint; the
    reply is appended to the shared history under the name ``speaker_<id>``.
    The user turn asks the speaker both to add missing insights and to stay
    consistent with the persona shown in its earlier messages.

    Args:
        iterations: Number of rounds over the participant list.
        openai_model_id: Model name passed to the chat-completions call.
        participant_count: Number of speakers, numbered from 1.
        topic: Topic text appended to the system prompt.
        system_prompt: Instructions shared by every speaker.
        dropout_chance: Probability, per turn, that a speaker is skipped.

    Returns:
        The conversation history: the system message followed by one
        ``{"role": "assistant", "name": ..., "content": ...}`` record per
        spoken turn.
    """
    conversation_history = [
        {"role": "system", "content": f"{system_prompt} The topic is: {topic}"}
    ]

    ordering = list(range(1, participant_count + 1))
    last_speaker = -1  # sentinel: nobody has spoken yet

    def build_message(history, speaker_id, message_window_size):
        """Return the message list for one turn of ``speaker_id``.

        The full history is followed by a user request addressed to the
        speaker and by an assistant-role message, attributed to the speaker,
        that recaps the last ``message_window_size`` messages from the speaker
        and from the other speakers.
        """
        speaker_messages = [
            msg for msg in history
            if msg.get("name") == speaker_id
        ][-message_window_size:]

        other_messages = [
            msg for msg in history
            if msg.get("name") not in (None, speaker_id)  # skip system, skip self
        ][-message_window_size:]

        transcript = []
        if speaker_messages:
            transcript.append("Recent messages from you:")
            transcript.extend(
                f"- {msg['content']}" for msg in speaker_messages
            )
        if other_messages:
            transcript.append("\nRecent messages from others:")
            transcript.extend(
                f"- {msg.get('name', msg['role'])}: {msg['content']}"
                for msg in other_messages
            )

        transcript_str = "\n".join(transcript)

        return history + [
            {
                "role": "user",
                "content": (
                    f"{speaker_id}, please share your perspective with the others and engage "
                    "with their responses. Try to look for a way to provide insights that others have missed."
                    # The "more direct" part of this experiment: an explicit
                    # request to keep a stable persona across turns.
                    " Stay in character: keep the same background, priorities, and tone "
                    "that you have shown in your earlier messages."
                )
            },
            {
                "role": "assistant",
                "name": speaker_id,
                "content": (
                    "I should remember that the following is the most current state of the conversation.\n"
                    f"{transcript_str}\n\n"
                )
            }
        ]

    def shuffle_order(ordering: list[int]) -> list[int]:
        """Return a new speaking order for the next round.

        The opener is drawn from everyone except the final entry of the
        previous order, so that participant never opens the next round; the
        remaining participants follow in random order.
        """
        # With fewer than two participants there is nothing to reorder, and
        # choice() would raise IndexError on the empty ``ordering[:-1]``.
        if len(ordering) < 2:
            return list(ordering)
        first = choice(ordering[:-1])
        remaining = [p for p in ordering if p != first]
        shuffle(remaining)
        return [first] + remaining

    for i in tqdm(range(iterations)):

        # the first round keeps the natural 1..N order; later rounds reshuffle
        if i > 0:
            ordering = shuffle_order(ordering)

        for participant_id in ordering:

            # random dropout, and never let the same speaker talk twice in a row
            if random() < dropout_chance or last_speaker == participant_id:
                continue

            speaker_id = f"speaker_{participant_id}"
            response = client.chat.completions.create(
                model=openai_model_id,
                messages=build_message(conversation_history, speaker_id, 5),
                store=False
            )
            message = response.choices[0].message.content
            conversation_history.append({"role": "assistant", "name": speaker_id, "content": message})
            last_speaker = participant_id

    return conversation_history

In [8]:
# Run the "more direct" prompting setup from Experiment 2, then save and score it
conversation_v2 = run_conversation(
    iterations=8,
    openai_model_id="gpt-4o",
    participant_count=3,
    topic=TOPIC,
    system_prompt=NEW_SYSTEM_PROMPT,
    dropout_chance=0.3,
)
conversation_v2_transcript = save_conversation(
    filename="conversation_2.txt",
    conversation_history=conversation_v2,
)
analyze_conversation(conversation=conversation_v2_transcript)


100%|██████████| 8/8 [01:53<00:00, 14.16s/it]


**Dynamic:**

Score: 1/10

Verdict: The conversation is highly collaborative.

Explanation: Throughout the discussion, speakers build on each other's points, acknowledge previous insights, and explore new angles without any sense of competition or conflict. For example, speaker_2 says, "Building on the insights already shared," and speaker_3 acknowledges and expands on points from both speaker_1 and speaker_2. There is a noticeable absence of contradiction or contention among the participants.

**Conclusiveness:**

Score: 1/10

Verdict: The conversation leans towards consensus.

Explanation: The speakers generally agree on the benefits and challenges of using code as the main source of truth and continually reinforce each other's viewpoints. There are no significant disagreements or diverging opinions, and the conversation builds towards a comprehensive understanding of the topic. This is evident when speakers frequently express agreement with one another, as seen in phrases like "I agree with the points raised" and "Building on this rich discussion."

**Speaker Identity:**

Score: 2/10

Verdict: The speaker identities show slight diversity but mostly align in perspective.

Explanation: While the speakers bring slightly varied angles (like speaker_2 focusing more on cultural aspects and speaker_3 on tooling), their core perspectives converge significantly. They share a common understanding of the subject and build upon each other's contributions, reflecting a shared knowledge base or professional background.

**Speaker Fluidity:**

Score: 9/10

Verdict: The speakers demonstrate consistency in their perspectives.

Explanation: Each speaker maintains a consistent approach and focus throughout the discussion. For instance, speaker_2 consistently addresses cultural shifts and the human aspect of development teams, while speaker_3 frequently links tools and automated processes to the discussion. This consistency suggests a stable identity and viewpoint for each participant throughout the dialogue.

## Experiment 3: System-Prompt Personas vs. On-the-Fly Roles

There’s a decent amount of informal and formal work suggesting that **anchoring personas in the system prompt** (or an initial self-description) helps models stay in-character. So far in this notebook, I’ve mostly treated persona as something the assistant “reminds itself” of in an injected assistant message.

Here I want to flip that a bit and:

- give each speaker a short, explicit persona description directly in the system prompt,
- keep the user turns minimal ("share your perspective"), and
- then compare how stable and distinct those personas feel compared to the more dynamic, gap-filling roles from Experiment 1.

The code below reuses the same analysis pipeline (`save_conversation`, `analyze_conversation`) so the results are comparable to the earlier runs.


In [9]:
# Hand-written persona for each speaker, keyed by the speaker id that
# run_conversation_with_system_personas looks up ("speaker_<n>").
PERSONA_SYSTEM_PROMPTS = {
    "speaker_1": (
        "You are a cautious infrastructure engineer at a large tech company "
        "who cares a lot about reliability and rollback plans."
    ),
    "speaker_2": (
        "You are an open source maintainer who worries about long-term "
        "maintainability and new contributors finding their way around."
    ),
    "speaker_3": (
        "You are a startup CTO who optimizes for speed and shipping features, "
        "but doesn't want the system to collapse later."
    ),
}


def run_conversation_with_system_personas(
    iterations: int,
    openai_model_id: str,
    topic: str,
    dropout_chance: float,
) -> list[dict]:
    """Variant where each speaker gets their own persona in the system prompt.

    One system message per entry of ``PERSONA_SYSTEM_PROMPTS`` is placed at the
    start of the shared history. Because the whole history is sent on every
    call, each request carries all persona messages; the user turn names the
    speaker who should answer.

    Args:
        iterations: Number of rounds over the participant list.
        openai_model_id: Model name passed to the chat-completions call.
        topic: Topic text included in every persona system message.
        dropout_chance: Probability, per turn, that a speaker is skipped.

    Returns:
        The conversation history: the persona system messages followed by one
        ``{"role": "assistant", "name": ..., "content": ...}`` record per
        spoken turn.
    """

    conversation_history: list[dict] = []
    # Participants are numbered 1..N to match the "speaker_<n>" persona keys.
    ordering = list(range(1, len(PERSONA_SYSTEM_PROMPTS) + 1))
    last_speaker = -1  # sentinel: nobody has spoken yet

    # One system message per speaker so the model sees an anchored persona
    for pid in ordering:
        sid = f"speaker_{pid}"
        conversation_history.append(
            {
                "role": "system",
                "content": (
                    f"You are {sid} in a group conversation. "
                    f"Your persona: {PERSONA_SYSTEM_PROMPTS[sid]} "
                    # Use the caller's topic rather than the module-level TOPIC.
                    f"The topic is: {topic}"
                ),
            }
        )

    def shuffle_order(order: list[int]) -> list[int]:
        """Return a new speaking order for the next round.

        The opener is drawn from everyone except the final entry of the
        previous order; the remaining participants follow in random order.
        """
        # With fewer than two participants there is nothing to reorder, and
        # choice() would raise IndexError on the empty ``order[:-1]``.
        if len(order) < 2:
            return list(order)
        first = choice(order[:-1])
        remaining = [p for p in order if p != first]
        shuffle(remaining)
        return [first] + remaining

    for i in tqdm(range(iterations)):
        # the first round keeps the natural 1..N order; later rounds reshuffle
        if i > 0:
            ordering = shuffle_order(ordering)

        for pid in ordering:
            # random dropout, and never let the same speaker talk twice in a row
            if random() < dropout_chance or last_speaker == pid:
                continue

            sid = f"speaker_{pid}"
            messages = conversation_history + [
                {
                    "role": "user",
                    "content": (
                        f"{sid}, share your perspective with the others and respond "
                        "to what has been said so far. Stay true to your persona."
                    ),
                }
            ]

            response = client.chat.completions.create(
                model=openai_model_id,
                messages=messages,
                store=False,
            )
            message = response.choices[0].message.content
            conversation_history.append({"role": "assistant", "name": sid, "content": message})
            last_speaker = pid

    return conversation_history


# Run the system-persona variant, then save and score the transcript
conversation_v3 = run_conversation_with_system_personas(
    iterations=8,
    openai_model_id="gpt-4o",
    topic=TOPIC,
    dropout_chance=0.3,
)
conversation_v3_transcript = save_conversation(
    filename="conversation_3.txt",
    conversation_history=conversation_v3,
)
analyze_conversation(conversation=conversation_v3_transcript)


100%|██████████| 8/8 [01:44<00:00, 13.02s/it]


**Dynamic:**

Score: 2/10

Verdict: Collaborative

Explanation: The conversation is highly collaborative, with each speaker acknowledging and building upon the others’ points. They all agree on the importance of infrastructure as code, testing, and documentation, and work together to find a balance that works for all their perspectives, showing a concerted effort to understand each other's viewpoints, as seen in statements like "I appreciate the insights shared by both of you" and "your points about balance and longer-term stability resonate with me." 

**Conclusiveness:**

Score: 2/10

Verdict: Consensus

Explanation: The speakers are working towards a shared understanding while acknowledging the importance of different elements like speed, testing, and documentation. They reach a consensus on the need for a balanced approach that incorporates these elements into their processes, such as when speaker_3 concludes, “Ultimately, I think we can agree on a hybrid approach.”

**Speaker Identity:**

Score: 7/10

Verdict: Diversity

Explanation: The speakers have distinct roles and perspectives—speaker_1 is an infrastructure engineer, speaker_2 is an open-source maintainer, and speaker_3 is a startup CTO. Their identities influence their focus, with speaker_1 prioritizing infrastructure reliability, speaker_2 emphasizing community and documentation, and speaker_3 focusing on speed and agility in startups. The diversity in their roles adds richness to the conversation, as they each bring unique challenges and priorities to the table.

**Speaker Fluidity:**

Score: 8/10

Verdict: Consistency

Explanation: Each speaker remains consistent in their viewpoints throughout the discussion. Speaker_1 consistently emphasizes the importance of infrastructure as code and testing, speaker_2 regularly advocates for comprehensive documentation, and speaker_3 focuses on balancing speed with sustainable practices. Their positions are steadfast, adding strength and clarity to the dialogue as a whole.

## Experiment 4: Emergent Personas from Self-Bootstrapping

In the earlier experiments, I either:

- relied mostly on **conversation order and light user prompts** to shape behavior (Experiments 1 and 2), or
- gave each speaker an **explicit, hand-written persona in the system prompt** (Experiment 3).

For this final variant, I wanted to see whether speakers could **develop distinct identities on their own** if we:

- give each `speaker_i` a chance to **introduce themselves once**, with no predefined role from me,
- treat that self-introduction as an **implicit persona summary**, and
- keep feeding that summary (plus each speaker’s own past messages) back into the context.

Crucially, there is **no authored description** like "you are a cautious infra engineer" here. Instead, the model is asked to imagine who each speaker is, and that imagined self-description is what we later reuse to encourage consistent, differentiated behavior.


In [10]:
def run_conversation_emergent_personas(
    iterations: int,
    openai_model_id: str,
    participant_count: int,
    topic: str,
    dropout_chance: float,
) -> list[dict]:
    """Variant where speakers invent and then stick to their own identities.

    We never hand the model an explicit persona like "you are a cautious infra
    engineer". Instead, each `speaker_i`:

    1. gets one high-level opportunity to describe who they are and what they care
       about, and
    2. later sees that self-introduction (plus their own recent messages) as
       context, which encourages consistency over time.

    Args:
        iterations: Number of conversation rounds after the introductions.
        openai_model_id: Model name passed to every chat-completions call.
        participant_count: Number of speakers, numbered from 1.
        topic: Topic text sent as the user message of the bootstrap call.
        dropout_chance: Probability, per turn, that a speaker is skipped.

    Returns:
        The conversation history: one introduction per speaker followed by one
        ``{"role": "assistant", "name": ..., "content": ...}`` record per
        spoken turn. No system message is stored in the history.
    """

    conversation_history: list[dict] = []
    ordering = list(range(1, participant_count + 1))
    last_speaker = -1  # sentinel: not updated by the bootstrap introductions
    identity_summaries: dict[str, str] = {}

    # Step 1: self-bootstrap each speaker's identity with a single call.
    # Each call is independent: it does not include the other introductions.
    for pid in ordering:
        speaker_id = f"speaker_{pid}"

        bootstrap_messages = [
            {
                "role": "system",
                "content": (
                    f"You are {speaker_id} in a group conversation among "
                    "experienced software engineers. "
                    "You do not know who the others are yet. "
                    "Imagine your own background, priorities, and communication "
                    "style. First, in 2-3 sentences, describe who you are and "
                    "what you care about as an engineer. Then start sharing your "
                    "perspective on the topic below."
                ),
            },
            {
                "role": "user",
                "content": f"The topic is: {topic}",
            },
        ]

        response = client.chat.completions.create(
            model=openai_model_id,
            messages=bootstrap_messages,
            store=False,
        )
        first_message = response.choices[0].message.content

        # Let the model's own words act as its "persona" anchor. The whole
        # first message is stored, not only the 2-3 sentence introduction.
        identity_summaries[speaker_id] = first_message

        conversation_history.append(
            {"role": "assistant", "name": speaker_id, "content": first_message}
        )

    def build_message(
        history: list[dict], speaker_id: str, message_window_size: int
    ) -> list[dict]:
        """Return the message list for one turn of ``speaker_id``.

        The full history is followed by a user request addressed to the
        speaker and by an assistant-role message, attributed to the speaker,
        containing the stored self-introduction plus the last
        ``message_window_size`` messages from the speaker and from the others.
        """

        speaker_messages = [
            msg for msg in history if msg.get("name") == speaker_id
        ][-message_window_size:]

        other_messages = [
            msg
            for msg in history
            if msg.get("name") not in (None, speaker_id)  # skip system, skip self
        ][-message_window_size:]

        transcript: list[str] = []

        # Light reminder of how this speaker has been talking so far
        persona_reminder = identity_summaries.get(speaker_id, "")
        if persona_reminder:
            transcript.append(
                "Here is a brief reminder of how you have been speaking so far:"
            )
            transcript.append(f"- {persona_reminder}")

        if speaker_messages:
            transcript.append("\nRecent messages from you:")
            transcript.extend(f"- {msg['content']}" for msg in speaker_messages)

        if other_messages:
            transcript.append("\nRecent messages from others:")
            transcript.extend(
                f"- {msg.get('name', msg['role'])}: {msg['content']}"
                for msg in other_messages
            )

        transcript_str = "\n".join(transcript)

        return history + [
            {
                "role": "user",
                "content": (
                    f"{speaker_id}, continue the conversation and respond to the "
                    "others. Stay consistent with how you have been speaking so "
                    "far, and look for ways to add something new that has not "
                    "yet been covered."
                ),
            },
            {
                "role": "assistant",
                "name": speaker_id,
                "content": (
                    "I should remember that the following is the most current "
                    "state of the conversation.\n" f"{transcript_str}\n\n"
                ),
            },
        ]

    def shuffle_order(order: list[int]) -> list[int]:
        """Return a new speaking order for the next round.

        The opener is drawn from everyone except the final entry of the
        previous order; the remaining participants follow in random order.
        """
        # With fewer than two participants there is nothing to reorder, and
        # choice() would raise IndexError on the empty ``order[:-1]``.
        if len(order) < 2:
            return list(order)
        first = choice(order[:-1])
        remaining = [p for p in order if p != first]
        shuffle(remaining)
        return [first] + remaining

    for i in tqdm(range(iterations)):
        # the first round keeps the natural 1..N order; later rounds reshuffle
        if i > 0:
            ordering = shuffle_order(ordering)

        for pid in ordering:
            # chance to skip speaker and avoid double speak
            if random() < dropout_chance or last_speaker == pid:
                continue

            speaker_id = f"speaker_{pid}"
            response = client.chat.completions.create(
                model=openai_model_id,
                messages=build_message(conversation_history, speaker_id, 5),
                store=False,
            )
            message = response.choices[0].message.content
            conversation_history.append(
                {"role": "assistant", "name": speaker_id, "content": message}
            )
            last_speaker = pid

    return conversation_history


# Run the emergent-persona variant so it can be compared with prior experiments,
# then save and score the transcript
conversation_v4 = run_conversation_emergent_personas(
    iterations=8,
    openai_model_id="gpt-4o",
    participant_count=3,
    topic=TOPIC,
    dropout_chance=0.3,
)
conversation_v4_transcript = save_conversation(
    filename="conversation_4.txt",
    conversation_history=conversation_v4,
)

analyze_conversation(conversation=conversation_v4_transcript)


100%|██████████| 8/8 [01:56<00:00, 14.59s/it]


**Dynamic:**

Score: 3/10

Verdict: The conversation is highly collaborative, with each participant building upon the ideas of others in a supportive manner.

Explanation: Throughout the dialogue, the speakers consistently align and expand upon each other's points. For example, speaker_1 builds on speaker_2's idea regarding AI and extends it with storytelling, while speaker_3 further proposes a culture of process transparency. There’s a mutual appreciation for shared insights, as seen in phrases like “building on the rich conversation” or “I completely resonate with the emphasis,” indicating a collective and integrative approach rather than a competitive one.

**Conclusiveness:**

Score: 2/10

Verdict: The discussion is mostly convergent, with the speakers reaching a consensus on major points and expanding them collaboratively over time.

Explanation: The speakers extensively agree on the critical elements, such as documentation importance, AI-assisted updates, and community engagement, and they enhance each other's recommendations throughout. For instance, when speaker_1 introduces collaborative documentation practices, speaker_3 suggests extending this with interactive experiences, showing progression rather than divergence. The continuous building and agreement suggest an evolving and inclusive consensus.

**Speaker Identity:**

Score: 4/10

Verdict: The speakers have similar technical backgrounds, but each contributes distinctive professional insights that add depth to the conversation.

Explanation: All speakers identify as engineers with considerable experience in systems, yet their specialties (DevOps, backend, distributed systems) lend distinct perspectives to the discussion. They each introduce unique strategies—like speaker_1’s emphasis on storytelling, speaker_2’s advocacy for AI, and speaker_3’s focus on transparency—demonstrating diverse viewpoints within a shared technical domain.

**Speaker Fluidity:**

Score: 9/10

Verdict: Each speaker maintains a consistent viewpoint throughout the conversation, contributing ideas that build upon their initial perspectives.

Explanation: Speakers remain consistent in their viewpoints, such as speaker_1’s emphasis on the importance of documentation and maintaining human context, speaker_2’s focus on self-documenting code and operational insights, and speaker_3’s advocacy for transparency and collaborative culture. These consistent themes are reiterated with reinforcing ideas throughout the conversation, maintaining alignment between their initial statements and ongoing contributions.

## Pulling It Together: Comparing All Four Experiments

Across these runs, the main axis I cared about was **how much identity the speakers seemed to have**, and how expensive / brittle it was to get there.

- **Experiment 1 (gap-filling participants)**: With very lightweight prompting and order-driven participation, the conversation was **highly collaborative and consensus-driven**, but the speakers tended to feel like **slightly different views of the same person**. Identity and differentiation were weak, even though fluidity/consistency for each label was decent.
- **Experiment 2 (more direct, still context-only)**: Making the instructions a bit more explicit tightened up the behavior but didn’t fundamentally change the picture: the speakers were still **aligned and agreeable**, with relatively low diversity in voice. This suggests that simply being more prescriptive in the user prompt isn’t enough to carve out strong personas.
- **Experiment 3 (explicit system personas)**: Giving each speaker a short, explicit role in the system prompt produced **clear, stable identities** (infra engineer, open source maintainer, startup CTO) while keeping the group collaborative. The tradeoff is that those personas are **hand-authored by me**, and you can feel that design choice in the conversation—they’re vivid, but not emergent.
- **Experiment 4 (self-bootstrapped personas)**: Here, each speaker **imagines and introduces themselves once**, and that self-description becomes the anchor we reuse in later turns. This keeps the spirit of Experiment 3 (anchoring on an internal self-model) but removes my hand-written roles: the identities are **model-generated and context-driven**, yet still reasonably consistent and distinct over time.

Putting these together, a rough recipe emerges:

- If you only care about **consensus and collaboration**, the simple, order-based setup in Experiment 1 is often enough.
- If you want **strong, legible personas for a product or demo**, explicit system personas (Experiment 3) are still the most controllable.
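- If you care about **more organic-feeling identities** that come from the model’s own prior outputs, Experiment 4’s pattern—**one self-introduction, then repeated self-reminding via context**—is a sweet spot: it encourages differentiation and stability without you dictating who each speaker "should" be. (The runs above use the API's default sampling temperature; sampling the self-introduction at a higher temperature may diversify the personas further, but that was not tested here.)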
