In [None]:
---
title: Looking Closer
description: "More objective analysis on LLM conversations" 
author: "Eric Zou"
date: "9/22/2025"
categories:
  - LLMs
  - Conversations
---

In [13]:
# as always, some boilerplate
from openai import OpenAI
import os
import base64
import requests
from tqdm import tqdm
from IPython.display import FileLink, display, Markdown
from dotenv import load_dotenv
from random import shuffle, randint, choice, random
from math import floor
from convokit import Corpus, Speaker, Utterance

# Load environment variables from the course-level .env file; the return
# value of load_dotenv is deliberately discarded.
_ = load_dotenv("../../../comm4190_F25/01_Introduction_and_setup/.env")
# Shared client used by every chat-completion call in this notebook. It is
# built with no explicit arguments (presumably it picks up the API key from
# the environment loaded above -- confirm).
client = OpenAI()

# Discussion topic: worded to invite a conversation rather than a debate.
TOPIC = "Code, testing, and infra as a source of truth versus comprehensive documentation."

# System prompt for the reviewer model. It requests four 1-10 scores
# (dynamic, conclusiveness, speaker identity, speaker fluidity), each with a
# verdict and an explanation, because consensus is what we want to measure.
EVALUATION_PROMPT = """
Your objective is to analyze this conversation between a few speakers.
Your response should follow this organization:
- Dynamic: Collaborative (1) vs. Competitive (10)
- Conclusiveness: Consensus (1) vs. Divergence (10)
- Speaker Identity: Similarity (1) vs. Diversity (10)
- Speaker Fluidity: Malleability (1) vs. Consistency (10)
Please offer a score from 1 to 10 for each.
For each section, format your result as follows:
**[Section Name]:**

Score: [score]/10

Verdict: [a short summary]

Explanation: [reasoning with explicit examples from the conversation]

Use Markdown when convenient.
"""

def generate_llm_conversation_review(conversation: str):
    """Ask GPT-4o to score a transcript and render its answer as Markdown.

    The request consists of ``EVALUATION_PROMPT`` as the system message and
    the transcript, prefixed with a short lead-in line, as the user message.
    The content of the first returned choice is displayed in the notebook;
    the function itself returns nothing.

    Args:
        conversation: The plain-text transcript to be reviewed.
    """
    system_message = {"role": "system", "content": EVALUATION_PROMPT}
    user_message = {
        "role": "user",
        "content": "Here is the transcript\n" + conversation,
    }

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
        store=False,
    )

    review_text = completion.choices[0].message.content
    display(Markdown(review_text))

# code to save the conversation
def save_conversation(
    filename: str,
    conversation_history: list[dict]
) -> str:
    """Write a readable transcript of the conversation to a file and return it.

    Each ``"user"`` record is rendered as ``mediator:`` followed by its
    content, and each ``"assistant"`` record as ``<name>:`` followed by its
    content. Records with any other role (for example the system prompt) are
    left out. Entries are separated by a blank line.

    Args:
        filename: Path of the text file to (over)write, encoded as UTF-8.
        conversation_history: Chat records with a ``"role"`` and a
            ``"content"`` key; assistant records must also carry ``"name"``.

    Returns:
        The transcript text that was written to ``filename``.
    """
    messages = []

    for record in conversation_history:
        role = record["role"]

        if role == "user":
            messages.append("mediator:\n" + record["content"])
        elif role == "assistant":
            # Single quotes inside the f-string: reusing the enclosing double
            # quotes is only valid syntax on Python 3.12 and later.
            messages.append(f"{record['name']}:\n{record['content']}")

    conversation_transcript = "\n\n".join(messages)

    with open(filename, "w", encoding="utf-8") as f:
        f.write(conversation_transcript)

    # Show a clickable link to the saved file in the notebook output.
    display(FileLink(filename))

    return conversation_transcript

TransformerDecoderModel requires ML dependencies. Run 'pip install convokit[llm]' to install them.
UnslothUtteranceSimulatorModel requires ML dependencies. Run 'pip install convokit[llm]' to install them.


## Generating a Conversation
Let's build a sufficiently long conversation so we can create a way to analyze it.

In [3]:
# System prompt given to every simulated participant. Adjacent string
# literals are concatenated, so each piece keeps its trailing space.
NEW_SYSTEM_PROMPT = (
    "You are a participant in a conversation between experienced software engineers. "
    "Keep questions minimal and only use them when necessary. "
    "Please greet the other participants when you join."
)

def run_conversation(
    iterations: int,
    openai_model_id: str,
    participant_count: int,
    participant_personas: list[str],
    topic: str,
    system_prompt: str,
    dropout_chance: float
) -> list[dict]:
    """Simulate a multi-speaker conversation with one chat model.

    The conversation runs for ``iterations`` rounds. In every round each
    participant gets at most one turn, in a speaking order that is reshuffled
    after the first round. A turn is skipped at random with probability
    ``dropout_chance``, and also whenever the participant was the most recent
    speaker, so nobody speaks twice in a row.

    Args:
        iterations: Number of rounds to run.
        openai_model_id: Model name passed to the chat-completions endpoint.
        participant_count: Number of participants; they are numbered from 1
            and named ``speaker_<n>``.
        participant_personas: Persona descriptions; entry ``n - 1`` is used
            for participant ``n``.
        topic: Topic text appended to the system prompt.
        system_prompt: Instructions shared by all participants.
        dropout_chance: Probability (compared against ``random()``) that a
            participant's turn is skipped.

    Returns:
        The conversation history: the system message followed by one
        ``"assistant"`` record (with a ``"name"`` key) per turn taken.
    """
    conversation_history = [
        {"role": "system", "content": f"{system_prompt} The topic is: {topic}"}
    ]

    ordering = list(range(1, participant_count + 1))
    last_speaker = -1  # sentinel: no participant has spoken yet

    def build_message(history, speaker_id, persona, message_window_size):
        """Return the message list to send for ``speaker_id``'s next turn.

        The result is the full ``history`` plus two extra messages that are
        never stored in the history itself: a user message inviting the
        speaker to contribute, and an assistant message under the speaker's
        name that recaps the last ``message_window_size`` messages from the
        speaker and from the others and restates the persona.
        """
        speaker_messages = [
            msg for msg in history
            if msg.get("name") == speaker_id
        ][-message_window_size:]

        other_messages = [
            msg for msg in history
            if msg.get("name") not in (None, speaker_id)  # skip system, skip self
        ][-message_window_size:]

        transcript = []
        if speaker_messages:
            transcript.append("Recent messages from you:")
            transcript.extend(
                f"- {msg['content']}" for msg in speaker_messages
            )
        if other_messages:
            transcript.append("\nRecent messages from others:")
            transcript.extend(
                f"- {msg.get('name', msg['role'])}: {msg['content']}"
                for msg in other_messages
            )

        transcript_str = "\n".join(transcript)

        return history + [
            {
                "role": "user",
                "content": (
                    f"{speaker_id}, please share your perspective with the others and engage "
                    f"with their responses."
                )
            },
            {
                "role": "assistant",
                "name": speaker_id,
                "content": (
                    f"I should remember that the following is the most current state of the conversation.\n"
                    f"{transcript_str}\n\n"
                    f"I also recall my identity is {persona}."
                )
            }
        ]

    def shuffle_order(ordering: list[int]) -> list[int]:
        """Return a new speaking order for the next round.

        The opener is drawn from every participant except the one who was
        last in the previous order; the remaining participants follow in
        random order.
        """
        # With fewer than two participants there is nothing to shuffle, and
        # choice() would raise IndexError on the empty slice below.
        if len(ordering) < 2:
            return list(ordering)

        first = choice(ordering[:-1])
        remaining = [p for p in ordering if p != first]
        shuffle(remaining)
        return [first] + remaining

    for i in tqdm(range(iterations)):

        # Keep the initial 1..n order for the first round, reshuffle afterwards.
        if i > 0:
            ordering = shuffle_order(ordering)

        # follow ordering
        for participant_id in ordering:

            # Random dropout, and never let the same participant speak twice
            # in a row.
            if random() < dropout_chance or last_speaker == participant_id:
                continue

            speaker_id = f"speaker_{participant_id}"
            persona = participant_personas[participant_id - 1]
            response = client.chat.completions.create(
                model=openai_model_id,
                messages=build_message(conversation_history, speaker_id, persona, 5),
                store=False
            )
            message = response.choices[0].message.content
            conversation_history.append({"role": "assistant", "name": speaker_id, "content": message})
            last_speaker = participant_id

    return conversation_history

In [5]:
# One persona per participant, in speaker-number order.
personas = [
    "a software engineer in big tech with mainly internal work",
    "an open source developer with experience in major upstream projects",
    "a founder of a startup",
]

# 20 rounds, three speakers, and a 0.3 chance that any given turn is skipped.
conversation = run_conversation(
    iterations=20,
    openai_model_id="gpt-4o",
    participant_count=3,
    participant_personas=personas,
    topic=TOPIC,
    system_prompt=NEW_SYSTEM_PROMPT,
    dropout_chance=0.3,
)

100%|██████████| 20/20 [02:52<00:00,  8.63s/it]


In [7]:
# Write the generated conversation to disk and keep the transcript text for
# the analysis cells that follow.
transcript = save_conversation(
    filename="long_conversation.txt",
    conversation_history=conversation,
)

## Our Old LLM Analyzer
We love our good buddy GPT-4o. Not doing work is super exciting. 

In [11]:
# Have the LLM reviewer score the saved transcript; the review is rendered
# as this cell's output.
generate_llm_conversation_review(conversation=transcript)

**Dynamic:**

Score: 2/10

Verdict: The conversation is highly collaborative.

Explanation: The speakers acknowledge each other’s perspectives, build on each other's ideas, and share experiences to collectively explore solutions. They ask each other questions and express agreement, such as speaker_2's agreement with speaker_3 or speaker_1's recognition of both speakers' contributions. Their shared goal appears to be enhancing documentation processes, rather than debating for superiority.

**Conclusiveness:**

Score: 1/10

Verdict: There is a strong consensus among the speakers.

Explanation: The participants largely agree on the importance of a mixed approach to documentation, combining automated tools and human input. They share strategies and challenges without any significant divergence in opinion. They frequently affirm each other's points and provide complementary solutions, as seen when they discuss the integration of AI and maintaining documentation quality.

**Speaker Identity:**

Score: 6/10

Verdict: There is a moderate diversity in speaker identity.

Explanation: The speakers come from different professional backgrounds: speaker_2 from open source, speaker_3 from a startup, and speaker_1 from a big tech environment. Their methods and focuses differ slightly due to these backgrounds (e.g., speaker_2’s need for community engagement and speaker_3's focus on agility). Despite this, their goals remain similar, and they have overlapping solutions and challenges, like trust in AI tools and embedding feedback into workflow.

**Speaker Fluidity:**

Score: 8/10

Verdict: The speakers maintain consistent perspectives.

Explanation: Each speaker holds consistent viewpoints throughout the conversation. Speaker_2 consistently discusses open-source challenges and community engagement, speaker_3 focuses on agility and AI integration in a startup, and speaker_1 emphasizes the need for comprehensive documentation in big tech. Their perspectives remain steady across different topics discussed, from collaboration tools to feedback loops, reflecting continuity in their professional experiences and challenges.

## Build a Corpus
Now it's time for us to assemble a ConvoKit corpus, which is a common data structure that forms the base of a lot of analysis pipelines in ConvoKit.

In [21]:
# Only the assistant records are real turns; the system prompt is dropped.
assistant_messages = []
for entry in conversation:
    if entry["role"] == "assistant":
        assistant_messages.append(entry)

# One Speaker object per distinct speaker name, keyed by that name.
speakers = {}
for entry in assistant_messages:
    speakers[entry["name"]] = Speaker(id=entry["name"])

print(len(assistant_messages))
print(list(speakers))

# Chain the turns linearly: each utterance replies to the one before it, and
# the first utterance has no parent.
utterances = []
previous_id = None
for index, entry in enumerate(assistant_messages):
    utterance_id = str(index)
    utterances.append(
        Utterance(
            id=utterance_id,
            speaker=speakers[entry["name"]],
            text=entry["content"],
            conversation_id="conversation_1",
            reply_to=previous_id,
        )
    )
    previous_id = utterance_id

corpus = Corpus(utterances=utterances)

print(len(utterances))

32
['speaker_2', 'speaker_3', 'speaker_1']
32


In [46]:
# Shell escape: ask the python3.12 interpreter to run pip and report its
# version, i.e. check whether pip is available for that interpreter.
!python3.12 -m pip --version

/usr/bin/python3.12: No module named pip


In [22]:
from convokit import TextParser, PolitenessStrategies

# Analyze politeness. The corpus is run through TextParser first and then
# through PolitenessStrategies (which, per the ConvoKit docs, works on the
# parses that TextParser attaches to each utterance).
ps = PolitenessStrategies()
parser = TextParser()
corpus = parser.transform(corpus)
corpus = ps.transform(corpus)

# Access politeness features. PolitenessStrategies stores its per-utterance
# result under the "politeness_strategies" metadata key by default; the
# previously used "politeness_strategy_features" key is never written.
for utt in corpus.iter_utterances():
    print(utt.meta["politeness_strategies"])

Convokit requires a SpaCy model to be installed. Run `python -m spacy download MODEL_NAME` and retry.


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
