In [1]:
import json
from pathlib import Path
import re
from typing import List, Dict, Any, Optional


def extract_model_name_from_filename(filename: str) -> str:
    """Return the part of ``filename`` that precedes the first "-simulation".

    If the marker does not occur, ``filename`` is returned unchanged.
    """
    model_name, _marker, _remainder = filename.partition("-simulation")
    return model_name


def count_words(text: str) -> int:
    """Count the word tokens in ``text``.

    A word is a maximal run of regex word characters (letters, digits and
    underscore), so punctuation such as an apostrophe splits a token:
    "don't" counts as two words.

    Args:
        text: The string to measure. ``None``, empty and whitespace-only
            values all yield zero.

    Returns:
        The number of word tokens found.
    """
    if not text:
        return 0
    # Match against the text as written. Lower-casing first is not needed
    # for counting and can change the result: some characters (e.g. U+0130)
    # lower-case to a letter plus a combining mark that \w does not match,
    # which would split a single word in two.
    return len(re.findall(r"\b\w+\b", text))


def analyze_jsonl_simulation_responses(
    simulations_dir_path: str,
) -> Optional[List[Dict[str, Any]]]:
    """Collect per-response word counts from every ``*.jsonl`` file in a directory.

    Each non-blank line of a file is parsed as one JSON object. For every
    entry under ``simulation.userTurns`` that has a ``systemResponse`` with a
    non-null ``utterance``, one record is produced.

    Args:
        simulations_dir_path: Directory that is searched (non-recursively)
            for ``*.jsonl`` files.

    Returns:
        A list of dicts, one per system response, with the keys
        ``file_name``, ``model_name``, ``simulation_index_in_file`` (zero-based
        line index within the file, blank lines included in the numbering),
        ``turn_index``, ``system_response_utterance`` and ``word_count``.
        The list is empty when nothing matched; ``None`` is never returned,
        the ``Optional`` annotation is kept only for interface compatibility.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    sim_dir = Path(simulations_dir_path)
    all_response_metrics: List[Dict[str, Any]] = []
    # glob() yields files in a filesystem-dependent order; sorting keeps the
    # order of the returned records reproducible between runs and machines.
    simulation_files = sorted(sim_dir.glob("*.jsonl"))

    for file_path in simulation_files:
        model_name = extract_model_name_from_filename(file_path.name)
        with open(file_path, "r", encoding="utf-8") as f:
            for i, line in enumerate(f):
                # Blank lines (e.g. stray trailing newlines) are not JSON
                # documents; skip them instead of failing the whole run.
                if not line.strip():
                    continue

                simulation_data = json.loads(line)
                # `or` also covers keys that are present but set to null,
                # which a plain .get(..., default) would pass through.
                simulation = simulation_data.get("simulation") or {}
                user_turns = simulation.get("userTurns") or []

                for turn_idx, turn in enumerate(user_turns):
                    system_response_obj = turn.get("systemResponse")
                    if not system_response_obj:
                        continue

                    utterance = system_response_obj.get("utterance")
                    if utterance is None:
                        continue

                    all_response_metrics.append(
                        {
                            "file_name": file_path.name,
                            "model_name": model_name,
                            "simulation_index_in_file": i,
                            "turn_index": turn_idx,
                            "system_response_utterance": utterance,
                            "word_count": count_words(utterance),
                        }
                    )

    return all_response_metrics

In [2]:
import pandas as pd

# Directory, relative to this notebook, that is scanned for *.jsonl files.
simulations_path = "../simulations"
all_metrics = analyze_jsonl_simulation_responses(simulations_path)

# Summarise only when at least one response record was collected.
if all_metrics:
    summary_stats = ["sum", "mean", "median", "std", "min", "max"]
    word_counts_by_model = pd.DataFrame(all_metrics).groupby("model_name")["word_count"]
    # One row per model, one column per statistic, rounded to 4 decimals.
    df = word_counts_by_model.agg(summary_stats).round(4)
    print(df)

                                 sum     mean  median      std  min  max
model_name                                                              
base                             422  23.4444    20.0  14.2590    7   54
claude-opus-4                   1017  56.5000    59.0   8.4244   25   63
claude-sonnet-4                  954  53.0000    52.0   3.8348   46   61
gemini-2.5-flash-preview-05-20   799  44.3889    43.0   6.3165   34   56
gemini-2.5-pro-preview           812  45.1111    48.0   6.5878   32   54
gpt-4.1                          980  54.4444    55.0   2.2022   51   59
gpt-4o                           927  51.5000    52.0   2.6624   46   56
