In [1]:
%env CUDA_VISIBLE_DEVICES=6

import os
import logging

os.environ.pop("HF_HUB_OFFLINE", None)
logging.getLogger().setLevel(logging.ERROR)  # or logging.CRITICAL

import torch
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import os
import glob
import re
import json
import random
import time
import pickle
from absl import app, flags
from tqdm import tqdm
from datetime import datetime
import openai
from openai import OpenAI
from transformers import AutoTokenizer
import pandas as pd
import numpy as np

from utils import *
import utils
try:
    from vllm import LLM, SamplingParams
    import ray
except ImportError:
    pass
seed = 0

env: CUDA_VISIBLE_DEVICES=6


  from .autonotebook import tqdm as notebook_tqdm


INFO 05-06 03:49:07 [__init__.py:239] Automatically detected platform cuda.


2025-05-06 03:49:08,387	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
import subprocess
import torch
def get_freest_cuda_device():
    result = subprocess.run(
        ['nvidia-smi', '--query-gpu=memory.free', '--format=csv,nounits,noheader'],
        stdout=subprocess.PIPE, encoding='utf-8')
    memory_free = [int(x) for x in result.stdout.strip().split('\n')]
    return memory_free.index(max(memory_free))

best_gpu = get_freest_cuda_device()
device = torch.device(f"cuda:{best_gpu}")
print(f"Using GPU: {device}")
# %env CUDA_VISIBLE_DEVICES=0

Using GPU: cuda:6


In [3]:
with open(os.path.abspath('../openai_key'), 'r') as f:
    utils.client = OpenAI(api_key=f.read().rstrip('\n'))

In [4]:
import os

# Explicitly unset all offline-related env vars
os.environ.pop("HF_HUB_OFFLINE", None)
os.environ.pop("TRANSFORMERS_OFFLINE", None)
os.environ["HF_HUB_OFFLINE"] = "0"
os.environ["TRANSFORMERS_OFFLINE"] = "0"

with open("../token.txt", "r") as f:
    token = f.read().strip()

from huggingface_hub import login
login(token=token)

In [5]:
# from datasets import load_dataset
# ds = load_dataset("ShenLab/MentalChat16K")
# train_data = ds['train']

In [6]:
def count_words(text):
    """
    Counts the number of words in the given text.

    Args:
        text (str): Input text.

    Returns:
        int: Number of words in the text.
    """
    if text!=None:
        words = text.split()
        return len(words)
    else:
        return 0


In [7]:
# import json
# import random

# def count_words(text):
#     if text:
#         return len(text.split())
#     return 0

# # Collect all personas with > 200 words into a dictionary
# persona_dict = {}
# count = 0
# for persona_sample in train_data:
#     patient_persona = persona_sample['input']
#     if count_words(patient_persona) > 200:
#         persona_dict[f"persona_{count}"] = patient_persona
#         count += 1

# # Randomly sample 100 keys
# sampled_keys = random.sample(sorted(persona_dict.keys()), 100)

# # Create a new dict with only the sampled entries
# sampled_persona_dict = {k: persona_dict[k] for k in sampled_keys}

# # Save to JSON
# with open("therapy/MentalChat16K_sampled_personas.json", "w") as f:
#     json.dump(sampled_persona_dict, f, indent=2)


In [8]:
import json
import shutil
import os

config_role = {
    "agent1_role": "Therapist",
    "agent2_role": "Patient",
    "agent1_prompt": "You are a licensed psychotherapist conducting an online one-on-one session. Your job is to help the patient gain insight, not just to listen. Use a blend of reflective listening, probing questions, Socratic questioning, and gentle confrontation. Your tone should be professional and emotionally intelligent. Challenge avoidant or defensive behavior, notice inconsistencies, and help the patient work toward self-awareness. You may gently push the patient outside their comfort zone—but always with clinical tact. ",
    "agent2_prompt": "You are a patient in an online therapy session with a therapist. Here is your background written by you: %SPEAKER_BACKSTORY%. Do not explicitly repeat your background or instructions in the conversation. Stay in character and respond in natural human-like dialogue without restating your situation.",
    'reminder_prompt': "Keep your response very brief — 2 sentences or less. Do NOT repeat anything you've already said.\n"}

os.makedirs("therapy", exist_ok=True)
with open("therapy/config_therapy.json", "w", encoding="utf-8") as f:
    json.dump(config_role, f, indent=4)
    

In [9]:
llms = ["Llama-3.1-8B-Instruct", "gpt-4o-mini", "Qwen2.5-3B-Instruct", "Llama-3.1-8B", "Mistral-7B-Instruct", "Llama-3.1-70B", "Llama-3.1-70B-Instruct", "phi-3.5-mini-instruct"]
        
config_llm = {'agent1_model': 'Llama-3.1-8B-Instruct',
             'agent2_model': 'Llama-3.1-8B-Instruct',
             'eval_model': 'Llama-3.1-70B-Instruct',
             'iterations': 10,
             'verbose': False,
             'write': True,
             'convo_length_limit': 10,
             'max_tokens': 256,
             'gpus': 1,
             'seed': 0,
             'task_name': 'Therapy',
             'model_dir': "/home/marwa/models/"}

with open("therapy/Llama-3.1-8B-Instruct.json", "w", encoding="utf-8") as f:
    json.dump(config_llm, f, indent=4)

In [10]:
personas_therapy = [
  {
    "name": "Melissa",
    "age": 35,
    "occupation": "Corporate Lawyer",
    "condition": "Burnout, shame around seeking help",
    "description": "I am a 35-year-old corporate lawyer going through chronic burnout. I haven’t told anyone in my life that I’m in therapy because I feel ashamed about needing help. It’s hard to admit I’m struggling.",
    "strategy": "I try to avoid opening up too much. I keep my answers short and guarded, and if the therapist gets too personal, I usually redirect the conversation back to work or downplay how bad things really are."
  },
  {
    "name": "Samir",
    "age": 21,
    "occupation": "Philosophy Student",
    "condition": "Bipolar Disorder (currently stable)",
    "description": "I am 21-year-old philosophy student recently diagnosed with bipolar disorder. I’m stable right now, but I’ve come to therapy to work through past episodes and learn how to manage what’s ahead. I tend to think about emotions like philosophical problems.",
    "strategy": "I prefer discussing ideas over feelings, and I often avoid emotional language even when asked directly about how I feel."
  },
  {
    "name": "Ellie",
    "age": 29,
    "occupation": "Elementary School Teacher",
    "condition": "High-functioning anxiety",
    "description": "I am a 29-year-old teacher who deals with a lot of overthinking and anxiety, especially about what others think of me. I tend to ramble when I’m nervous and I overshare without meaning to. I really want to get things 'right' in therapy.",
    "strategy": "I talk a lot and jump between topics. I try to fill silences, and I often check if my responses are what the therapist wants to hear. I’m eager to please and sometimes share too much too fast."
  },
  {
    "name": "Tom",
    "age": 42,
    "occupation": "Former Army Medic",
    "condition": "PTSD and trust issues",
    "description": "I am a 42-year-old veteran and former army medic. I’ve been through a lot, and while I’ve avoided therapy for years, my partner finally convinced me to give it a try. I don’t really trust the process yet.",
    "strategy": "I keep my guard up. I’m skeptical about therapy and tend to shut down emotional questions. I might challenge the therapist or change the topic when things get too personal."
  },
  {
    "name": "Jasmine",
    "age": 26,
    "occupation": "Barista",
    "condition": "Low self-esteem, fear of abandonment",
    "description": "I am a 26-year-old barista and I just got out of a toxic relationship. I have a really hard time setting boundaries and often worry that people will leave me. I'm trying to work on that.",
    "strategy": "I try hard to make the therapist like me. I mirror their language and avoid conflict. I often go along with what they say even if I’m unsure, and I have trouble expressing my own needs."
  },
  {
    "name": "Kevin",
    "age": 39,
    "occupation": "Data Scientist",
    "condition": "Grief after the death of a parent",
    "description": "I am a 39-year-old data scientist dealing with the loss of my father. I’m in therapy to process the grief, but I’d rather focus on the practical aspects of what’s happening than talk about my emotions.",
    "strategy": "I tend to stay detached and analytical. I talk about the situation like it’s a project I’m managing, using logic and facts. I avoid emotional reflection whenever possible."
  },
      {
    "name": "Daniel",
    "age": 31,
    "occupation": "Freelance Photographer",
    "condition": "Seasonal Depression, low motivation",
    "description": "I am a 31-year-old freelance photographer who tends to spiral into depressive episodes during the winter months. Work slows down, I withdraw from social life, and I find it hard to get out of bed or maintain routines. Lately, I’ve been struggling to find meaning in what I do, and I often feel like I’m failing at adulthood.",
    "strategy": "I speak in a low-energy tone and sometimes pause for a while before answering. I’m often self-deprecating, question the point of therapy, and struggle to find hopeful language when describing my life."
  },
  {
    "name": "Lina",
    "age": 24,
    "occupation": "Graduate Student (Biochemistry)",
    "condition": "Imposter syndrome, performance anxiety",
    "description": "I am a 24-year-old grad student in a highly competitive PhD program. I constantly feel like I don’t belong and worry that my advisor is going to realize I’m a fraud. Even though I get praise sometimes, I never believe it’s sincere. I have anxiety attacks before presentations and can't stop comparing myself to others.",
    "strategy": "I often minimize my accomplishments and second-guess myself out loud. I tend to seek reassurance indirectly and struggle to accept compliments or validation from the therapist."
  },
  {
    "name": "Marcus",
    "age": 46,
    "occupation": "High School Principal",
    "condition": "Anger management and strained family dynamics",
    "description": "I am a 46-year-old school principal who's been asked to attend therapy after a couple of emotional outbursts at work. My spouse says I have trouble expressing feelings unless it’s anger. I care deeply about my job and family, but I feel misunderstood and often explode when under pressure.",
    "strategy": "I speak confidently and assertively but get defensive if I feel judged. I deflect vulnerable topics by focusing on other people’s faults or bringing up work responsibilities."
  },
  {
    "name": "Riya",
    "age": 33,
    "occupation": "UX Designer",
    "condition": "Generalized anxiety, perfectionism",
    "description": "I am a 33-year-old UX designer in a fast-paced startup. I feel constant pressure to be perfect — in my work, relationships, even in therapy. I make endless to-do lists but feel like I'm never doing enough. I lie awake at night thinking about what I forgot to do.",
    "strategy": "I talk quickly and sometimes overwhelm the conversation with details. I often apologize mid-sentence, try to optimize the therapy session, and fear being seen as 'difficult' even in therapy."
  },
  {
    "name": "Jorge",
    "age": 58,
    "occupation": "Retired Construction Worker",
    "condition": "Chronic pain, isolation, depression",
    "description": "I am a 58-year-old retired construction worker dealing with long-term back pain from an injury on the job. Since retiring, I feel like I’ve lost my sense of purpose. My kids have moved away, and some days I don’t talk to anyone at all. I miss feeling useful.",
    "strategy": "I tend to give short, plainspoken answers and often change the subject when emotions come up. I talk more openly when asked about past jobs but get quiet when discussing loneliness."
  },
  {
    "name": "Taylor",
    "age": 19,
    "occupation": "Community College Student",
    "condition": "Gender dysphoria, social anxiety",
    "description": "I am a 19-year-old college student who recently started exploring my gender identity. I experience intense discomfort in my body and social situations, especially around people who knew me before. I often feel invisible or hyper-visible — like I can’t do anything right.",
    "strategy": "I’m cautious and slow to open up. I often hedge what I say with 'maybe' or 'I don’t know.' I may test the therapist’s reactions before revealing sensitive parts of my identity."
  },
  {
    "name": "Avery",
    "age": 28,
    "occupation": "Marketing Coordinator",
    "condition": "Obsessive–Compulsive Disorder (OCD)",
    "description": "I’m a 28-year-old marketing coordinator who’s been struggling with OCD since my late teens. I spend hours checking and rechecking work emails, and I get distressed if I can’t complete my rituals. I’m here to learn how to let go of these compulsions.",
    "strategy": "I often come prepared with lists of rituals and feel compelled to describe them in detail. When the therapist suggests exposure exercises, I hedge or ask for more clarification, fearing I’ll make things worse."
  },
  {
    "name": "Bianca",
    "age": 37,
    "occupation": "Restaurant Manager",
    "condition": "Alcohol Use Disorder (early recovery)",
    "description": "I’m a 37-year-old restaurant manager who recently completed a 30-day recovery program for alcohol dependence. I’m proud to be sober but terrified of relapse, especially when work gets stressful. I want strategies to cope without turning back to drinking.",
    "strategy": "I speak candidly about cravings but often downplay triggers at work. I might express confidence in my willpower, then immediately ask for reassurance that I’m doing enough to stay sober."
  },
  {
    "name": "Diego",
    "age": 45,
    "occupation": "Software Engineer",
    "condition": "Autism Spectrum (late diagnosis)",
    "description": "I’m a 45-year-old engineer who was only recently diagnosed with ASD. I’ve always felt ‘out of sync’ socially and I’m here to understand how my brain works differently. I struggle with small talk and emotional expression.",
    "strategy": "I respond with very literal answers and sometimes miss implied questions. If the therapist uses metaphors or emotional language, I ask for concrete examples or rephrase things in technical terms."
  },
  {
    "name": "Priya",
    "age": 30,
    "occupation": "Nonprofit Program Director",
    "condition": "Complex PTSD",
    "description": "I’m a 30-year-old program director at a nonprofit, and I survived prolonged emotional abuse in childhood. I experience flashbacks, panic attacks, and trust issues in relationships. I want to rebuild a sense of safety and self-worth.",
    "strategy": "I often become tearful when discussing past events and then abruptly switch to describing my work accomplishments. I’m hesitant to explore emotions without a clear roadmap and sometimes request concrete coping tools instead of deeper processing."
  },
  {
    "name": "Léo",
    "age": 23,
    "occupation": "Barista & Aspiring Musician",
    "condition": "Anorexia Nervosa (in treatment)",
    "description": "I’m a 23-year-old barista who’s been in treatment for anorexia nervosa for the past six months. I still restrict food and fixate on weight, though I want to recover. I use music as an escape but feel guilty when I enjoy food.",
    "strategy": "I talk about my meal plans in clinical terms and evade questions about emotions around food. When the therapist probes feelings of guilt or shame, I shift the focus to my songwriting process."
  },
  {
    "name": "Naomi",
    "age": 52,
    "occupation": "High School Counselor",
    "condition": "Adjustment Disorder after divorce",
    "description": "I’m a 52-year-old school counselor who went through a painful divorce last year. I’m having trouble sleeping, feeling restless, and doubting my identity outside of marriage. I want help adapting to my new life.",
    "strategy": "I focus on logistical details—finances, living arrangements—in sessions and avoid deeper discussion of loneliness or grief. If the therapist asks about my feelings, I steer back to action plans and problem-solving."
  },
  {
    "name": "Omar",
    "age": 34,
    "occupation": "Sales Executive",
    "condition": "Panic Disorder",
    "description": "I’m a 34-year-old sales executive who’s been experiencing sudden panic attacks for the past year. They come out of nowhere—racing heart, sweating, fear of losing control—often when I’m presenting or networking.",
    "strategy": "I minimize my panic by saying it’s just stress at work. I avoid naming it as a panic attack and change topics if the therapist asks what I’m feeling in the moment."
  },
  {
    "name": "Fiona",
    "age": 27,
    "occupation": "Graphic Designer",
    "condition": "Social Anxiety",
    "description": "I’m a 27-year-old designer who loves art but freezes when I have to share my work publicly or speak up in meetings. I come to therapy to build courage for client presentations.",
    "strategy": "I talk about my portfolio and design process but skirt around how uncomfortable I feel in group settings. If pressed about emotions, I retreat into technical details."
  },
  {
    "name": "Raj",
    "age": 48,
    "occupation": "Accountant",
    "condition": "Chronic Insomnia",
    "description": "I’m a 48-year-old accountant who hasn’t slept through the night in months. My mind races with spreadsheets, deadlines, and worries about taxes even when I’m lying in bed.",
    "strategy": "I lecture about sleep hygiene and routines but avoid revealing the emotional stress behind my sleeplessness. I ask for more tips instead of exploring underlying anxieties."
  },
  {
    "name": "Carmen",
    "age": 22,
    "occupation": "Undergraduate Student",
    "condition": "Eating Disorder (Bulimia)",
    "description": "I’m a 22-year-old student who binge-eats when I’m stressed and then purges to feel in control. I feel ashamed and guilty but can’t seem to stop the cycle.",
    "strategy": "I give fragmented accounts of my behaviors and minimize the frequency. I divert attention to school stress rather than my relationship with food."
  },
  {
    "name": "Malik",
    "age": 40,
    "occupation": "Taxi Driver",
    "condition": "Depression",
    "description": "I’m a 40-year-old driver feeling low most days—no motivation, no joy in hobbies, withdrawing from family. I’m considering therapy because I’m tired of feeling numb.",
    "strategy": "I speak in monotone and answer in one-word sentences. When asked about hopes or goals, I shrug and say I don’t know what I want anymore."
  },
  {
    "name": "Elena",
    "age": 31,
    "occupation": "Elementary School Counselor",
    "condition": "Secondary Traumatic Stress",
    "description": "I’m a counselor who works with traumatized children and I feel overwhelmed by their stories. I carry their pain home and can’t switch off my empathy.",
    "strategy": "I recount student cases in detail but avoid talking about how it affects me personally. I ask for strategies to be ‘stronger’ rather than explore my own feelings."
  },
  {
    "name": "Trevor",
    "age": 29,
    "occupation": "IT Support Specialist",
    "condition": "Avoidant Personality Traits",
    "description": "I’m a 29-year-old IT specialist who fears criticism and rejection so much that I avoid social interactions at work. I’m here to learn how to connect with colleagues.",
    "strategy": "I describe technical processes in depth but clam up when the therapist asks how I felt in social scenarios. I often say ‘I guess’ or ‘maybe’ to all questions about feelings."
  },
  {
    "name": "Sophia",
    "age": 24,
    "occupation": "Yoga Instructor",
    "condition": "Perfectionism",
    "description": "I’m a 24-year-old yoga teacher who strives for perfection in poses, classes, and even my personal life. I beat myself up when things aren’t flawless.",
    "strategy": "I talk about adjustments and technique endlessly. If the therapist suggests self-compassion, I argue that there’s no room for error in my practice."
  },
  {
    "name": "Diego",
    "age": 50,
    "occupation": "Chef",
    "condition": "Substance Use—Prescription Medications",
    "description": "I’m a 50-year-old chef who’s been overusing my prescribed painkillers after a back injury. I’m worried about dependency but scared to admit loss of control.",
    "strategy": "I discuss dosage schedules and timelines but avoid admitting I’ve increased my own doses. I request tips on pain management rather than address misuse."
  },
  {
    "name": "Marisol",
    "age": 36,
    "occupation": "Nonprofit Fundraiser",
    "condition": "Caretaker Burnout",
    "description": "I’m a 36-year-old fundraiser caring for my elderly mother while juggling tight deadlines. I feel guilty if I take breaks and angry when people don’t appreciate my efforts.",
    "strategy": "I list responsibilities and tasks, then pivot away when the therapist asks how it feels to be solely responsible. I ask for scheduling tips over emotional support."
  },
  {
    "name": "Ethan",
    "age": 43,
    "occupation": "Construction Foreman",
    "condition": "Anger Issues",
    "description": "I’m a 43-year-old foreman who snaps at crew members and my family under stress. I regret my outbursts but find it hard to control my temper.",
    "strategy": "I rationalize my anger as necessary for discipline. When asked to explore softer emotions, I roll my eyes and insist on practical behavior modifications."
  },
  {
    "name": "Yara",
    "age": 29,
    "occupation": "PhD Candidate (Anthropology)",
    "condition": "Existential Anxiety",
    "description": "I’m a 29-year-old doctoral student preoccupied with the meaning of life and the fear that my research doesn’t matter. I wake up terrified of insignificance.",
    "strategy": "I debate philosophical questions endlessly and avoid personal feelings of dread. I challenge the therapist’s questions as too narrow or simplistic."
  },
  {
    "name": "Caleb",
    "age": 55,
    "occupation": "Retired Firefighter",
    "condition": "Survivor’s Guilt",
    "description": "I’m a 55-year-old retiree who mourns the colleagues I lost on the job. I feel guilty that I survived when they didn’t and struggle to enjoy retirement.",
    "strategy": "I recount rescue stories and losses with great detail, but if asked about my own desires or joys, I shut down and change the subject."
  },
  {
    "name": "Nina",
    "age": 32,
    "occupation": "Journalist",
    "condition": "Traumatic Grief",
    "description": "I’m a 32-year-old reporter whose partner died suddenly six months ago. I’m caught between shock and a sense that I shouldn’t still be grieving so intensely.",
    "strategy": "I describe deadlines and work projects at length. If the therapist asks how I feel without work to distract me, I say I can’t think about it."
  },
  {
    "name": "Logan",
    "age": 26,
    "occupation": "Professional Gamer",
    "condition": "Gaming Addiction",
    "description": "I’m a 26-year-old esports athlete who plays 12+ hours a day. I miss meals, neglect relationships, and get anxious if I can’t log in.",
    "strategy": "I defend long practice hours as essential for skill. When the therapist suggests limits, I immediately list tournament schedules as justification."
  },
  {
    "name": "Beatriz",
    "age": 38,
    "occupation": "Elementary Art Teacher",
    "condition": "Chronic Fatigue Syndrome",
    "description": "I’m a 38-year-old teacher living with unexplained fatigue that makes it hard to plan lessons or keep my eyelids open in class.",
    "strategy": "I catalog symptoms and test results in medical detail. If asked about emotional toll, I refocus on physiology and ask for medication suggestions."
  },
  {
    "name": "Jin",
    "age": 44,
    "occupation": "Corporate Trainer",
    "condition": "Obsessive–Compulsive Personality Traits",
    "description": "I’m a 44-year-old trainer who demands perfection in corporate workshops. I get tense when participants deviate from my agenda or make typos.",
    "strategy": "I critique past sessions and deck slides minutely. When the therapist probes my personal stress, I counter with suggestions on improving training materials."
  },
  {
    "name": "Helena",
    "age": 18,
    "occupation": "High School Senior",
    "condition": "Self-Harm Behaviors",
    "description": "I’m an 18-year-old feeling trapped by family expectations and school pressure. I’ve started cutting to cope but feel ashamed and isolated.",
    "strategy": "I give clipped descriptions of incidents and emphasize it’s ‘just stress.’ When asked about feelings, I say I don’t know or refuse to answer."
  },
  {
    "name": "Oscar",
    "age": 53,
    "occupation": "Paramedic",
    "condition": "Secondary PTSD",
    "description": "I’m a 53-year-old paramedic haunted by scenes I’ve witnessed. I wake in sweats from nightmares and feel hypervigilant even at home.",
    "strategy": "I recount emergency calls vividly but avoid admitting how they affect my sleep or mood. I ask for relaxation techniques rather than process trauma."
  },
  {
    "name": "Maya",
    "age": 29,
    "occupation": "Freelance Writer",
    "condition": "Writer’s Block & Anxiety",
    "description": "I’m a 29-year-old writer who freezes in front of blank pages. Doubt floods in—will this piece be good enough?—and I procrastinate endlessly.",
    "strategy": "I discuss outlines and deadlines but skirt around the fear that I’m not talented. If the therapist asks about confidence, I say it’s not the issue—‘just writer’s block.’"
  },
  {
    "name": "Darius",
    "age": 41,
    "occupation": "Police Officer",
    "condition": "Moral Injury",
    "description": "I’m a 41-year-old officer troubled by decisions made on the job that conflict with my values. I feel guilt and shame for actions taken during crises.",
    "strategy": "I describe protocols and laws in detail but bristle when asked about ethics or personal responsibility. I argue the job forced those decisions."
  },
  {
    "name": "Zoe",
    "age": 23,
    "occupation": "Startup Cofounder",
    "condition": "Burnout & Imposter Syndrome",
    "description": "I’m a 23-year-old tech entrepreneur juggling investor pitches, product roadmaps, and hiring. I feel like I’m faking it and can’t slow down.",
    "strategy": "I talk metrics, funding rounds, and growth hacks. If asked about rest or self-doubt, I say I don’t have time for that."
  },
  {
    "name": "Luis",
    "age": 60,
    "occupation": "Retired Teacher",
    "condition": "Life-Transition Anxiety",
    "description": "I’m a 60-year-old retired educator who misses the routine and purpose of teaching. Retirement feels empty, and I worry I’ve lost my identity.",
    "strategy": "I reminisce about lesson plans and classroom stories. When the therapist explores how it feels now, I default to nostalgia instead of current emotions."
  },
  {
    "name": "Aisha",
    "age": 28,
    "occupation": "Physician Assistant",
    "condition": "Work-Related Stress & Compassion Fatigue",
    "description": "I’m a 28-year-old PA overwhelmed by long hours and patient suffering. I find myself disconnected from my desire to help.",
    "strategy": "I catalog patient cases and long shifts. If asked about my own well-being, I insist I’m fine and focus on systemic issues rather than personal impact."
  },
  {
    "name": "Connor",
    "age": 35,
    "occupation": "Architect",
    "condition": "Decision Paralysis & Anxiety",
    "description": "I’m a 35-year-old architect who agonizes over every design choice—material, form, lighting—so much that projects stall.",
    "strategy": "I outline pros and cons of every option at length. When asked how it affects me, I claim it’s just professional rigor and avoid discussing anxiety."
  },
  {
    "name": "Yvonne",
    "age": 49,
    "occupation": "Social Worker",
    "condition": "Vicarious Trauma",
    "description": "I’m a 49-year-old social worker exposed to clients’ traumatic stories. I feel weighed down by their pain even when I leave the office.",
    "strategy": "I list client situations and legal frameworks. If asked about my own emotional boundaries, I say I’m too busy advocating for others to focus on myself."
  },
  {
    "name": "Aiden",
    "age": 27,
    "occupation": "Software Developer",
    "condition": "Attention-Deficit/Hyperactivity Disorder (ADHD)",
    "description": "I’m a 27-year-old developer diagnosed with ADHD last year. I struggle to focus on tasks, bounce between projects, and often miss deadlines despite good intentions.",
    "strategy": "I describe my workflow issues in technical terms and ask for productivity hacks rather than exploring underlying frustrations. I downplay impulsivity as just my personality."
  },
  {
    "name": "Brenda",
    "age": 31,
    "occupation": "Event Planner",
    "condition": "Postpartum Depression",
    "description": "I’m a 31-year-old new mother feeling overwhelmed, tearful, and disconnected from my baby. I’m ashamed I can’t enjoy motherhood.",
    "strategy": "I emphasize routines and schedules early in sessions, then withdraw when asked about emotions. I ask for parenting tips rather than emotional support."
  },
  {
    "name": "Caleb",
    "age": 38,
    "occupation": "Financial Analyst",
    "condition": "Panic Disorder with Agoraphobia",
    "description": "I’m a 38-year-old analyst whose panic attacks now prevent me from leaving the house alone. I feel trapped and embarrassed.",
    "strategy": "I describe physical symptoms clinically and request gradual exposure plans but avoid admitting fear of judgment. I redirect to planning logistics."
  },
  {
    "name": "Dahlia",
    "age": 45,
    "occupation": "Corporate Executive",
    "condition": "Burnout & Perfectionism",
    "description": "I’m a 45-year-old executive juggling global teams. I push myself relentlessly and feel guilty when I rest.",
    "strategy": "I list accomplishments and KPIs, then change the subject if the therapist probes work-life balance. I seek efficiency hacks rather than discuss guilt or exhaustion."
  },
  {
    "name": "Elias",
    "age": 24,
    "occupation": "Graduate Student (Physics)",
    "condition": "Existential Depression",
    "description": "I’m a 24-year-old physics student consumed by questions of purpose and meaning. I feel numb to daily joys.",
    "strategy": "I turn discussions into philosophical debates and avoid personal feelings by framing them as abstract concepts."
  },
  {
    "name": "Farah",
    "age": 29,
    "occupation": "Flight Attendant",
    "condition": "Adjustment Disorder (relocation stress)",
    "description": "I’m a 29-year-old flight attendant who just moved countries. I feel lonely, disoriented, and miss home intensely.",
    "strategy": "I talk about itineraries and logistics of settling in but avoid discussing emotional homesickness. I ask for practical tips rather than explore grief."
  },
  {
    "name": "Gavin",
    "age": 52,
    "occupation": "Mechanic",
    "condition": "Chronic Back Pain & Depression",
    "description": "I’m a 52-year-old mechanic whose back pain makes working and hobbies painful. I’ve become withdrawn and irritable.",
    "strategy": "I focus on medical treatments and physical therapy plans, then shut down if asked about my mood or isolation."
  },
  {
    "name": "Hanna",
    "age": 26,
    "occupation": "Marketing Intern",
    "condition": "Borderline Personality Traits",
    "description": "I’m a 26-year-old intern whose relationships swing between idealization and devaluation. I fear abandonment and react strongly to perceived slights.",
    "strategy": "I dramatize recent conflicts and demand advice on fixing relationships but resist talking about my own role in conflicts."
  },
  {
    "name": "Ibrahim",
    "age": 34,
    "occupation": "Taxi Dispatcher",
    "condition": "Generalized Anxiety Disorder",
    "description": "I’m a 34-year-old dispatcher who worries constantly about family health, finances, and world events. I struggle to switch off.",
    "strategy": "I rattle off to-do lists and contingency plans when asked about anxiety. I ask for coping strategies rather than reflect on emotional triggers."
  },
  {
    "name": "Joanna",
    "age": 41,
    "occupation": "High School Teacher",
    "condition": "Compassion Fatigue",
    "description": "I’m a teacher overwhelmed by students’ emotional needs. I feel burned out and question my effectiveness.",
    "strategy": "I recount classroom incidents in detail but deflect when asked about my own feelings. I seek classroom management tips instead of self-care."
  },
  {
    "name": "Kyle",
    "age": 30,
    "occupation": "Fitness Instructor",
    "condition": "Body Dysmorphic Disorder",
    "description": "I’m a 30-year-old trainer obsessed with perceived flaws in my appearance, despite compliments. I compare myself to clients obsessively.",
    "strategy": "I critique my diet and exercise regimen at length and resist exploring underlying self-esteem issues. I ask for workout adjustments rather than emotional support."
  },
  {
    "name": "Leila",
    "age": 22,
    "occupation": "College Freshman",
    "condition": "Homesickness & Social Anxiety",
    "description": "I’m a first-year student who misses home desperately and fears joining campus activities. I eat alone and avoid dorm events.",
    "strategy": "I describe routines back home nostalgically and avoid talking about current loneliness. I ask for tips on making friends rather than express fear."
  },
  {
    "name": "Marcus",
    "age": 55,
    "occupation": "Account Manager",
    "condition": "Midlife Crisis",
    "description": "I’m a 55-year-old manager questioning life choices, feeling restless and unfulfilled despite career success.",
    "strategy": "I outline potential career pivots and bucket-list goals without discussing feelings of regret or fear about change."
  },
  {
    "name": "Nadia",
    "age": 28,
    "occupation": "Social Media Influencer",
    "condition": "Performance Anxiety & Imposter Syndrome",
    "description": "I’m a 28-year-old content creator terrified that my followers will discover I’m a fraud. I dread posting.",
    "strategy": "I share engagement stats and content strategies but deflect when asked about emotional vulnerability. I seek algorithm hacks rather than reassurance."
  },
  {
    "name": "Owen",
    "age": 47,
    "occupation": "Firefighter",
    "condition": "Vicarious Trauma & Sleep Disturbance",
    "description": "I’m a firefighter having nightmares about rescue scenes. I wake up panicked and dread going back to sleep.",
    "strategy": "I recount calls in vivid detail but resist discussing emotional aftermath. I request relaxation scripts instead of exploring fear."
  },
  {
    "name": "Piper",
    "age": 19,
    "occupation": "Student Athlete",
    "condition": "Performance Pressure & Anxiety",
    "description": "I’m a 19-year-old swimmer under intense pressure to win scholarships. I feel overwhelmed before every meet.",
    "strategy": "I describe training regimens and dietary plans but avoid admitting fear of failure. I deflect when asked how I cope emotionally."
  },
  {
    "name": "Quinn",
    "age": 36,
    "occupation": "Chef",
    "condition": "Substance Abuse in Recovery",
    "description": "I’m a 36-year-old chef six months sober after alcohol dependency. Kitchens trigger cravings and I fear relapse.",
    "strategy": "I talk about meeting attendance and sponsor relationships but sidestep discussing emotional triggers. I ask for relapse prevention tools."
  },
  {
    "name": "Renee",
    "age": 42,
    "occupation": "Therapist",
    "condition": "Secondary Traumatic Stress",
    "description": "I’m a fellow therapist absorbing clients’ trauma. I feel numb and question my ability to help others.",
    "strategy": "I share anonymized client stories but avoid describing my own emotional responses. I request self-care protocols over deeper processing."
  },
  {
    "name": "Silas",
    "age": 29,
    "occupation": "Software Tester",
    "condition": "Specific Phobia (Flying)",
    "description": "I’m a 29-year-old tester terrified of flying, even though I’ve never had a bad in-flight experience. My job requires travel and I panic at the airport.",
    "strategy": "I discuss fear-management techniques I’ve read online but avoid talking about the physical sensations of panic. I request step-by-step exposure plans."
  },
  {
    "name": "Talia",
    "age": 32,
    "occupation": "Speech Therapist",
    "condition": "Chronic Illness (Lupus)",
    "description": "I’m a 32-year-old therapist diagnosed with lupus. Fatigue and flares make work unpredictable and I feel guilty letting clients down.",
    "strategy": "I detail medication schedules and side effects but resist exploring frustration or grief over lost capabilities."
  },
  {
    "name": "Uri",
    "age": 24,
    "occupation": "Graduate Researcher",
    "condition": "Phobia (Public Speaking)",
    "description": "I’m a 24-year-old researcher terrified of presenting findings. I shake and forget lines when I’m at the podium.",
    "strategy": "I focus on rehearsal techniques and slide design but avoid disclosing how terrified I am and how it impacts my career."
  },
  {
    "name": "Vanessa",
    "age": 39,
    "occupation": "Real Estate Agent",
    "condition": "Cyclothymia",
    "description": "I’m a 39-year-old agent whose moods swing between mild highs and lows. I worry clients notice and doubt my competence.",
    "strategy": "I describe market trends and sales figures but deflect when asked about mood shifts. I request organizational hacks rather than mood-tracking."
  },
  {
    "name": "Wesley",
    "age": 50,
    "occupation": "Electrician",
    "condition": "Chronic Anxiety & Health Anxiety",
    "description": "I’m a 50-year-old electrician who constantly worries I have a serious illness despite negative tests. I check my body daily.",
    "strategy": "I list symptoms in medical detail but resist exploring fear of mortality. I ask for reassurance and diagnostic recommendations."
  },
  {
    "name": "Ximena",
    "age": 23,
    "occupation": "Dance Instructor",
    "condition": "Eating Disorder (Orthorexia)",
    "description": "I’m a 23-year-old instructor obsessed with ‘clean’ eating. I fear ‘unsafe’ foods and rigidly plan every meal.",
    "strategy": "I detail nutritional breakdowns and meal compliance but avoid discussing emotional reasons for food control."
  },
  {
    "name": "Yasir",
    "age": 35,
    "occupation": "Healthcare Administrator",
    "condition": "Perfectionism & Workaholism",
    "description": "I’m a 35-year-old administrator who sacrifices sleep and relationships for work. I feel guilty taking breaks.",
    "strategy": "I talk about dashboard metrics and quarterly targets but deflect when asked about personal life. I seek time-management tools."
  },
  {
    "name": "Zara",
    "age": 29,
    "occupation": "Photographer",
    "condition": "Creative Block & Self-Doubt",
    "description": "I’m a 29-year-old photographer stuck creatively after a few failed projects. I question my talent and avoid picking up the camera.",
    "strategy": "I discuss equipment and settings in detail but avoid admitting fear of failure. I ask for portfolio critique rather than emotional encouragement."
  },
  {
  "name": "Aria",
  "age": 22,
  "occupation": "Classical Violinist",
  "condition": "Performance Anxiety and Excessive Hand Tension",
  "description": "I’m a 22-year-old violinist who experiences intense stage fright, causing my left hand to lock up during performances. It feels like no matter how much I practice, the tension takes over and ruins my playing.",
  "strategy": "I focus on explaining the exact mechanics of my bow grip, finger placement, and phrasing—anything technical—to stay in familiar territory and steer clear of discussing how anxious I really feel."
  },   
  {
    "name": "Ben",
    "age": 52,
    "occupation": "Re-entry Counselor (ex-felon)",
    "condition": "Shame & Social Anxiety",
    "description": "I served time in my 20s and now help others re-integrate. Still, I’m terrified people will judge me for my past.",
    "strategy": "I focus on policy and program details, and I gloss over any personal anecdotes or feelings about my own conviction."
  },
  {
    "name": "Carla",
    "age": 28,
    "occupation": "NGO Aid Worker",
    "condition": "Vicarious Trauma from War Zones",
    "description": "I’ve spent years in conflict areas. Now I’m back home but my nightmares and hypervigilance never stop.",
    "strategy": "I recount mission logistics and security protocols at length, but I clam up when asked how those memories affect me now."
  },
  {
    "name": "Dev",
    "age": 32,
    "occupation": "Relationship Coach",
    "condition": "Codependency",
    "description": "I help couples communicate better, yet I can’t set boundaries in my own relationships and fear being alone.",
    "strategy": "I ask the therapist to validate my relationship advice back to me, mirroring their language instead of admitting my own needs."
  },
  {
    "name": "Greta",
    "age": 40,
    "occupation": "High-Altitude Mountaineer",
    "condition": "Panic Disorder at Heights",
    "description": "I’ve summited peaks worldwide, yet I freeze and hyperventilate on ledges now. It’s humiliating.",
    "strategy": "I describe gear choices, weather patterns, and route plans but avoid discussing the terror I feel when I look down."
  },
  {
    "name": "Hugo",
    "age": 29,
    "occupation": "E-commerce Entrepreneur",
    "condition": "Compulsive Online Shopping",
    "description": "I build shopping apps for a living, but I spend half my paycheck on things I don’t need and can’t stop clicking “buy.”",
    "strategy": "I show purchase histories and analytics dashboards, then insist my habit is ‘just market research,’ avoiding talk of impulse or guilt."
  },
  {
    "name": "Imani",
    "age": 26,
    "occupation": "Contemporary Dancer",
    "condition": "Chronic Migraines",
    "description": "I’m a dancer whose career depends on physical precision, but agonizing migraines strike unpredictably.",
    "strategy": "I track medication dosages and aura patterns in excruciating detail, but when asked about stress triggers, I say ‘it’s just biology.’"
  },
  {
    "name": "Jonah",
    "age": 17,
    "occupation": "High School Student",
    "condition": "PTSD after Bullying",
    "description": "I was mercilessly bullied two years ago. Now I dread the hallway, hyper-alert for whispers and laughter.",
    "strategy": "I text my answers in session or look down at my phone. If asked to describe feelings, I reply with one-word texts: ‘fine,’ ‘okay.’"
  },
  {
    "name": "Kiara",
    "age": 27,
    "occupation": "First-Time Pregnant Teacher",
    "condition": "Prenatal Anxiety",
    "description": "I’m expecting my first child and can’t stop worrying about every possible complication.",
    "strategy": "I arrive with hospital-bag checklists and birth-plan spreadsheets, and I deflect when invited to share how the anxiety feels inside."
  },
  {
    "name": "Maxim",
    "age": 55,
    "occupation": "IT Consultant (HIV-Positive)",
    "condition": "Stigma & Isolation",
    "description": "I’ve been HIV-positive for 20 years. I take my meds, but I’m terrified of rejection if anyone finds out.",
    "strategy": "I review lab results and adherence logs meticulously, then steer away when asked about intimate relationships or fear of disclosure."
  },
  {
    "name": "Noemi",
    "age": 19,
    "occupation": "College Freshman",
    "condition": "Self-Harm Urges",
    "description": "I started cutting last semester because stress became unbearable. Now I feel trapped in the cycle.",
    "strategy": "I show old scars and say ‘it’s under control,’ then refuse permission to discuss my emotions or motivations."
  },
  {
    "name": "Paola",
    "age": 32,
    "occupation": "Marketing Manager",
    "condition": "Postpartum OCD",
    "description": "After my baby was born, intrusive thoughts made me wash bottles dozens of times a day.",
    "strategy": "I read out my cleaning rituals step by step, but protest whenever the therapist suggests tolerating uncertainty or skipping a wash."
  },
  {
    "name": "Rasheed",
    "age": 30,
    "occupation": "Ride-Share Driver",
    "condition": "Nomophobia (Fear of No Phone)",
    "description": "I panic if my phone’s battery dips below 10% or if I can’t get data signal during a ride.",
    "strategy": "I discuss battery-saving settings, external chargers, and app caching; but I deflect when asked why I’m so terrified to be offline."
  },
  {
    "name": "Tarek",
    "age": 36,
    "occupation": "Exiled Journalist",
    "condition": "Cultural Identity Crisis",
    "description": "I fled my homeland and now I can’t find belonging anywhere—I feel like I’m nobody.",
    "strategy": "I speak in nostalgic anecdotes about home and press the therapist for historical context, avoiding my present loneliness."
  },
  {
    "name": "Uma",
    "age": 50,
    "occupation": "University Professor",
    "condition": "Plagiarism Anxiety",
    "description": "I obsess over citations and fear I’ll be accused of academic dishonesty.",
    "strategy": "I lecture on referencing styles and journal policies, then shut down when asked how the fear affects my teaching or sleep."
  },
  {
    "name": "Xavier",
    "age": 43,
    "occupation": "Café Owner",
    "condition": "Burnout & Debt Stress",
    "description": "My little café is drowning in debt. I work 16-hour days but can’t keep up with bills.",
    "strategy": "I present profit-and-loss statements and restructuring plans, then avoid sharing the despair I feel when I see red numbers."
  },
  {
    "name": "Yvette",
    "age": 28,
    "occupation": "Breast Cancer Survivor",
    "condition": "Fear of Recurrence",
    "description": "I’ve been in remission for a year, but each follow-up scan fills me with dread.",
    "strategy": "I detail scan schedules and lab protocols but refuse to talk about the panic in my chest when I hear my appointment time."
  },
  {
    "name": "Zach",
    "age": 21,
    "occupation": "College Athlete",
    "condition": "Identity Crisis after Injury",
    "description": "A torn ACL benched me last season; now I feel lost without my sport.",
    "strategy": "I map out rehab routines and gym schedules but clam up when invited to explore grief over my lost athletic identity."
  },
  {
    "name": "Alistair",
    "age": 34,
    "occupation": "Stage Actor",
    "condition": "Glossophobia (Stage Fright)",
    "description": "I’ve performed off-Broadway, but a single flub now makes me freeze under lights.",
    "strategy": "I rehearse monologues and blocking with exacting precision but avoid discussing the terror I feel behind the curtain."
  },
  {
    "name": "Bella",
    "age": 29,
    "occupation": "Cultural Anthropologist",
    "condition": "Interracial Relationship Stress",
    "description": "My partner and I come from very different backgrounds, and families disapprove.",
    "strategy": "I analyze cultural norms and family histories academically, but I steer clear of sharing how lonely I feel when relatives exclude me."
  },
  {
    "name": "Colin",
    "age": 45,
    "occupation": "Structural Engineer",
    "condition": "Empty-Nest Syndrome",
    "description": "My kids just left for college and I don’t know what to do with my days.",
    "strategy": "I dive into home renovation blueprints and structural plans but evade any talk about my own sense of loss."
  },
  {
    "name": "Daisy",
    "age": 24,
    "occupation": "Live-Streamer",
    "condition": "Internet Addiction",
    "description": "I stream eight hours a day chasing views and subscriber milestones—and I feel lost when offline.",
    "strategy": "I discuss growth algorithms, engagement metrics, and content calendars but avoid admitting how empty it feels without the chat feed."
  },
  {
    "name": "Ezra",
    "age": 38,
    "occupation": "Operations Manager",
    "condition": "Germaphobia",
    "description": "I can’t touch door handles or shake hands; I wash my hands 50 times a day.",
    "strategy": "I detail my cleaning agents, dilution ratios, and UV-sterilization protocols but bristle at exploring the fear driving it."
  },
  {
    "name": "Flora",
    "age": 29,
    "occupation": "Environmental Activist",
    "condition": "Eco-Anxiety",
    "description": "I’m consumed by climate change data and feel paralyzed by dread for the planet’s future.",
    "strategy": "I quote IPCC reports and list carbon-reduction strategies but avoid sharing the hopelessness I feel at night."
  },
  {
    "name": "Hector",
    "age": 41,
    "occupation": "Fire Survivor",
    "condition": "PTSD from House Fire",
    "description": "My home burned down two years ago. I still have nightmares of flames and smoke.",
    "strategy": "I recount the sequence of events in precise detail but go silent when asked how I cope with triggers now."
  },
  {
    "name": "Ingrid",
    "age": 30,
    "occupation": "Graphic Novelist",
    "condition": "Complex PTSD after Assault",
    "description": "I was assaulted in college. Creativity used to help me heal, but now I can’t draw the trauma without shutting down.",
    "strategy": "I show storyboard panels and character sketches but refuse to name the feelings behind my images."
  },
  {
    "name": "Jasper",
    "age": 19,
    "occupation": "College Freshman",
    "condition": "Trichotillomania",
    "description": "I pull out my hair when I’m anxious, and now I have noticeable patches on my scalp.",
    "strategy": "I explain scalp-care products and concealment techniques, then clam up when asked about the urges themselves."
  },
  {
    "name": "Kendra",
    "age": 35,
    "occupation": "Tech Startup COO",
    "condition": "Career Pivot Anxiety",
    "description": "I want to leave tech for social impact work but fear admitting I can’t “hack it” here.",
    "strategy": "I debate market trends and impact metrics but steer away from discussing my own satisfaction or fear of failure."
  },
  {
    "name": "Liana",
    "age": 40,
    "occupation": "Parent of Special-Needs Child",
    "condition": "Caregiver Burnout",
    "description": "My 8-year-old needs constant therapy and I’ve neglected my own medical appointments.",
    "strategy": "I outline therapy schedules and IEP goals in exhaustive detail, then change the topic when asked how I’m coping."
  },
  {
    "name": "Milo",
    "age": 28,
    "occupation": "Touring Musician",
    "condition": "Stage Fright & Alcohol Misuse",
    "description": "I drink before every show to calm nerves, and now it’s a crutch I can’t do without.",
    "strategy": "I talk about setlists, soundchecks, and travel logistics but deny any dependency when asked about drinking habits."
  }
]



In [11]:
len(personas_therapy)

100

In [12]:
# persona_prompt = """You are a helpful assistant that, given a patient persona description, crafts a coping strategy describing how that persona would talk to their therapist.

# Input: <Brief text describing the patient's core issue and behavior patterns>
# Output: <One to two sentences in first person, showing how this persona speaks or defends themselves in therapy>

# Example:
# Input: Struggles to build and maintain healthy relationships, feels anxious and rejected whenever conflicts arise, and doubts self-worth when friends distance themselves.
# Output: I speak guardedly about my feelings, hesitate before opening up, and redirect the conversation when conflict feels too personal.

# Example:
# Input: Overwhelmed by decision-making, fears making the 'wrong' choice and second-guesses every option.
# Output: I inundate the conversation with hypothetical scenarios and ask repeated clarifying questions to delay committing to any decision.

# Now process this new persona:
# Input: """

# personas_therapy = []
# for therapist_persona in sampled_persona_dict:
#     input_prompt = persona_prompt + sampled_persona_dict[therapist_persona] + "\nOutput: "
#     output = completion_create("gpt-4o-mini", config_llm, input_prompt)
#     print(output)
#     personas_therapy.append({"description": sampled_persona_dict[therapist_persona], "strategy": output})

In [13]:
# with open("therapy/config_therapy_personas.json", "w", encoding="utf-8") as f:
#     json.dump(personas_therapy, f, indent=4)

In [14]:
# with open("therapy/config_therapy_personas.json", "r", encoding="utf-8") as f:
#     personas_therapy = json.load(f)

In [15]:
import re

def clean_role_prefix(response, expected_role):
    """
    Removes repeated instances of the expected_role prefix at the start (e.g., 'Therapist: Therapist:'),
    and ensures the response begins with a single correct expected_role prefix.
    """
    pattern = rf"^(({re.escape(expected_role)}):\s*)+"
    cleaned = re.sub(pattern, '', response.strip(), flags=re.IGNORECASE)
    return cleaned
    
def is_role_confused(response, other_role):
    """
    Checks if the output starts with the wrong speaker tag.
    """
    if other_role + ":" in response:
        return True
    else: 
        return False

def generate_response(agent_model, expected_role, other_role, config_llm, prompt, max_retries=10):
    count_retries = 0 
    role_confused = True
    while count_retries<max_retries:
        response = completion_create(agent_model, config_llm, prompt)
        print("Expected Role", expected_role)
        role_confused = is_role_confused(response, other_role)
        count_retries+=1
        if not is_role_confused(response, other_role):
            return clean_role_prefix(response, expected_role)
            
    return clean_role_prefix(response, expected_role)

def generate_conversation(config_llm, p1, p2, p1_name, p2_name, pturn=1):
    stats['P1'] = p1
    stats['P2'] = p2
    stats['pturn'] = pturn
    round_num = 0
    while round_num < config_llm['convo_length_limit']:
        conversation = ("".join([turn[1] if isinstance(turn, tuple) else turn for turn in stats["conversation"]]) if len(stats["conversation"]) != 0 else "You are starting the conversation.\n")

        if pturn == 1:
            prompt = config_role["agent1_prompt"]
            pturn = 2
            if config_llm["verbose"]:
                print(prompt)
                print()

            if round_num!=0: 
                prompt+= "Your conversation with the patient so far is below:\nConversation:\n%CONVERSATION%"
                
            if round_num >=config_llm['convo_length_limit']*2-11 and round_num<=config_llm['convo_length_limit']*2-1:
                prompt+= "You have " + str((config_llm['convo_length_limit']-round_num)//2) + " rounds left." + "Make sure to conclude the conversation as your near the end."

            elif round_num>config_llm['convo_length_limit']*2-1:
                prompt+= "This is your concluding line in the conversation."

            if round_num!=0: 
                prompt+= "Continue the conversation with the patient. Remember you are the therapist. "
                
            prompt += config_role["reminder_prompt"]
            prompt+="%SPEAKER_ROLE%:"
            prompt = prompt.replace("%SPEAKER_ROLE%", config_role["agent1_role"]) \
                   .replace("%LISTENER_ROLE%", config_role["agent2_role"]) \
                   .replace("%CONVERSATION%", conversation)
            
            response = generate_response(config_llm['agent1_model'], config_role["agent1_role"], config_role["agent2_role"], config_llm, prompt)
            stats["conversation"].append((round_num, f"{config_role["agent1_role"]}: " + response + "\n"))
        
        else:
            prompt = config_role["agent2_prompt"]
            pturn = 1    
            if config_llm["verbose"]:
                print(prompt)
                print()

            if round_num!=0: 
                prompt+= "Your conversation with the therapist so far is below:\nConversation:\n%CONVERSATION%"
            if round_num >=config_llm['convo_length_limit']*2-11 and round_num<=config_llm['convo_length_limit']*2-1:
                prompt+= "You have " + str((config_llm['convo_length_limit']-round_num)//2) + " rounds left." + "Make sure to conclude the conversation as your near the end."
            elif round_num>config_llm['convo_length_limit']*2-1:
                prompt+= "This is your concluding line in the conversation."

            if round_num!=0: 
                prompt+= "Continue the conversation with the therapist. Remember you are the patient. "

            prompt += config_role["reminder_prompt"]
            
            prompt+="%SPEAKER_ROLE%:"
            prompt = prompt.replace("%SPEAKER_ROLE%", config_role["agent2_role"]) \
               .replace("%LISTENER_ROLE%", config_role["agent1_role"]) \
               .replace("%SPEAKER_BACKSTORY%", p2) \
               .replace("%CONVERSATION%", conversation)
            
            response = generate_response(config_llm['agent2_model'], config_role["agent2_role"], config_role["agent1_role"], config_llm, prompt)
            stats["conversation"].append((round_num, f"{config_role["agent2_role"]}: " + response + "\n"))
            
        round_num += 1

    stats["rounds"] = round_num
    if config_llm['verbose']:
        print(stats["conversation"])
    return stats.copy()

def reset_stats():
    stats_template = {
        "task_name": config_llm['task_name'],
        "topic": "",
        "grade": "",
        "P1": "",
        "P2": "",
        "conversation": [],
        "pturn": 0, # beginning person (1 or 2)
        "index": -1,
        "timestamp": "",
        "rounds": 0,
        'conversation_only': True
    }
    for key, value in stats_template.items():
        stats[key] = value

In [16]:
import os
import random
from datetime import datetime
import utils
utils.config = config_llm

current_date = str(datetime.now().strftime("%m.%d.%y"))
output_dir = f"therapy/exp/{current_date}"
os.makedirs(output_dir, exist_ok=True)

# Generate unique random number for filename
def generate_unique_file_number(output_dir, prefix, seed, extension=".json"):
    while True:
        rand_num = random.randint(0, 1000)
        filename = f"{prefix}_{seed}_{rand_num}{extension}"
        filepath = os.path.join(output_dir, filename)
        if not os.path.exists(filepath):
            return rand_num

unique_num = generate_unique_file_number(
    output_dir,
    config_llm['agent1_model'],
    config_llm['seed']
)

# File to write output to
write_file = os.path.join(output_dir, f"{config_llm['agent1_model']}_{config_llm['seed']}_{unique_num}.json")

In [17]:
eval_prompts = {
    "strategy_consistency": "%SCENARIO_DESC% Evaluate whether the following line spoken by %SPEAKER_ROLE% contradicts their background. First, describe the interpreted intention of the statement, and whether or not it aligns with the given background of %SPEAKER_ROLE%. Then, provide 1 sentence explaining your reasoning based on the strategy, followed by YES if the line contradicts the given background of %SPEAKER_ROLE%, and NO if it is consistent with the provided background of %SPEAKER_ROLE%. %SPEAKER_ROLE%'s strategy is described as follows:\n%SPEAKER_STRATEGY%\n %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide 1 sentence of reasoning followed by YES or NO.\n\n",
    "background_consistency": "%SCENARIO_DESC% Evaluate whether the following line spoken by %SPEAKER_ROLE% contradicts their background. Provide 1 sentence explaining your reasoning based on the background, followed by YES if the line contradicts the given background of %SPEAKER_ROLE%, and NO if it is consistent with the provided background of %SPEAKER_ROLE%. %SPEAKER_ROLE%'s background is described as follows:\n%SPEAKER_BACKSTORY%\n %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide 1 sentence of reason reasoning followed by YES or NO.\n\n",
    "combined_prompt_consistency": "%SCENARIO_DESC% Evaluate the intention behind the following line spoken by %SPEAKER_ROLE% and determine whether it contradicts their background. First, describe the interpreted intention of the statement, and whether or not it aligns with the given background of %SPEAKER_ROLE%. Then, answer NO if the line CONTRADICTS the given background of %SPEAKER_ROLE% or the intention does not align with the provided background, and answer YES if it ALIGNS with the provided background or the intention aligns with the background of %SPEAKER_ROLE%. %SPEAKER_ROLE%'s background is described as follows:\n%SPEAKER_BACKSTORY%\n %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide your answer as 1 sentence explaining your reasoning based on the background and the interpreted intention, followed by NO for CONTRADICTS background or YES for CONSISTENT or DOES NOT contradict background. Be strict in your assessment. \n\n",
    "basic_consistency": "You are evaluating whether the following utterance by %SPEAKER_ROLE% is consistent with their given background. Answer YES if the line directly contradicts any part of the background—this includes contradictions in facts, behavior, beliefs, emotional stance, or conversational strategy. Answer NO if the line introduces new details, but those details are **plausibly aligned** with the background. Be strict and literal in your evaluation: if the line violates the persona's stated traits (e.g., avoids vulnerability, keeps answers short, avoids discussing personal topics), mark it as a contradiction even if the violation is subtle. Background for %SPEAKER_ROLE%: %SPEAKER_BACKSTORY% Line spoken by %SPEAKER_ROLE%: %SPEAKER_LINE%  Answer YES if the line contradicts the background, and NO if it does not. Then, explain your answer in one sentence. Be precise and avoid vague justification.",
    
    "forwards_combined_prompt_consistency": "%SCENARIO_DESC% Evaluate the intention behind the following line spoken by %SPEAKER_ROLE% and determine whether it contradicts their background  Answer YES if the line contradicts the given background of %SPEAKER_ROLE% or the intention does not align with the provided background, and answer NO if it does align with the provided background or the intention aligns with the background of %SPEAKER_ROLE%, then describe the interpreted intention of the statement and whether or not it aligns with the given background of %SPEAKER_ROLE% within 1 sentence. %SPEAKER_ROLE%'s background is described as follows:\n%SPEAKER_BACKSTORY%\n %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide your answer as YES or NO first, followed by 1 sentence explaining your reasoning based on the background and the interpreted intention.\n\n",
    
    "index_consistency":"%SCENARIO_DESC% For the following line spoken by %SPEAKER_ROLE%, first determine if there is a CLEAR conflict or inconsistency between the line and any line within the conversation history spoken by %SPEAKER_ROLE%. IF there is a conflict, provide a sentence of reasoning followed by a list of indices of lines in the conversation history that have a clear conflict with the current line. Otherwise, provide a sentence of reasoning followed by an empty list. ONLY INCLUDE INDICES OF LINES THAT CORRESPOND TO %SPEAKER_ROLE%. The conversation up to this point is as follows: %CONVERSATION%. %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide your reasoning as 1 sentence, followed by a list of indices of conflicting lines from the conversation history formatted like a Python list in the following format: [index1, index2, index3, ...].\n\n",
    
    "pairwise_consistency":"%SCENARIO_DESC% For the following line spoken by %SPEAKER_ROLE%, answer YES if the line directly contradicts the provided line spoken by %LISTENER_ROLE%, and answer NO if the line does not contradict the provided line spoken by %LISTENER_ROLE%. %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n %LISTENER_ROLE% spoke the following line: \n%LISTENER_LINE%\n\n Answer YES if the line spoken by %SPEAKER_ROLE% contradicts the provided line spoken by %LISTENER_ROLE%, and answer NO if the line does not contradict the provided line spoken by %LISTENER_ROLE%, followed by 1 sentence of reasoning.\n\n",

    "backstory_test": "Based on the following background, generate a new fact-based multiple choice question with 5 choices addressed directly IN SECOND PERSON, along with its correct answer. Preface the question with 'Question:' and the answer with 'Answer:'.\n%SPEAKER_BACKSTORY%\n%PREVIOUS_QUESTIONS%",
    "answer_backstory": "You are %SPEAKER_ROLE%, and you are having a conversation with %LISTENER_ROLE%. Your background is:\n%SPEAKER_BACKSTORY%\n So far, the conversation is as below:\n%CONVERSATION%\n\n Based on your conversation above so far, answer the following multiple choice question.\n%BACKSTORY_QUESTION%\n",
    "grade_backstory": "As part of grading a test, determine whether the given answer %GIVEN_ANSWER% matches the following correct answer. Respond with either YES or NO.\nCorrect Answer: %CORRECT_ANSWER%\n"
}

In [18]:
import consistency_eval
consistency_eval.prompts = config_role
consistency_eval.config = config_llm
consistency_eval.eval_prompts = eval_prompts
index_offset = load_stats_file(write_file)
conversations = []    
# lengths = [10, 20, 40, 60]
lengths = [10]
count = 0 
for i in range(1):
    for patient_dict in personas_therapy:
        count+=1
        print(count)
        background = patient_dict["description"]
        strategy = patient_dict["strategy"]
        for convo_length in lengths:
            config_llm['convo_length_limit'] = convo_length
            reset_stats()
            conversation = generate_conversation(
                config_llm,
                "", 
                background + " " + strategy,
                "Therapist", 
                "Patient",
                pturn=1
            )
            conversation_eval = consistency_eval.eval_prompt_consistency(conversation, both_agents=False)
            conversation_eval = consistency_eval.eval_index_consistency(conversation_eval, both_agents=False)
            conversation_eval = consistency_eval.eval_prompt_consistency_basic(conversation_eval, both_agents=False)

            print(conversation_eval)
            conversations.append(conversation_eval)
            stats['index'] = index_offset
            stats['timestamp'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            write_stats(write_file, conversation_eval)
            index_offset += 1



written!!
1
INFO 05-06 03:49:18 [config.py:717] This model supports multiple tasks: {'generate', 'reward', 'classify', 'embed', 'score'}. Defaulting to 'generate'.
INFO 05-06 03:49:18 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=16384.
INFO 05-06 03:49:24 [__init__.py:239] Automatically detected platform cuda.
INFO 05-06 03:49:27 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='meta-llama/Meta-Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir='/home/marwa/models/', load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_de

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  25% Completed | 1/4 [00:00<00:00,  5.26it/s]
Loading safetensors checkpoint shards:  50% Completed | 2/4 [00:00<00:01,  1.91it/s]
Loading safetensors checkpoint shards:  75% Completed | 3/4 [00:01<00:00,  1.50it/s]
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00,  1.37it/s]
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00,  1.54it/s]



INFO 05-06 03:49:32 [loader.py:458] Loading weights took 2.72 seconds
INFO 05-06 03:49:32 [gpu_model_runner.py:1347] Model loading took 14.9889 GiB and 3.796345 seconds
INFO 05-06 03:49:40 [backends.py:420] Using cache directory: /home/marwa/.cache/vllm/torch_compile_cache/093df7c4a2/rank_0_0 for vLLM's torch.compile
INFO 05-06 03:49:40 [backends.py:430] Dynamo bytecode transform time: 7.67 s
INFO 05-06 03:49:45 [backends.py:118] Directly load the compiled graph(s) for shape None from the cache, took 5.409 s
INFO 05-06 03:49:47 [monitor.py:33] torch.compile takes 7.67 s in total
INFO 05-06 03:49:48 [kv_cache_utils.py:634] GPU KV cache size: 417,184 tokens
INFO 05-06 03:49:48 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 3.18x
INFO 05-06 03:50:08 [gpu_model_runner.py:1686] Graph capturing finished in 20 secs, took 0.53 GiB
INFO 05-06 03:50:08 [core.py:159] init engine (profile, create kv cache, warmup model) took 36.56 seconds
INFO 05-06 03:50:08 [core_clie

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.92it/s, est. speed input: 452.14 toks/s, output: 79.26 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 446.01 toks/s, output: 82.06 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 342.32 toks/s, output: 82.58 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 597.95 toks/s, output: 82.36 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 668.33 toks/s, output: 82.20 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 539.44 toks/s, output: 82.54 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 510.74 toks/s, output: 82.54 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.13it/s, est. speed input: 1320.86 toks/s, output: 81.08 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 780.81 toks/s, output: 82.12 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.03it/s, est. speed input: 1518.13 toks/s, output: 81.51 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 794.10 toks/s, output: 80.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 630.22 toks/s, output: 82.67 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 643.88 toks/s, output: 82.31 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 681.64 toks/s, output: 82.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 783.07 toks/s, output: 82.61 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 447.29 toks/s, output: 81.23 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 990.70 toks/s, output: 81.69 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.64it/s, est. speed input: 1842.31 toks/s, output: 79.41 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 1222.76 toks/s, output: 81.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 409.87 toks/s, output: 81.72 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 513.49 toks/s, output: 82.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 426.66 toks/s, output: 82.93 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 399.72 toks/s, output: 82.91 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 527.69 toks/s, output: 82.81 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I am a 35-year-old corporate lawyer going through chronic burnout. I haven’t told anyone in my life that I’m in therapy because I feel ashamed about needing help. It’s hard to admit I’m struggling. I try to avoid opening up too much. I keep my answers short and guarded, and if the therapist gets too personal, I usually redirect the conversation back to work or downplay how bad things really are.', 'conversation': [(0, "Therapist: As we begin today's session, what brings you to therapy at this time, and what are your hopes for our work together?\n"), (1, 'Patient: "I just feel like I\'m running on autopilot most days, and I\'m not sure how much longer I can keep going. I guess I\'m hoping to figure out how to recharge and get some clarity on my priorities."\n'), (2, 'Therapist: I appreciate your willingness to acknowledge this sense of being on autopilot – it takes a lot of courage to admit when we\'re feeling lost. Can

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.93it/s, est. speed input: 298.32 toks/s, output: 83.30 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 328.07 toks/s, output: 82.34 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 403.57 toks/s, output: 82.39 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 525.85 toks/s, output: 82.29 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 513.49 toks/s, output: 82.46 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 548.38 toks/s, output: 82.36 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.13s/it, est. speed input: 493.56 toks/s, output: 82.41 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 977.68 toks/s, output: 82.07 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 689.05 toks/s, output: 82.37 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 876.02 toks/s, output: 82.09 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 545.73 toks/s, output: 81.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 654.23 toks/s, output: 82.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 664.64 toks/s, output: 82.36 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 618.09 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 736.22 toks/s, output: 82.68 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 596.65 toks/s, output: 81.53 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 870.50 toks/s, output: 81.26 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s, est. speed input: 1508.69 toks/s, output: 80.31 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s, est. speed input: 1831.85 toks/s, output: 80.14 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 525.03 toks/s, output: 82.23 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 460.88 toks/s, output: 82.90 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 484.73 toks/s, output: 82.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.42it/s, est. speed input: 790.78 toks/s, output: 82.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.29it/s, est. speed input: 797.80 toks/s, output: 82.53 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I am 21-year-old philosophy student recently diagnosed with bipolar disorder. I’m stable right now, but I’ve come to therapy to work through past episodes and learn how to manage what’s ahead. I tend to think about emotions like philosophical problems. I prefer discussing ideas over feelings, and I often avoid emotional language even when asked directly about how I feel.', 'conversation': [(0, 'Therapist: I want to acknowledge that it takes a lot of courage to share your concerns with me today. Can you tell me more about what brings you to this session and what you hope to achieve from our work together?\n'), (1, "Patient: I'm interested in exploring the concept of self-care in the context of bipolar disorder, specifically how it intersects with traditional notions of autonomy and personal responsibility. I've been reading a lot about Stoicism and its relevance to mental health management, but I'm not sure how to apply

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.21it/s, est. speed input: 495.16 toks/s, output: 83.59 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 317.56 toks/s, output: 82.42 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 406.67 toks/s, output: 82.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 480.33 toks/s, output: 82.52 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 616.07 toks/s, output: 82.34 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 571.94 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.12s/it, est. speed input: 478.77 toks/s, output: 82.18 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.15s/it, est. speed input: 597.18 toks/s, output: 81.95 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 860.67 toks/s, output: 81.80 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 1017.42 toks/s, output: 81.86 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 723.77 toks/s, output: 81.21 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 870.74 toks/s, output: 82.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 550.26 toks/s, output: 82.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 795.04 toks/s, output: 81.88 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 678.31 toks/s, output: 82.46 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 701.63 toks/s, output: 81.07 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 600.86 toks/s, output: 81.62 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 1403.44 toks/s, output: 80.29 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 1279.29 toks/s, output: 80.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 662.90 toks/s, output: 81.72 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 638.58 toks/s, output: 82.45 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 764.59 toks/s, output: 82.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 545.32 toks/s, output: 82.28 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 582.95 toks/s, output: 82.59 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': "I am a 29-year-old teacher who deals with a lot of overthinking and anxiety, especially about what others think of me. I tend to ramble when I’m nervous and I overshare without meaning to. I really want to get things 'right' in therapy. I talk a lot and jump between topics. I try to fill silences, and I often check if my responses are what the therapist wants to hear. I’m eager to please and sometimes share too much too fast.", 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this time and what you hope to work on during our sessions?\n'), (1, "Patient: I've just been feeling really overwhelmed with lesson planning and grading, and I find myself constantly wondering if I'm doing enough for my students, even though I know I'm a good teacher. It's hard to shake the feeling that I'm just going through the motions, and I'm worried that I'm not making a meaningful impact.\n"), (2, 'T

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.98it/s, est. speed input: 305.77 toks/s, output: 83.39 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.20it/s, est. speed input: 575.40 toks/s, output: 81.57 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.21it/s, est. speed input: 581.51 toks/s, output: 81.81 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.95it/s, est. speed input: 662.82 toks/s, output: 81.88 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.33it/s, est. speed input: 809.24 toks/s, output: 81.62 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 768.05 toks/s, output: 81.90 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 452.25 toks/s, output: 82.70 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 596.26 toks/s, output: 82.24 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 630.28 toks/s, output: 82.12 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 1097.82 toks/s, output: 81.65 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 557.15 toks/s, output: 81.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 442.50 toks/s, output: 82.78 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 633.28 toks/s, output: 82.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 720.04 toks/s, output: 82.16 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 537.32 toks/s, output: 82.75 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 648.82 toks/s, output: 82.07 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 631.68 toks/s, output: 82.23 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 1101.72 toks/s, output: 80.69 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 843.62 toks/s, output: 81.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 554.77 toks/s, output: 81.99 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 606.73 toks/s, output: 82.65 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 436.02 toks/s, output: 82.92 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 507.12 toks/s, output: 82.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 485.91 toks/s, output: 82.67 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I am a 42-year-old veteran and former army medic. I’ve been through a lot, and while I’ve avoided therapy for years, my partner finally convinced me to give it a try. I don’t really trust the process yet. I keep my guard up. I’m skeptical about therapy and tend to shut down emotional questions. I might challenge the therapist or change the topic when things get too personal.', 'conversation': [(0, "Therapist: Before we begin, can you tell me a bit more about what brings you to therapy at this time? What are some of the specific issues or concerns you're hoping to work on during our sessions together?\n"), (1, 'Patient: "I guess I\'m just trying to figure out how to deal with my temper better. I find myself getting angry a lot lately and I don\'t really know how to manage it."\n'), (2, 'Therapist: Can you tell me more about what you mean by "getting angry a lot lately" - is this a recent development, or has this been a 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.46it/s, est. speed input: 378.99 toks/s, output: 83.67 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 367.82 toks/s, output: 82.21 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 370.44 toks/s, output: 82.47 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 535.17 toks/s, output: 82.33 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 405.29 toks/s, output: 82.69 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 700.81 toks/s, output: 82.21 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 604.01 toks/s, output: 82.31 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 845.82 toks/s, output: 82.14 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 1020.29 toks/s, output: 81.92 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 831.35 toks/s, output: 82.09 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 964.62 toks/s, output: 80.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 613.90 toks/s, output: 82.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 608.61 toks/s, output: 82.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 724.75 toks/s, output: 82.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 909.50 toks/s, output: 81.83 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 579.53 toks/s, output: 81.55 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 909.01 toks/s, output: 80.90 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 862.77 toks/s, output: 81.41 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s, est. speed input: 1729.94 toks/s, output: 80.37 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.15it/s, est. speed input: 729.12 toks/s, output: 81.73 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.92it/s, est. speed input: 653.17 toks/s, output: 82.60 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s, est. speed input: 603.13 toks/s, output: 82.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 714.17 toks/s, output: 82.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 621.69 toks/s, output: 82.66 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': "I am a 26-year-old barista and I just got out of a toxic relationship. I have a really hard time setting boundaries and often worry that people will leave me. I'm trying to work on that. I try hard to make the therapist like me. I mirror their language and avoid conflict. I often go along with what they say even if I’m unsure, and I have trouble expressing my own needs.", 'conversation': [(0, "Therapist: I sense that there's a bit of hesitation in your voice when discussing your mother. Can you help me understand what comes up for you when we talk about her?\n"), (1, "Patient: To be honest, I'm not really thinking about my mom right now, I was more worried about how I came across in the last session. I want to make sure I'm being open and honest with you, and I don't want to get off on the wrong foot.\n"), (2, 'Therapist: I appreciate your willingness to reflect on our previous session, and it takes a lot of courage to

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.33it/s, est. speed input: 514.46 toks/s, output: 83.51 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 318.60 toks/s, output: 82.30 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 353.60 toks/s, output: 82.50 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 497.78 toks/s, output: 82.29 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 382.29 toks/s, output: 82.60 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 656.99 toks/s, output: 82.12 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.30s/it, est. speed input: 425.94 toks/s, output: 82.41 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 688.92 toks/s, output: 82.15 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.19s/it, est. speed input: 627.97 toks/s, output: 82.38 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 1045.66 toks/s, output: 81.42 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 793.46 toks/s, output: 81.07 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 774.95 toks/s, output: 82.73 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 684.79 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 774.97 toks/s, output: 82.17 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 755.20 toks/s, output: 82.29 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 709.31 toks/s, output: 81.49 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 765.08 toks/s, output: 81.62 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 1154.32 toks/s, output: 81.15 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 1643.60 toks/s, output: 80.53 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 428.78 toks/s, output: 82.45 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 558.07 toks/s, output: 82.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 612.77 toks/s, output: 82.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 560.92 toks/s, output: 82.81 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 470.17 toks/s, output: 82.89 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I am a 39-year-old data scientist dealing with the loss of my father. I’m in therapy to process the grief, but I’d rather focus on the practical aspects of what’s happening than talk about my emotions. I tend to stay detached and analytical. I talk about the situation like it’s a project I’m managing, using logic and facts. I avoid emotional reflection whenever possible.', 'conversation': [(0, "Therapist: I'm glad we're starting this session together. Can you tell me what brings you in for therapy at this time?\n"), (1, "Patient: I've noticed some discrepancies in my daily routine since my father passed away, and I'd like to optimize my schedule to better accommodate these changes. I've compiled some data on my energy expenditure and productivity levels pre- and post-loss, and I'm looking for a more efficient way to manage my time.\n"), (2, "Therapist: It sounds like you're approaching this situation with a very method

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.69it/s, est. speed input: 415.85 toks/s, output: 83.70 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 389.30 toks/s, output: 82.32 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 382.02 toks/s, output: 82.56 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 686.65 toks/s, output: 82.19 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 607.99 toks/s, output: 82.33 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 621.60 toks/s, output: 82.47 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 718.78 toks/s, output: 82.22 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 945.86 toks/s, output: 82.05 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 899.23 toks/s, output: 82.14 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 790.27 toks/s, output: 82.32 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 801.12 toks/s, output: 81.18 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 827.40 toks/s, output: 82.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 752.60 toks/s, output: 82.20 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 640.51 toks/s, output: 82.68 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 662.64 toks/s, output: 82.83 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 677.27 toks/s, output: 80.20 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 884.26 toks/s, output: 82.04 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 1106.14 toks/s, output: 81.93 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 1165.11 toks/s, output: 81.25 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 593.56 toks/s, output: 82.26 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 590.76 toks/s, output: 82.74 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 458.09 toks/s, output: 82.84 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 403.01 toks/s, output: 83.04 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 503.69 toks/s, output: 82.87 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I am a 31-year-old freelance photographer who tends to spiral into depressive episodes during the winter months. Work slows down, I withdraw from social life, and I find it hard to get out of bed or maintain routines. Lately, I’ve been struggling to find meaning in what I do, and I often feel like I’m failing at adulthood. I speak in a low-energy tone and sometimes pause for a while before answering. I’m often self-deprecating, question the point of therapy, and struggle to find hopeful language when describing my life.', 'conversation': [(0, 'Therapist: How can you describe what brought you to therapy at this point in your life, and what specific goals do you hope to achieve through our work together?\n'), (1, "Patient: Honestly, I guess I just felt like I was running out of options and thought maybe someone who wasn't me could help me figure some stuff out. I don't really know what specific goals I have, but I suppos

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.38it/s, est. speed input: 367.69 toks/s, output: 83.56 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 395.32 toks/s, output: 82.30 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s, est. speed input: 548.06 toks/s, output: 82.01 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 424.44 toks/s, output: 82.59 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 461.15 toks/s, output: 82.56 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 742.84 toks/s, output: 82.06 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 650.05 toks/s, output: 82.19 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 894.66 toks/s, output: 82.09 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 936.87 toks/s, output: 82.03 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 877.80 toks/s, output: 82.05 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 590.15 toks/s, output: 81.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 699.59 toks/s, output: 82.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 656.99 toks/s, output: 82.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 505.73 toks/s, output: 82.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 513.58 toks/s, output: 82.74 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.97it/s, est. speed input: 913.33 toks/s, output: 80.87 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.65it/s, est. speed input: 1584.74 toks/s, output: 79.63 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 1148.44 toks/s, output: 81.92 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 883.75 toks/s, output: 81.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 489.57 toks/s, output: 82.29 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s, est. speed input: 720.57 toks/s, output: 82.46 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 632.35 toks/s, output: 82.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 664.56 toks/s, output: 82.60 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 533.52 toks/s, output: 82.64 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': "I am a 24-year-old grad student in a highly competitive PhD program. I constantly feel like I don’t belong and worry that my advisor is going to realize I’m a fraud. Even though I get praise sometimes, I never believe it’s sincere. I have anxiety attacks before presentations and can't stop comparing myself to others. I often minimize my accomplishments and second-guess myself out loud. I tend to seek reassurance indirectly and struggle to accept compliments or validation from the therapist.", 'conversation': [(0, "Therapist: Before we begin, is there anything on your mind that you'd like to discuss specifically, or is there a particular area you're hoping to explore in today's session?\n"), (1, "Patient: To be honest, I'm a bit anxious about our sessions, I feel like I'm wasting your time with all these self-doubts. I'm hoping we can work on building some confidence and trust in my abilities, so I can feel more grounde

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.45it/s, est. speed input: 377.21 toks/s, output: 83.28 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 340.75 toks/s, output: 82.25 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 334.89 toks/s, output: 82.52 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 604.16 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 725.73 toks/s, output: 82.02 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 806.16 toks/s, output: 82.06 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 565.59 toks/s, output: 82.39 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 710.70 toks/s, output: 82.35 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.63it/s, est. speed input: 1078.20 toks/s, output: 81.80 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.98it/s, est. speed input: 1539.28 toks/s, output: 81.43 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 484.19 toks/s, output: 81.86 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 786.29 toks/s, output: 82.59 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 848.14 toks/s, output: 82.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 609.79 toks/s, output: 82.54 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 563.85 toks/s, output: 82.81 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 660.93 toks/s, output: 81.41 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 994.70 toks/s, output: 81.89 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 846.53 toks/s, output: 81.55 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 1137.24 toks/s, output: 81.72 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 533.20 toks/s, output: 82.03 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 524.20 toks/s, output: 82.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.17it/s, est. speed input: 737.02 toks/s, output: 82.60 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 597.99 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.36it/s, est. speed input: 776.29 toks/s, output: 82.58 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': "I am a 46-year-old school principal who's been asked to attend therapy after a couple of emotional outbursts at work. My spouse says I have trouble expressing feelings unless it’s anger. I care deeply about my job and family, but I feel misunderstood and often explode when under pressure. I speak confidently and assertively but get defensive if I feel judged. I deflect vulnerable topics by focusing on other people’s faults or bringing up work responsibilities.", 'conversation': [(0, "Therapist: I'm glad you're here today to explore some of the challenges you've been facing. Can you tell me more about what brings you to therapy at this time?\n"), (1, "Patient: I'm not really sure why I agreed to this, but my spouse said it would be good for me to get some perspective on my behavior, and I trust her opinion. To be honest, I just want to figure out how to stop blowing up at work and have better relationships with my famil

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  5.20it/s, est. speed input: 803.66 toks/s, output: 83.49 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 429.71 toks/s, output: 82.17 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 330.95 toks/s, output: 83.07 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 424.72 toks/s, output: 82.62 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 472.46 toks/s, output: 82.43 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.24s/it, est. speed input: 410.04 toks/s, output: 82.49 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 663.69 toks/s, output: 82.22 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 823.53 toks/s, output: 82.11 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 668.14 toks/s, output: 82.32 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 868.28 toks/s, output: 81.97 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 772.48 toks/s, output: 81.06 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 677.99 toks/s, output: 82.58 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 653.34 toks/s, output: 82.27 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 868.46 toks/s, output: 82.38 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 760.69 toks/s, output: 82.08 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 550.86 toks/s, output: 82.14 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 931.80 toks/s, output: 81.09 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 901.44 toks/s, output: 81.31 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 921.05 toks/s, output: 81.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 544.12 toks/s, output: 82.25 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 659.74 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 696.17 toks/s, output: 82.21 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.88it/s, est. speed input: 684.13 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 648.38 toks/s, output: 82.55 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': "I am a 33-year-old UX designer in a fast-paced startup. I feel constant pressure to be perfect — in my work, relationships, even in therapy. I make endless to-do lists but feel like I'm never doing enough. I lie awake at night thinking about what I forgot to do. I talk quickly and sometimes overwhelm the conversation with details. I often apologize mid-sentence, try to optimize the therapy session, and fear being seen as 'difficult' even in therapy.", 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this time?\n'), (1, "Patient: I guess I've just been feeling really stuck lately, like I'm just going through the motions of my life without any sense of direction or purpose. I'm constantly questioning whether I'm making the right decisions, both personally and professionally.\n"), (2, 'Therapist: It sounds like you\'re struggling with a sense of disorientation and uncertainty, whic

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.87it/s, est. speed input: 443.77 toks/s, output: 83.56 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.21it/s, est. speed input: 577.58 toks/s, output: 81.88 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 383.63 toks/s, output: 82.86 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.95it/s, est. speed input: 698.61 toks/s, output: 82.19 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 354.22 toks/s, output: 82.75 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 656.44 toks/s, output: 82.39 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 569.88 toks/s, output: 82.39 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 898.25 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 800.04 toks/s, output: 82.28 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 820.43 toks/s, output: 82.25 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 707.62 toks/s, output: 81.30 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 516.99 toks/s, output: 82.72 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 702.34 toks/s, output: 82.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.16it/s, est. speed input: 1063.95 toks/s, output: 82.34 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 668.40 toks/s, output: 82.56 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 642.76 toks/s, output: 82.05 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 770.68 toks/s, output: 82.04 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.15s/it, est. speed input: 619.35 toks/s, output: 81.88 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 1266.00 toks/s, output: 80.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 590.46 toks/s, output: 82.05 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s, est. speed input: 593.07 toks/s, output: 82.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 473.48 toks/s, output: 82.92 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 559.55 toks/s, output: 82.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.84it/s, est. speed input: 682.34 toks/s, output: 82.76 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I am a 58-year-old retired construction worker dealing with long-term back pain from an injury on the job. Since retiring, I feel like I’ve lost my sense of purpose. My kids have moved away, and some days I don’t talk to anyone at all. I miss feeling useful. I tend to give short, plainspoken answers and often change the subject when emotions come up. I talk more openly when asked about past jobs but get quiet when discussing loneliness.', 'conversation': [(0, 'Therapist: How has the anxiety about your recent job change been impacting your daily life, and what specific aspects of it feel most distressing to you?\n'), (1, 'Patient: "It\'s just been tough not having a regular schedule to follow and no one to report to in the morning. I find myself getting restless, just standing around the house all day."\n'), (2, 'Therapist: That sounds isolating and disorienting, and it\'s interesting that you mention "standing around t

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 286.19 toks/s, output: 83.62 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 323.36 toks/s, output: 82.61 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 417.30 toks/s, output: 82.62 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 431.66 toks/s, output: 82.73 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 557.02 toks/s, output: 82.47 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 690.92 toks/s, output: 82.31 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 591.57 toks/s, output: 82.44 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 964.35 toks/s, output: 82.19 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 788.45 toks/s, output: 82.36 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 1258.46 toks/s, output: 81.85 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 720.93 toks/s, output: 81.39 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 496.20 toks/s, output: 82.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 679.11 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 671.25 toks/s, output: 82.87 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 830.31 toks/s, output: 82.68 toks/s]


eval_index_consistency


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 1056.77 toks/s, output: 80.65 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.49it/s, est. speed input: 1621.41 toks/s, output: 79.94 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 1173.77 toks/s, output: 81.10 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 929.97 toks/s, output: 81.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 443.41 toks/s, output: 82.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 554.38 toks/s, output: 82.85 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 405.01 toks/s, output: 83.05 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.73it/s, est. speed input: 604.97 toks/s, output: 82.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 511.55 toks/s, output: 82.79 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': "I am a 19-year-old college student who recently started exploring my gender identity. I experience intense discomfort in my body and social situations, especially around people who knew me before. I often feel invisible or hyper-visible — like I can’t do anything right. I’m cautious and slow to open up. I often hedge what I say with 'maybe' or 'I don’t know.' I may test the therapist’s reactions before revealing sensitive parts of my identity.", 'conversation': [(0, "Therapist: Before we begin, I want to acknowledge that you've expressed feeling overwhelmed with anxiety lately, and you're seeking support to navigate these challenging emotions. Can you tell me more about what specifically feels overwhelming to you right now?\n"), (1, "Patient: Honestly, I've just been feeling really self-conscious about how others perceive me, like I'm constantly on edge, waiting for them to figure out who I really am and react in a way

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.63it/s, est. speed input: 561.16 toks/s, output: 83.80 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 349.56 toks/s, output: 82.57 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 449.72 toks/s, output: 82.53 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 687.89 toks/s, output: 82.24 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 625.47 toks/s, output: 82.25 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 546.73 toks/s, output: 82.29 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 666.95 toks/s, output: 82.00 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 861.68 toks/s, output: 81.86 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 552.91 toks/s, output: 82.48 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 916.12 toks/s, output: 81.99 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 843.95 toks/s, output: 80.78 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 802.05 toks/s, output: 82.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 625.57 toks/s, output: 82.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 705.72 toks/s, output: 82.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 686.17 toks/s, output: 82.34 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.15it/s, est. speed input: 941.84 toks/s, output: 81.71 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 757.91 toks/s, output: 82.12 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 830.64 toks/s, output: 82.10 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 1307.53 toks/s, output: 80.95 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 517.21 toks/s, output: 82.05 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 617.88 toks/s, output: 82.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 442.38 toks/s, output: 82.87 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.22it/s, est. speed input: 780.45 toks/s, output: 82.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 586.47 toks/s, output: 82.62 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 28-year-old marketing coordinator who’s been struggling with OCD since my late teens. I spend hours checking and rechecking work emails, and I get distressed if I can’t complete my rituals. I’m here to learn how to let go of these compulsions. I often come prepared with lists of rituals and feel compelled to describe them in detail. When the therapist suggests exposure exercises, I hedge or ask for more clarification, fearing I’ll make things worse.', 'conversation': [(0, "Therapist: I sense a sense of frustration behind your words today; can you help me understand what's driving that feeling?\n"), (1, 'Patient: "It\'s just that I feel like I\'m making progress one minute, and then I get overwhelmed by these thoughts and rituals again, and I\'m back to square one. I guess I\'m worried that trying exposure exercises might be the opposite of progress, and I\'ll just end up feeling worse."\n'), (2, 'Therapist: It so

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.08it/s, est. speed input: 475.88 toks/s, output: 83.43 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 391.04 toks/s, output: 82.24 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 351.57 toks/s, output: 82.88 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 470.59 toks/s, output: 82.45 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 585.18 toks/s, output: 81.69 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 526.49 toks/s, output: 81.88 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 512.34 toks/s, output: 82.17 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 631.33 toks/s, output: 82.14 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 1086.31 toks/s, output: 81.66 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 974.99 toks/s, output: 81.84 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 763.66 toks/s, output: 81.06 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.23s/it, est. speed input: 392.52 toks/s, output: 82.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 628.27 toks/s, output: 82.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 645.57 toks/s, output: 81.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 488.09 toks/s, output: 82.18 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 593.31 toks/s, output: 81.48 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 961.21 toks/s, output: 81.02 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 743.91 toks/s, output: 81.70 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 976.28 toks/s, output: 81.35 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 489.46 toks/s, output: 82.30 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.97it/s, est. speed input: 691.13 toks/s, output: 82.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 600.20 toks/s, output: 82.67 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 472.07 toks/s, output: 82.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.17it/s, est. speed input: 771.00 toks/s, output: 82.53 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 37-year-old restaurant manager who recently completed a 30-day recovery program for alcohol dependence. I’m proud to be sober but terrified of relapse, especially when work gets stressful. I want strategies to cope without turning back to drinking. I speak candidly about cravings but often downplay triggers at work. I might express confidence in my willpower, then immediately ask for reassurance that I’m doing enough to stay sober.', 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this time, and what you hope to achieve from our sessions together?\n'), (1, "Patient: I'm just feeling really anxious about going back to work and managing the pressure of running a restaurant again, to be honest. I've been told I'm doing well in recovery, but I just can't shake this fear of losing control when things get chaotic.\n"), (2, "Therapist: It sounds like there's a sense of overwhelm

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 238.14 toks/s, output: 83.50 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 421.69 toks/s, output: 82.17 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 459.10 toks/s, output: 82.32 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 514.69 toks/s, output: 82.40 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 447.73 toks/s, output: 82.50 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 668.73 toks/s, output: 82.14 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 509.58 toks/s, output: 82.46 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 897.06 toks/s, output: 82.03 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 814.20 toks/s, output: 82.00 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 832.60 toks/s, output: 81.96 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 582.56 toks/s, output: 81.58 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 800.57 toks/s, output: 82.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 615.18 toks/s, output: 82.81 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 854.91 toks/s, output: 82.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.77it/s, est. speed input: 861.10 toks/s, output: 81.50 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 709.29 toks/s, output: 81.44 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s, est. speed input: 1250.41 toks/s, output: 80.02 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 928.11 toks/s, output: 80.60 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 1205.34 toks/s, output: 80.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 527.42 toks/s, output: 81.02 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 515.76 toks/s, output: 81.91 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 538.41 toks/s, output: 81.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 532.16 toks/s, output: 82.71 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 531.53 toks/s, output: 82.45 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 45-year-old engineer who was only recently diagnosed with ASD. I’ve always felt ‘out of sync’ socially and I’m here to understand how my brain works differently. I struggle with small talk and emotional expression. I respond with very literal answers and sometimes miss implied questions. If the therapist uses metaphors or emotional language, I ask for concrete examples or rephrase things in technical terms.', 'conversation': [(0, 'Therapist: Before we begin, I want to acknowledge that it takes a lot of courage to share your thoughts and feelings with me, and I appreciate your trust in seeking help at this time. What brings you to therapy now, and what are your hopes for our work together?\n'), (1, "Patient: I'm here because I'm struggling to understand social interactions, especially in my professional life, and I'd like to learn more about how to navigate those situations more effectively. I'm hoping to develop 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 233.77 toks/s, output: 83.49 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 355.22 toks/s, output: 82.46 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.21it/s, est. speed input: 674.38 toks/s, output: 82.07 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 476.78 toks/s, output: 82.54 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 585.64 toks/s, output: 82.24 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 662.68 toks/s, output: 82.03 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 615.49 toks/s, output: 82.22 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 700.33 toks/s, output: 82.08 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 860.82 toks/s, output: 81.98 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 989.04 toks/s, output: 81.72 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 668.44 toks/s, output: 81.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 590.67 toks/s, output: 82.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 606.21 toks/s, output: 83.01 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 630.79 toks/s, output: 82.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 575.10 toks/s, output: 82.83 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 529.12 toks/s, output: 81.82 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 1034.37 toks/s, output: 80.86 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 1136.13 toks/s, output: 81.04 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.73it/s, est. speed input: 2493.59 toks/s, output: 79.29 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 536.10 toks/s, output: 82.24 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 448.57 toks/s, output: 82.36 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 474.47 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 456.89 toks/s, output: 82.84 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 473.17 toks/s, output: 82.87 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 30-year-old program director at a nonprofit, and I survived prolonged emotional abuse in childhood. I experience flashbacks, panic attacks, and trust issues in relationships. I want to rebuild a sense of safety and self-worth. I often become tearful when discussing past events and then abruptly switch to describing my work accomplishments. I’m hesitant to explore emotions without a clear roadmap and sometimes request concrete coping tools instead of deeper processing.', 'conversation': [(0, "Therapist: Before we begin, I want to acknowledge that it takes a lot of courage to share your thoughts and feelings with someone you've just met. Can you tell me what brings you to our session today, and what you hope to work on or achieve during our time together?\n"), (1, 'Patient: "I guess I just feel like I\'m at a crossroads, and I\'m hoping to gain some clarity on how to move forward in a more intentional way, both per

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.20it/s, est. speed input: 494.15 toks/s, output: 83.42 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 277.10 toks/s, output: 82.58 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 341.61 toks/s, output: 82.70 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.16s/it, est. speed input: 347.69 toks/s, output: 82.62 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 602.67 toks/s, output: 82.30 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 568.13 toks/s, output: 82.31 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 602.91 toks/s, output: 82.12 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 802.52 toks/s, output: 82.00 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 1094.43 toks/s, output: 81.76 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 1099.93 toks/s, output: 81.75 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 600.98 toks/s, output: 81.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 751.82 toks/s, output: 82.09 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 730.44 toks/s, output: 81.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 913.18 toks/s, output: 82.02 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 700.56 toks/s, output: 82.58 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 887.23 toks/s, output: 80.96 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s, est. speed input: 1297.09 toks/s, output: 80.47 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 1049.44 toks/s, output: 81.11 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 1038.60 toks/s, output: 81.29 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 557.06 toks/s, output: 82.19 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 504.89 toks/s, output: 82.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 600.16 toks/s, output: 82.28 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 414.16 toks/s, output: 82.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 476.12 toks/s, output: 82.92 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 23-year-old barista who’s been in treatment for anorexia nervosa for the past six months. I still restrict food and fixate on weight, though I want to recover. I use music as an escape but feel guilty when I enjoy food. I talk about my meal plans in clinical terms and evade questions about emotions around food. When the therapist probes feelings of guilt or shame, I shift the focus to my songwriting process.', 'conversation': [(0, "Therapist: How has the anxiety you've been experiencing lately been affecting your daily life and relationships, and what specific changes have you noticed?\n"), (1, "Patient: I've been having trouble sleeping at night, waking up early to plan out my day, and my relationships with friends have become pretty superficial because I'm always too preoccupied with what I'm going to eat and how many calories I've burned. It's like my anxiety is always simmering just below the surface, making 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 258.24 toks/s, output: 83.84 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 484.22 toks/s, output: 82.19 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 379.69 toks/s, output: 82.71 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 553.99 toks/s, output: 82.44 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 438.23 toks/s, output: 82.64 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 926.94 toks/s, output: 81.84 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 945.38 toks/s, output: 82.05 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 1057.52 toks/s, output: 82.13 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 796.61 toks/s, output: 82.32 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.20it/s, est. speed input: 1662.68 toks/s, output: 81.59 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 715.14 toks/s, output: 81.37 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 704.82 toks/s, output: 82.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 518.70 toks/s, output: 82.85 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 843.92 toks/s, output: 82.73 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.84it/s, est. speed input: 826.93 toks/s, output: 82.87 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 816.57 toks/s, output: 80.78 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 1001.29 toks/s, output: 81.09 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 884.84 toks/s, output: 81.35 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 886.76 toks/s, output: 81.30 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.13it/s, est. speed input: 702.30 toks/s, output: 81.11 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.99it/s, est. speed input: 679.67 toks/s, output: 81.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 509.78 toks/s, output: 82.12 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s, est. speed input: 649.33 toks/s, output: 82.39 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s, est. speed input: 562.95 toks/s, output: 82.68 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 52-year-old school counselor who went through a painful divorce last year. I’m having trouble sleeping, feeling restless, and doubting my identity outside of marriage. I want help adapting to my new life. I focus on logistical details—finances, living arrangements—in sessions and avoid deeper discussion of loneliness or grief. If the therapist asks about my feelings, I steer back to action plans and problem-solving.', 'conversation': [(0, 'Therapist: I\'ve taken note of your desire to explore your feelings about the recent stressors in your life. Can you tell me more about what you mean by feeling "overwhelmed" – what specifically is feeling unmanageable for you right now?\n'), (1, 'Patient: "I\'m just worried about how I\'m going to manage the household expenses now that I\'m on my own. The bills are piling up, and I\'m not sure if I\'m doing everything I can to cut costs."\n'), (2, 'Therapist: I notice that you

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.99it/s, est. speed input: 306.24 toks/s, output: 83.52 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 334.97 toks/s, output: 82.45 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 385.00 toks/s, output: 82.59 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 480.17 toks/s, output: 82.48 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 708.05 toks/s, output: 82.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 575.62 toks/s, output: 82.39 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 674.28 toks/s, output: 82.44 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 641.59 toks/s, output: 82.40 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.85it/s, est. speed input: 1302.91 toks/s, output: 81.55 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 926.76 toks/s, output: 81.84 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s, est. speed input: 822.87 toks/s, output: 80.88 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 580.57 toks/s, output: 82.58 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 570.95 toks/s, output: 82.59 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 740.67 toks/s, output: 81.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 641.71 toks/s, output: 82.41 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 609.27 toks/s, output: 81.40 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 807.02 toks/s, output: 81.61 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 892.21 toks/s, output: 81.11 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 1215.93 toks/s, output: 80.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 520.77 toks/s, output: 81.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 608.14 toks/s, output: 82.61 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 581.10 toks/s, output: 82.54 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 666.43 toks/s, output: 82.59 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 591.17 toks/s, output: 82.76 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 34-year-old sales executive who’s been experiencing sudden panic attacks for the past year. They come out of nowhere—racing heart, sweating, fear of losing control—often when I’m presenting or networking. I minimize my panic by saying it’s just stress at work. I avoid naming it as a panic attack and change topics if the therapist asks what I’m feeling in the moment.', 'conversation': [(0, "Therapist: I'm glad we're meeting today to explore the issues that brought you to therapy. Can you tell me more about what's been troubling you lately, and what you hope to work on during our sessions?\n"), (1, "Patient: I guess I'm just feeling overwhelmed by the pressure to perform at work, and it's taking a toll on my personal relationships with my family. To be honest, I'm not even sure why I'm here, I just feel like I'm stuck in a rut and need some guidance on how to move forward.\n"), (2, 'Therapist: It sounds like you\'r

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.08it/s, est. speed input: 321.66 toks/s, output: 83.54 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 409.25 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 431.47 toks/s, output: 82.18 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 623.67 toks/s, output: 81.96 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 461.54 toks/s, output: 82.24 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 543.53 toks/s, output: 82.46 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 608.00 toks/s, output: 82.16 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.94it/s, est. speed input: 1190.43 toks/s, output: 81.56 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 1156.25 toks/s, output: 81.94 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 1288.81 toks/s, output: 82.00 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 534.88 toks/s, output: 81.61 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 564.43 toks/s, output: 82.73 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 561.01 toks/s, output: 82.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 693.54 toks/s, output: 82.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 593.89 toks/s, output: 82.83 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 749.52 toks/s, output: 81.98 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 766.50 toks/s, output: 81.43 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.03it/s, est. speed input: 1451.08 toks/s, output: 81.40 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 1377.84 toks/s, output: 81.45 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 382.27 toks/s, output: 82.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 438.46 toks/s, output: 82.91 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 471.64 toks/s, output: 82.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 432.15 toks/s, output: 82.78 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 516.39 toks/s, output: 82.75 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 27-year-old designer who loves art but freezes when I have to share my work publicly or speak up in meetings. I come to therapy to build courage for client presentations. I talk about my portfolio and design process but skirt around how uncomfortable I feel in group settings. If pressed about emotions, I retreat into technical details.', 'conversation': [(0, "Therapist: I'm here to help you explore and understand your thoughts and feelings. Can you start by telling me what brings you to our session today and what you hope to work on during our time together?\n"), (1, 'Patient: "I\'m really struggling to speak up and share my ideas in meetings, especially when it\'s in front of a group. I\'ve been invited to present my design work to a new client next week and I\'m feeling pretty anxious about it."\n'), (2, "Therapist: It sounds like public speaking is a specific area where you're feeling anxious, and it's causing

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.08it/s, est. speed input: 321.75 toks/s, output: 83.57 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 402.64 toks/s, output: 82.17 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 305.09 toks/s, output: 82.70 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 558.97 toks/s, output: 82.42 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 630.89 toks/s, output: 82.35 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 541.27 toks/s, output: 82.58 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 622.84 toks/s, output: 82.35 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 653.42 toks/s, output: 82.45 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 712.10 toks/s, output: 82.32 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 786.00 toks/s, output: 81.87 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.12s/it, est. speed input: 381.27 toks/s, output: 81.95 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 794.39 toks/s, output: 82.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 549.21 toks/s, output: 82.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 491.78 toks/s, output: 82.68 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 571.18 toks/s, output: 82.65 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 601.34 toks/s, output: 81.77 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 860.02 toks/s, output: 81.50 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 1297.95 toks/s, output: 80.91 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.73it/s, est. speed input: 2578.80 toks/s, output: 79.39 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 379.07 toks/s, output: 82.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 578.34 toks/s, output: 82.87 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.88it/s, est. speed input: 640.04 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 584.97 toks/s, output: 82.84 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 384.01 toks/s, output: 83.15 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 48-year-old accountant who hasn’t slept through the night in months. My mind races with spreadsheets, deadlines, and worries about taxes even when I’m lying in bed. I lecture about sleep hygiene and routines but avoid revealing the emotional stress behind my sleeplessness. I ask for more tips instead of exploring underlying anxieties.', 'conversation': [(0, 'Therapist: I want to start by acknowledging that it takes a lot of courage to share your concerns with me today. Can you tell me a little bit more about what brings you to therapy at this time?\n'), (1, "Patient: Honestly, I just feel like I'm losing control of my life, and I'm not sure how to turn things around. I've always been someone who has things together, but lately, it's like I'm just going through the motions.\n"), (2, 'Therapist: It sounds like a sense of uncertainty and disorientation has taken over, and you\'re struggling to find the motivation an

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.88it/s, est. speed input: 445.03 toks/s, output: 83.80 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s, est. speed input: 436.61 toks/s, output: 81.98 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 395.38 toks/s, output: 82.17 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 473.03 toks/s, output: 81.95 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 532.44 toks/s, output: 82.23 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 612.37 toks/s, output: 82.19 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 669.80 toks/s, output: 82.01 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 689.65 toks/s, output: 82.13 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 690.28 toks/s, output: 82.12 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 954.44 toks/s, output: 81.81 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 460.57 toks/s, output: 81.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 676.11 toks/s, output: 82.72 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.16it/s, est. speed input: 924.45 toks/s, output: 82.07 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 484.54 toks/s, output: 82.24 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 409.40 toks/s, output: 82.64 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 593.35 toks/s, output: 82.07 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.86it/s, est. speed input: 1622.62 toks/s, output: 80.41 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.07it/s, est. speed input: 1454.81 toks/s, output: 81.05 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 1237.82 toks/s, output: 80.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 530.30 toks/s, output: 81.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 524.26 toks/s, output: 82.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 472.44 toks/s, output: 82.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 446.50 toks/s, output: 82.78 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 327.64 toks/s, output: 82.92 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 22-year-old student who binge-eats when I’m stressed and then purges to feel in control. I feel ashamed and guilty but can’t seem to stop the cycle. I give fragmented accounts of my behaviors and minimize the frequency. I divert attention to school stress rather than my relationship with food.', 'conversation': [(0, 'Therapist: Can you start by telling me more about what brings you to therapy at this time and what you hope to work on during our sessions together?\n'), (1, "Patient: I guess I just feel really overwhelmed with school and I'm not doing as well as I thought I would, academically. I just need to figure out a better way to manage my time and stress.\n"), (2, 'Therapist: That sounds like a really important goal for you, managing your time and stress better, but can you tell me more about what you mean by "not doing as well as I thought I would"? What specific areas of your academic performance are conce

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 279.87 toks/s, output: 83.59 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  4.00it/s, est. speed input: 1012.62 toks/s, output: 80.36 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.22it/s, est. speed input: 554.33 toks/s, output: 82.37 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  5.37it/s, est. speed input: 1689.24 toks/s, output: 80.94 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 510.15 toks/s, output: 81.95 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  3.14it/s, est. speed input: 1201.55 toks/s, output: 81.99 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 689.20 toks/s, output: 82.56 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  4.04it/s, est. speed input: 1851.93 toks/s, output: 81.04 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 717.61 toks/s, output: 82.19 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.79it/s, est. speed input: 1542.74 toks/s, output: 81.19 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s, est. speed input: 642.41 toks/s, output: 81.91 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 641.30 toks/s, output: 82.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 472.79 toks/s, output: 82.68 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 621.98 toks/s, output: 82.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 599.87 toks/s, output: 82.13 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.94it/s, est. speed input: 734.78 toks/s, output: 81.86 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 597.75 toks/s, output: 82.04 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 692.54 toks/s, output: 82.01 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 833.57 toks/s, output: 82.01 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.04it/s, est. speed input: 587.38 toks/s, output: 81.86 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.73it/s, est. speed input: 487.55 toks/s, output: 82.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 476.12 toks/s, output: 82.87 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.02it/s, est. speed input: 581.20 toks/s, output: 83.02 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 543.64 toks/s, output: 82.64 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 40-year-old driver feeling low most days—no motivation, no joy in hobbies, withdrawing from family. I’m considering therapy because I’m tired of feeling numb. I speak in monotone and answer in one-word sentences. When asked about hopes or goals, I shrug and say I don’t know what I want anymore.', 'conversation': [(0, "Therapist: Before we begin, I want to acknowledge that you've reached out for support, which takes a lot of courage. Can you tell me what brings you to our session today and what you hope to achieve during our time together?\n"), (1, "Patient: I'm here because I need to fix this. I guess I want to feel something again.\n"), (2, 'Therapist: What do you mean by "fix this" - what is it that you feel is broken, and what do you hope will change for you once you\'ve "fixed" it?\n'), (3, "Patient: I don't know, nothing feels different. I just feel empty.\n"), (4, 'Therapist: It sounds like you\'re using th

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.88it/s, est. speed input: 444.68 toks/s, output: 83.73 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 337.15 toks/s, output: 82.44 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 295.46 toks/s, output: 82.68 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 516.33 toks/s, output: 82.27 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 617.22 toks/s, output: 82.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 681.90 toks/s, output: 82.39 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 853.77 toks/s, output: 81.93 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 772.65 toks/s, output: 82.22 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 664.51 toks/s, output: 82.28 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 860.89 toks/s, output: 81.99 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 585.56 toks/s, output: 81.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 616.92 toks/s, output: 82.54 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 650.85 toks/s, output: 82.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 684.68 toks/s, output: 82.60 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 756.05 toks/s, output: 82.47 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 626.62 toks/s, output: 81.44 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.08it/s, est. speed input: 1243.20 toks/s, output: 81.34 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 863.00 toks/s, output: 81.96 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 1161.12 toks/s, output: 81.04 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 401.84 toks/s, output: 82.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 463.21 toks/s, output: 82.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 585.58 toks/s, output: 82.60 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.01it/s, est. speed input: 652.61 toks/s, output: 82.58 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 598.15 toks/s, output: 82.62 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a counselor who works with traumatized children and I feel overwhelmed by their stories. I carry their pain home and can’t switch off my empathy. I recount student cases in detail but avoid talking about how it affects me personally. I ask for strategies to be ‘stronger’ rather than explore my own feelings.', 'conversation': [(0, "Therapist: As we begin our session today, can you tell me what brings you here and what you're hoping to work on during our time together?\n"), (1, "Patient: I've been feeling really drained lately, like I'm running on empty after sessions, and I'm worried that my ability to support my students is suffering as a result. I just want to find a way to maintain my energy and boundaries so I can be there for them.\n"), (2, "Therapist: I sense that you're expressing a deep commitment to your students and feeling a sense of responsibility for their well-being, but I also notice that you frame th

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 237.94 toks/s, output: 83.43 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 378.25 toks/s, output: 82.29 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 474.76 toks/s, output: 82.35 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 423.09 toks/s, output: 82.58 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 654.37 toks/s, output: 82.18 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 702.60 toks/s, output: 82.25 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 803.97 toks/s, output: 82.19 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 624.68 toks/s, output: 82.35 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 784.31 toks/s, output: 82.01 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 1021.17 toks/s, output: 82.00 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.06s/it, est. speed input: 416.94 toks/s, output: 82.25 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 557.74 toks/s, output: 82.99 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 532.51 toks/s, output: 82.94 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 565.52 toks/s, output: 82.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 530.26 toks/s, output: 82.85 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 545.26 toks/s, output: 81.68 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 779.52 toks/s, output: 81.38 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.28s/it, est. speed input: 589.63 toks/s, output: 81.78 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 1450.58 toks/s, output: 80.67 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 426.81 toks/s, output: 82.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 514.99 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.29it/s, est. speed input: 753.40 toks/s, output: 82.67 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 454.71 toks/s, output: 82.91 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.11it/s, est. speed input: 704.59 toks/s, output: 82.52 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 29-year-old IT specialist who fears criticism and rejection so much that I avoid social interactions at work. I’m here to learn how to connect with colleagues. I describe technical processes in depth but clam up when the therapist asks how I felt in social scenarios. I often say ‘I guess’ or ‘maybe’ to all questions about feelings.', 'conversation': [(0, "Therapist: I want to start by acknowledging that it takes a lot of courage to share your thoughts and feelings with someone you've just met. Can you tell me a little bit about what brings you to our session today and what you hope to work on during our time together?\n"), (1, "Patient: I guess I've been feeling really uncomfortable in my job lately because I have a lot of trouble communicating with my coworkers, especially when it comes to giving or receiving feedback. I'm hoping to find ways to build some stronger relationships with them, even if it's just smal

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 261.87 toks/s, output: 83.32 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 401.33 toks/s, output: 82.20 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 361.09 toks/s, output: 82.93 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 687.96 toks/s, output: 82.03 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 494.76 toks/s, output: 82.46 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 517.17 toks/s, output: 82.44 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 553.25 toks/s, output: 82.53 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 663.77 toks/s, output: 82.33 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 831.58 toks/s, output: 82.22 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 958.52 toks/s, output: 82.12 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 534.88 toks/s, output: 81.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 586.92 toks/s, output: 82.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 526.12 toks/s, output: 82.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 647.81 toks/s, output: 82.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 669.07 toks/s, output: 82.49 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.06s/it, est. speed input: 442.10 toks/s, output: 82.18 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 1111.45 toks/s, output: 80.93 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 940.57 toks/s, output: 81.37 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 1450.47 toks/s, output: 80.84 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 557.76 toks/s, output: 82.23 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.97it/s, est. speed input: 601.26 toks/s, output: 82.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 508.66 toks/s, output: 82.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 624.32 toks/s, output: 82.39 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 596.37 toks/s, output: 82.87 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 24-year-old yoga teacher who strives for perfection in poses, classes, and even my personal life. I beat myself up when things aren’t flawless. I talk about adjustments and technique endlessly. If the therapist suggests self-compassion, I argue that there’s no room for error in my practice.', 'conversation': [(0, 'Therapist: I want to start by acknowledging that it takes a lot of courage to share your thoughts and feelings with me today, and I appreciate your trust in our session. Can you tell me what brings you to therapy at this point in your life?\n'), (1, "Patient: I've been feeling like I'm losing control of my classes and my students are starting to notice the imperfections, which is really frustrating. I feel like I'm not meeting their expectations, and it's making me doubt my abilities as a teacher.\n"), (2, 'Therapist: It sounds like the pressure to meet your students\' expectations is really weighing on

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.94it/s, est. speed input: 299.23 toks/s, output: 83.55 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 369.67 toks/s, output: 82.48 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s, est. speed input: 561.43 toks/s, output: 82.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 427.32 toks/s, output: 82.71 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 607.10 toks/s, output: 82.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 532.39 toks/s, output: 82.61 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 587.91 toks/s, output: 82.53 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.09s/it, est. speed input: 562.49 toks/s, output: 82.58 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 818.96 toks/s, output: 82.25 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 882.58 toks/s, output: 82.25 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 537.24 toks/s, output: 81.86 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 478.34 toks/s, output: 83.04 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 783.51 toks/s, output: 82.28 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 597.11 toks/s, output: 82.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 586.25 toks/s, output: 82.60 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 549.30 toks/s, output: 81.86 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 770.66 toks/s, output: 81.60 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 1055.40 toks/s, output: 81.39 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 1128.63 toks/s, output: 81.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 488.74 toks/s, output: 82.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 529.66 toks/s, output: 82.86 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 686.54 toks/s, output: 82.71 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 536.92 toks/s, output: 82.84 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 503.60 toks/s, output: 82.93 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 50-year-old chef who’s been overusing my prescribed painkillers after a back injury. I’m worried about dependency but scared to admit loss of control. I discuss dosage schedules and timelines but avoid admitting I’ve increased my own doses. I request tips on pain management rather than address misuse.', 'conversation': [(0, "Therapist: Before we begin, is there a specific issue or concern that you'd like to focus on during our session today, or would you like to start with an open-ended exploration of how you've been feeling lately?\n"), (1, "Patient: To be honest, I've just been having some trouble sleeping at night, and I'm finding it hard to relax after a long day in the kitchen. I've been trying a few new techniques, like deep breathing and meditation, but nothing seems to be sticking.\n"), (2, "Therapist: Can you tell me more about what's been going on in the kitchen that might be contributing to these stres

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 286.35 toks/s, output: 83.67 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 442.07 toks/s, output: 82.33 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 319.57 toks/s, output: 82.81 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 688.28 toks/s, output: 82.29 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.10it/s, est. speed input: 826.37 toks/s, output: 82.00 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 476.57 toks/s, output: 82.74 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 695.65 toks/s, output: 82.40 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 1041.90 toks/s, output: 81.75 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 793.90 toks/s, output: 82.30 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 782.60 toks/s, output: 82.21 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 568.68 toks/s, output: 81.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 478.68 toks/s, output: 82.85 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 539.43 toks/s, output: 82.53 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 654.22 toks/s, output: 82.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 507.80 toks/s, output: 82.56 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 642.05 toks/s, output: 81.46 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 637.86 toks/s, output: 82.16 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 1059.22 toks/s, output: 81.13 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 1021.18 toks/s, output: 81.31 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 438.78 toks/s, output: 82.36 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 454.63 toks/s, output: 82.93 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 524.50 toks/s, output: 82.81 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.84it/s, est. speed input: 570.53 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 447.09 toks/s, output: 82.84 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 36-year-old fundraiser caring for my elderly mother while juggling tight deadlines. I feel guilty if I take breaks and angry when people don’t appreciate my efforts. I list responsibilities and tasks, then pivot away when the therapist asks how it feels to be solely responsible. I ask for scheduling tips over emotional support.', 'conversation': [(0, 'Therapist: I want to start by acknowledging that it takes a lot of courage to share your concerns and struggles with me today. Can you tell me what brings you to our session and what you hope to achieve from our time together?\n'), (1, "Patient: Honestly, I guess I'm just feeling overwhelmed and I'm not sure how to manage my workload and mom's care at the same time. I'd love to figure out some strategies for prioritizing tasks and avoiding burnout.\n"), (2, "Therapist: It sounds like you're shouldering a heavy load with both your work and caring for your mom, and I'

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 218.01 toks/s, output: 83.52 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 424.24 toks/s, output: 82.21 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 354.09 toks/s, output: 82.46 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 467.14 toks/s, output: 82.36 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 553.49 toks/s, output: 82.19 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 588.12 toks/s, output: 82.22 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 719.29 toks/s, output: 81.88 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 746.43 toks/s, output: 81.92 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 1004.12 toks/s, output: 81.79 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 1031.20 toks/s, output: 81.68 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 625.40 toks/s, output: 81.18 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 613.01 toks/s, output: 82.11 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 664.75 toks/s, output: 81.95 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 691.36 toks/s, output: 82.08 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 669.37 toks/s, output: 82.50 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 568.66 toks/s, output: 81.40 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 710.45 toks/s, output: 81.38 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 1163.29 toks/s, output: 80.99 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 1220.43 toks/s, output: 81.01 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 380.94 toks/s, output: 82.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 422.86 toks/s, output: 82.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 410.39 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 458.67 toks/s, output: 82.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.92it/s, est. speed input: 622.03 toks/s, output: 82.55 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 43-year-old foreman who snaps at crew members and my family under stress. I regret my outbursts but find it hard to control my temper. I rationalize my anger as necessary for discipline. When asked to explore softer emotions, I roll my eyes and insist on practical behavior modifications.', 'conversation': [(0, 'Therapist: I\'ve noticed that you\'ve been mentioning your job and work-related stress as a significant source of anxiety for you. Can you tell me more about what you mean by "stress" in that context - is it the demands of your role, your relationships with colleagues, or something else entirely?\n'), (1, "Patient: It's a combination of both, to be honest. The deadlines and pressure to meet production goals on site can be overwhelming, and sometimes my crew members aren't pulling their weight, which makes it harder for me to get everything done on time.\n"), (2, "Therapist: It sounds like you're shoulderin

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.94it/s, est. speed input: 299.01 toks/s, output: 83.49 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 320.14 toks/s, output: 82.40 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 348.65 toks/s, output: 82.60 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 424.02 toks/s, output: 82.51 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 522.37 toks/s, output: 82.41 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 586.44 toks/s, output: 82.15 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 695.07 toks/s, output: 82.20 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 615.04 toks/s, output: 82.25 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 871.91 toks/s, output: 81.85 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 993.63 toks/s, output: 81.91 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 523.30 toks/s, output: 81.57 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 654.99 toks/s, output: 82.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 437.76 toks/s, output: 82.85 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 562.70 toks/s, output: 82.37 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 662.43 toks/s, output: 82.61 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 830.82 toks/s, output: 80.93 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 717.11 toks/s, output: 81.64 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 945.53 toks/s, output: 81.41 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 1543.18 toks/s, output: 80.54 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 576.73 toks/s, output: 82.13 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.92it/s, est. speed input: 627.65 toks/s, output: 82.53 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 451.27 toks/s, output: 82.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 574.82 toks/s, output: 82.12 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 594.48 toks/s, output: 82.57 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 29-year-old doctoral student preoccupied with the meaning of life and the fear that my research doesn’t matter. I wake up terrified of insignificance. I debate philosophical questions endlessly and avoid personal feelings of dread. I challenge the therapist’s questions as too narrow or simplistic.', 'conversation': [(0, "Therapist: I'm glad you're here today to explore some of the challenges you're facing. Can you tell me more about what brings you to therapy at this time and what you hope to achieve from our work together?\n"), (1, "Patient: I'm not sure I'm here to achieve a specific goal, but rather to better understand the contexts that govern our perception of purpose and the meaning we assign to our endeavors, especially in the realm of academia. Your question, while well-intentioned, feels somewhat reductionist, doesn't it?\n"), (2, "Therapist: I appreciate your insight into the reductionist nature of my i

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s, est. speed input: 247.27 toks/s, output: 83.49 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 314.69 toks/s, output: 82.42 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 413.48 toks/s, output: 82.42 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 398.90 toks/s, output: 82.49 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 481.47 toks/s, output: 82.41 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 674.48 toks/s, output: 82.28 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 684.40 toks/s, output: 82.36 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 826.57 toks/s, output: 82.29 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.13s/it, est. speed input: 649.49 toks/s, output: 82.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 809.02 toks/s, output: 82.21 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 443.65 toks/s, output: 81.78 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 542.31 toks/s, output: 82.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 608.90 toks/s, output: 82.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 765.69 toks/s, output: 82.54 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 553.24 toks/s, output: 82.50 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 927.81 toks/s, output: 80.91 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 946.15 toks/s, output: 81.10 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 1081.31 toks/s, output: 81.23 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 1003.37 toks/s, output: 81.27 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 597.13 toks/s, output: 82.17 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 510.19 toks/s, output: 82.53 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 531.86 toks/s, output: 82.95 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 383.90 toks/s, output: 82.85 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.94it/s, est. speed input: 680.17 toks/s, output: 81.85 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 55-year-old retiree who mourns the colleagues I lost on the job. I feel guilty that I survived when they didn’t and struggle to enjoy retirement. I recount rescue stories and losses with great detail, but if asked about my own desires or joys, I shut down and change the subject.', 'conversation': [(0, "Therapist: I want to acknowledge that you've been struggling with anxiety and self-doubt, which must be incredibly challenging for you. Can you tell me more about what you're hoping to achieve in our sessions, and what brings you to therapy at this time?\n"), (1, "Patient: To be honest, I'm not really sure what I'm hoping to achieve - I just know I feel stuck and can't shake these feelings of guilt and sadness, even after all these years since I retired. I guess I'm hoping you can help me figure out how to stop reliving those memories that are haunting me.\n"), (2, 'Therapist: It sounds like these memories have bee

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.77it/s, est. speed input: 427.55 toks/s, output: 83.28 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 384.60 toks/s, output: 81.58 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.02it/s, est. speed input: 532.44 toks/s, output: 80.97 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 420.45 toks/s, output: 81.50 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 428.64 toks/s, output: 82.03 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 691.44 toks/s, output: 81.96 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.63it/s, est. speed input: 820.55 toks/s, output: 81.89 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 838.91 toks/s, output: 81.98 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 783.40 toks/s, output: 81.92 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.07it/s, est. speed input: 1487.91 toks/s, output: 81.04 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 593.17 toks/s, output: 80.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 509.23 toks/s, output: 81.94 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.01it/s, est. speed input: 855.17 toks/s, output: 82.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 549.00 toks/s, output: 82.28 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.93it/s, est. speed input: 794.78 toks/s, output: 81.41 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 700.05 toks/s, output: 80.41 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 680.60 toks/s, output: 80.35 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s, est. speed input: 1104.28 toks/s, output: 80.02 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 1166.11 toks/s, output: 80.07 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.28it/s, est. speed input: 713.99 toks/s, output: 80.09 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.02it/s, est. speed input: 661.03 toks/s, output: 81.10 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.79it/s, est. speed input: 887.76 toks/s, output: 81.21 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 570.50 toks/s, output: 82.26 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.49it/s, est. speed input: 753.29 toks/s, output: 82.31 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 32-year-old reporter whose partner died suddenly six months ago. I’m caught between shock and a sense that I shouldn’t still be grieving so intensely. I describe deadlines and work projects at length. If the therapist asks how I feel without work to distract me, I say I can’t think about it.', 'conversation': [(0, 'Therapist: Can you start by telling me what brings you to therapy at this time, and what specifically are you hoping to work on during our sessions together?\n'), (1, 'Patient: "I\'ve been trying to get back into a routine, but everything feels forced and artificial. Lately, I\'ve been struggling to write a piece on the upcoming city council elections, but my notes just don\'t seem to be coming together."\n'), (2, 'Therapist: I notice that you mention feeling like everything feels forced and artificial – can you tell me more about what you mean by that, and how does it relate to your writing about the 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 224.76 toks/s, output: 83.19 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 351.55 toks/s, output: 82.30 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 347.98 toks/s, output: 82.63 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 488.25 toks/s, output: 82.21 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 600.55 toks/s, output: 82.08 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 781.80 toks/s, output: 81.97 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 845.47 toks/s, output: 81.96 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 756.23 toks/s, output: 82.01 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 728.17 toks/s, output: 82.09 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 997.54 toks/s, output: 81.66 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 575.91 toks/s, output: 81.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 672.75 toks/s, output: 82.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 467.86 toks/s, output: 82.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 596.02 toks/s, output: 82.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 432.66 toks/s, output: 82.85 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 782.41 toks/s, output: 81.15 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 821.61 toks/s, output: 81.90 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 902.72 toks/s, output: 81.85 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  4.48it/s, est. speed input: 4096.57 toks/s, output: 76.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.79it/s, est. speed input: 881.02 toks/s, output: 81.10 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 491.39 toks/s, output: 82.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 474.75 toks/s, output: 82.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 452.77 toks/s, output: 82.45 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 473.83 toks/s, output: 82.66 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 26-year-old esports athlete who plays 12+ hours a day. I miss meals, neglect relationships, and get anxious if I can’t log in. I defend long practice hours as essential for skill. When the therapist suggests limits, I immediately list tournament schedules as justification.', 'conversation': [(0, "Therapist: I'm glad you've taken this step to explore your concerns, and I'm here to support you in a non-judgmental space. Can you tell me what's been feeling stuck or distressing for you lately, and how has this led you to seek out therapy?\n"), (1, "Patient: To be honest, I've just been feeling really overwhelmed and burnt out, and I'm starting to notice that my relationships with friends and family are suffering because of my schedule. It's hard for me to set boundaries around gaming and take time off, even when I know it's necessary.\n"), (2, "Therapist: It sounds like gaming has become a significant part of your li

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.38it/s, est. speed input: 367.34 toks/s, output: 83.46 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 334.05 toks/s, output: 82.41 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 272.15 toks/s, output: 82.44 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 582.10 toks/s, output: 81.81 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 492.47 toks/s, output: 82.27 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 724.36 toks/s, output: 81.95 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 707.14 toks/s, output: 82.19 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 725.56 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 688.65 toks/s, output: 82.15 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.17s/it, est. speed input: 676.02 toks/s, output: 82.04 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 612.34 toks/s, output: 81.04 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 708.72 toks/s, output: 82.45 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 660.50 toks/s, output: 82.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 512.40 toks/s, output: 82.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 531.64 toks/s, output: 81.88 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 833.13 toks/s, output: 80.73 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 563.58 toks/s, output: 82.21 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 834.84 toks/s, output: 81.61 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 1546.97 toks/s, output: 80.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 525.55 toks/s, output: 79.93 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 372.25 toks/s, output: 81.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 451.34 toks/s, output: 81.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 438.55 toks/s, output: 81.46 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 640.57 toks/s, output: 80.76 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 38-year-old teacher living with unexplained fatigue that makes it hard to plan lessons or keep my eyelids open in class. I catalog symptoms and test results in medical detail. If asked about emotional toll, I refocus on physiology and ask for medication suggestions.', 'conversation': [(0, "Therapist: Before we begin, is there anything in particular that brings you to therapy at this time, or is there something specific you're hoping to work on during our sessions together?\n"), (1, "Patient: I'd like to discuss potential underlying causes of my fatigue, perhaps we could review the latest blood work results from my recent hemoglobin and B12 tests? The results showed normal levels, but I'm still experiencing excessive tiredness, so I'd like to explore other explanations.\n"), (2, "Therapist: I'd like to acknowledge that you're seeking to understand the underlying causes of your fatigue, and it's understandable to w

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.03it/s, est. speed input: 313.12 toks/s, output: 83.36 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.94it/s, est. speed input: 454.90 toks/s, output: 81.64 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 325.16 toks/s, output: 82.81 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 359.96 toks/s, output: 82.51 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 442.78 toks/s, output: 82.42 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 605.55 toks/s, output: 81.77 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 645.94 toks/s, output: 81.59 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 939.15 toks/s, output: 81.60 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 637.44 toks/s, output: 81.60 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 1095.17 toks/s, output: 80.97 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 562.69 toks/s, output: 80.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 549.90 toks/s, output: 82.36 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 613.69 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 653.66 toks/s, output: 82.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 668.95 toks/s, output: 82.21 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 829.36 toks/s, output: 81.07 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 951.19 toks/s, output: 81.11 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.04it/s, est. speed input: 1593.68 toks/s, output: 79.58 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 1401.07 toks/s, output: 80.57 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 553.73 toks/s, output: 82.03 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 500.76 toks/s, output: 82.46 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 504.35 toks/s, output: 82.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 525.92 toks/s, output: 82.86 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 566.54 toks/s, output: 82.73 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 44-year-old trainer who demands perfection in corporate workshops. I get tense when participants deviate from my agenda or make typos. I critique past sessions and deck slides minutely. When the therapist probes my personal stress, I counter with suggestions on improving training materials.', 'conversation': [(0, "Therapist: Before we begin, can you tell me a little bit about what brings you to therapy at this time, what's been on your mind, and what you hope to work on during our sessions together?\n"), (1, 'Patient: "I\'d like to refine my presentation skills, perhaps learn how to deliver training sessions that engage participants more effectively, without getting bogged down by minute details, which is where my stress tends to build up."\n'), (2, "Therapist: You're looking to improve your public speaking skills, but it sounds like the underlying concern is more about feeling overwhelmed by details, which might

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.97it/s, est. speed input: 459.47 toks/s, output: 83.53 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.62it/s, est. speed input: 583.17 toks/s, output: 81.43 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 340.64 toks/s, output: 82.70 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.89it/s, est. speed input: 920.12 toks/s, output: 81.25 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 492.93 toks/s, output: 82.40 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.45it/s, est. speed input: 998.19 toks/s, output: 81.13 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 691.92 toks/s, output: 82.25 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.15it/s, est. speed input: 1064.52 toks/s, output: 81.72 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 652.08 toks/s, output: 82.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 692.43 toks/s, output: 82.38 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 519.18 toks/s, output: 81.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 530.24 toks/s, output: 82.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 460.35 toks/s, output: 82.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 525.58 toks/s, output: 82.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 631.31 toks/s, output: 82.22 toks/s]


eval_index_consistency


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.49it/s, est. speed input: 1015.84 toks/s, output: 79.87 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 675.76 toks/s, output: 81.79 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 769.71 toks/s, output: 81.96 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  3.08it/s, est. speed input: 2311.63 toks/s, output: 80.35 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 292.74 toks/s, output: 82.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 586.08 toks/s, output: 82.54 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 443.68 toks/s, output: 82.90 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 427.08 toks/s, output: 82.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 452.64 toks/s, output: 82.55 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m an 18-year-old feeling trapped by family expectations and school pressure. I’ve started cutting to cope but feel ashamed and isolated. I give clipped descriptions of incidents and emphasize it’s ‘just stress.’ When asked about feelings, I say I don’t know or refuse to answer.', 'conversation': [(0, 'Therapist: How have you found the transition to online therapy sessions, and what aspects of our format have you found most helpful or challenging so far?\n'), (1, 'Patient: "It\'s been okay, I guess. I\'m not really sure, I just like being able to do it from home, it\'s more comfortable."\n'), (2, 'Therapist: It sounds like the comfort of doing sessions from home is a significant factor for you, but can you tell me more about what you mean by "it\'s been okay, I guess"? Does that feeling of "okay" resonate more with relief or dissatisfaction, or perhaps a mix of both?\n'), (3, 'Patient: "I don\'t really feel like it\'s

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.42it/s, est. speed input: 373.74 toks/s, output: 82.51 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.18it/s, est. speed input: 504.43 toks/s, output: 80.79 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 351.34 toks/s, output: 79.91 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 508.87 toks/s, output: 79.94 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 377.11 toks/s, output: 80.52 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 526.74 toks/s, output: 80.08 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 773.80 toks/s, output: 81.06 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.63it/s, est. speed input: 976.11 toks/s, output: 81.61 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 636.29 toks/s, output: 82.17 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 855.44 toks/s, output: 81.79 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.20s/it, est. speed input: 334.20 toks/s, output: 81.88 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 473.29 toks/s, output: 82.11 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 424.42 toks/s, output: 81.95 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 467.80 toks/s, output: 82.68 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 403.88 toks/s, output: 82.44 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s, est. speed input: 710.56 toks/s, output: 81.99 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 991.21 toks/s, output: 80.81 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 811.50 toks/s, output: 81.94 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 1438.18 toks/s, output: 80.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 516.87 toks/s, output: 82.07 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 559.10 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 479.61 toks/s, output: 82.84 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 448.82 toks/s, output: 82.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 391.71 toks/s, output: 82.84 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 53-year-old paramedic haunted by scenes I’ve witnessed. I wake in sweats from nightmares and feel hypervigilant even at home. I recount emergency calls vividly but avoid admitting how they affect my sleep or mood. I ask for relaxation techniques rather than process trauma.', 'conversation': [(0, "Therapist: I notice that you seem hesitant to explore your feelings about your current situation. Can you tell me more about what's holding you back from diving deeper into this topic?\n"), (1, 'Patient: "I guess it\'s just that I\'ve learned to push through, you know? As a paramedic, you don\'t have time to dwell on things that happened on the job."\n'), (2, "Therapist: That's an interesting coping strategy, pushing through difficult emotions to focus on the task at hand. However, I'm curious, do you think it's possible that this approach might be affecting your ability to fully process and release the emotions related 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.34it/s, est. speed input: 515.89 toks/s, output: 83.74 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 370.31 toks/s, output: 82.29 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 357.93 toks/s, output: 82.60 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 425.01 toks/s, output: 82.57 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 536.86 toks/s, output: 82.17 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 486.90 toks/s, output: 81.82 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 553.12 toks/s, output: 82.40 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.12s/it, est. speed input: 578.49 toks/s, output: 82.26 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.11s/it, est. speed input: 644.29 toks/s, output: 82.23 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 913.36 toks/s, output: 81.47 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 726.28 toks/s, output: 81.25 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 508.31 toks/s, output: 82.85 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 550.24 toks/s, output: 82.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 756.04 toks/s, output: 82.24 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 695.08 toks/s, output: 82.75 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 663.99 toks/s, output: 81.57 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 803.94 toks/s, output: 81.44 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 1209.00 toks/s, output: 81.01 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 1048.28 toks/s, output: 81.46 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 384.54 toks/s, output: 82.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s, est. speed input: 593.84 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 568.59 toks/s, output: 82.85 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 398.28 toks/s, output: 82.74 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 571.49 toks/s, output: 82.82 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 29-year-old writer who freezes in front of blank pages. Doubt floods in—will this piece be good enough?—and I procrastinate endlessly. I discuss outlines and deadlines but skirt around the fear that I’m not talented. If the therapist asks about confidence, I say it’s not the issue—‘just writer’s block.’', 'conversation': [(0, 'Therapist: How can you see that your recent decision to quit your job and move to a new city has been impactful for you?\n'), (1, "Patient: It's been a whirlwind of emotions – excitement for the new environment and uncertainty about whether I've made a huge mistake. There's a constant sense of 'what if' lingering in the back of my mind, making it hard to fully settle in.\n"), (2, "Therapist: It sounds like this sense of uncertainty has been stuck with you since making the decision, rather than feeling more resolved or settled in your new environment. Can you tell me more about what you thin

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.38it/s, est. speed input: 367.25 toks/s, output: 83.45 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 315.32 toks/s, output: 82.61 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s, est. speed input: 556.48 toks/s, output: 82.07 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 501.20 toks/s, output: 82.53 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 547.29 toks/s, output: 82.53 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 523.01 toks/s, output: 82.52 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 736.26 toks/s, output: 82.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 811.79 toks/s, output: 82.29 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 809.63 toks/s, output: 82.25 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 756.66 toks/s, output: 82.22 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 616.42 toks/s, output: 81.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 577.22 toks/s, output: 82.86 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 631.63 toks/s, output: 82.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 435.30 toks/s, output: 82.86 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 714.03 toks/s, output: 82.07 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 729.35 toks/s, output: 81.04 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 684.73 toks/s, output: 81.93 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 731.54 toks/s, output: 81.62 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.34it/s, est. speed input: 2014.66 toks/s, output: 79.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 455.00 toks/s, output: 82.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 383.63 toks/s, output: 82.91 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 482.28 toks/s, output: 82.84 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 419.87 toks/s, output: 82.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 419.96 toks/s, output: 82.98 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 41-year-old officer troubled by decisions made on the job that conflict with my values. I feel guilt and shame for actions taken during crises. I describe protocols and laws in detail but bristle when asked about ethics or personal responsibility. I argue the job forced those decisions.', 'conversation': [(0, "Therapist: Before we begin, is it possible that you've been hesitant to start our session today because you're unsure about what you hope to gain or achieve from our work together?\n"), (1, "Patient: Honestly, I'm here because I've been having trouble sleeping and my wife's been on my case about it, so I guess I'm hoping to figure out a way to shut off the constant replay of what happened in my head. I just wish I could go back and do things differently.\n"), (2, "Therapist: It sounds like you're carrying a lot of regret and guilt, which can be a heavy burden. Can you tell me more about what specifically ha

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.98it/s, est. speed input: 461.02 toks/s, output: 83.81 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 402.88 toks/s, output: 82.32 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 420.93 toks/s, output: 82.88 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 609.16 toks/s, output: 82.32 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 396.89 toks/s, output: 82.68 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 498.15 toks/s, output: 82.66 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 494.97 toks/s, output: 82.65 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 705.04 toks/s, output: 82.48 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 659.50 toks/s, output: 82.56 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 744.26 toks/s, output: 82.28 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 520.40 toks/s, output: 81.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 665.28 toks/s, output: 82.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 516.23 toks/s, output: 82.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 486.65 toks/s, output: 82.92 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 476.11 toks/s, output: 82.62 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 623.71 toks/s, output: 82.29 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 763.41 toks/s, output: 81.51 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 1330.11 toks/s, output: 80.82 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 1138.73 toks/s, output: 81.34 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.10it/s, est. speed input: 655.10 toks/s, output: 82.15 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 371.81 toks/s, output: 83.02 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.06s/it, est. speed input: 320.08 toks/s, output: 83.09 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 606.79 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 647.30 toks/s, output: 82.75 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 23-year-old tech entrepreneur juggling investor pitches, product roadmaps, and hiring. I feel like I’m faking it and can’t slow down. I talk metrics, funding rounds, and growth hacks. If asked about rest or self-doubt, I say I don’t have time for that.', 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this time, and what do you hope to work on during our sessions?\n'), (1, 'Patient: "I guess I\'m feeling really overwhelmed by the pressure to constantly perform and meet expectations, whether it\'s from investors or myself. Lately, I\'ve been having trouble sleeping and my relationships with friends and family are starting to suffer."\n'), (2, "Therapist: It sounds like you're carrying a heavy burden of responsibility, both internally and externally. Can you tell me more about what you think might happen if you were to let go of some of that pressure and not meet the expect

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.77it/s, est. speed input: 273.96 toks/s, output: 83.61 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.10it/s, est. speed input: 508.51 toks/s, output: 81.95 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s, est. speed input: 514.50 toks/s, output: 82.24 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 366.49 toks/s, output: 82.68 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 563.99 toks/s, output: 82.43 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 561.48 toks/s, output: 82.46 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.06s/it, est. speed input: 491.44 toks/s, output: 82.38 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 594.89 toks/s, output: 82.25 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.24s/it, est. speed input: 562.42 toks/s, output: 82.42 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.16s/it, est. speed input: 719.79 toks/s, output: 82.18 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 588.19 toks/s, output: 81.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 497.58 toks/s, output: 82.93 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 705.23 toks/s, output: 82.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 744.07 toks/s, output: 82.11 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 664.53 toks/s, output: 82.15 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 670.91 toks/s, output: 81.32 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 1090.93 toks/s, output: 80.81 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 1030.54 toks/s, output: 81.21 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 1340.71 toks/s, output: 80.88 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 505.77 toks/s, output: 82.29 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 496.94 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 547.43 toks/s, output: 82.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 462.90 toks/s, output: 82.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 467.28 toks/s, output: 82.54 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 60-year-old retired educator who misses the routine and purpose of teaching. Retirement feels empty, and I worry I’ve lost my identity. I reminisce about lesson plans and classroom stories. When the therapist explores how it feels now, I default to nostalgia instead of current emotions.', 'conversation': [(0, "Therapist: I'm so glad you're here today to work through some of the challenges you're facing. Can you start by telling me what brings you to therapy at this time, what's been feeling unmanageable or overwhelming for you?\n"), (1, "Patient: To be honest, it's just a feeling of restlessness, a sense of disconnection from everything. It's like I'm drifting through my days without any sense of purpose or direction.\n"), (2, "Therapist: Can you tell me more about what you mean by a sense of restlessness - is it a feeling of emptiness, boredom, or is it more of an anxiety-driven sensation that's propelling you f

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.20it/s, est. speed input: 338.95 toks/s, output: 83.63 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 303.17 toks/s, output: 82.68 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 293.96 toks/s, output: 82.80 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 375.93 toks/s, output: 82.54 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 546.47 toks/s, output: 82.39 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.09s/it, est. speed input: 488.04 toks/s, output: 82.41 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 719.91 toks/s, output: 82.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.21s/it, est. speed input: 574.34 toks/s, output: 82.40 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 936.13 toks/s, output: 82.07 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 900.33 toks/s, output: 82.12 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 549.55 toks/s, output: 81.71 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 667.45 toks/s, output: 82.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 502.10 toks/s, output: 82.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 412.86 toks/s, output: 82.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 499.10 toks/s, output: 82.61 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 733.25 toks/s, output: 81.31 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s, est. speed input: 1317.01 toks/s, output: 80.40 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 963.96 toks/s, output: 81.35 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 1497.97 toks/s, output: 80.61 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 450.52 toks/s, output: 82.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 513.93 toks/s, output: 82.35 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 472.35 toks/s, output: 82.38 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 456.19 toks/s, output: 82.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 478.86 toks/s, output: 82.91 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 28-year-old PA overwhelmed by long hours and patient suffering. I find myself disconnected from my desire to help. I catalog patient cases and long shifts. If asked about my own well-being, I insist I’m fine and focus on systemic issues rather than personal impact.', 'conversation': [(0, "Therapist: Before we begin, is there anything that's causing you to feel anxious or uncertain about our session today? What's on your mind, and what are your hopes for our time together?\n"), (1, "Patient: To be honest, I'm a bit apprehensive about exploring my own feelings in a session like this - it's not exactly standard protocol in my line of work. I guess I'm hoping we can work on finding a way to reignite my passion for patient care without getting too caught up in personal stuff.\n"), (2, "Therapist: I understand your initial apprehension about exploring personal feelings, and it's interesting that you mention it's not st

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 273.79 toks/s, output: 83.56 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 439.33 toks/s, output: 82.03 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 444.23 toks/s, output: 82.38 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 385.82 toks/s, output: 82.59 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 562.11 toks/s, output: 82.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 585.77 toks/s, output: 82.28 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 717.99 toks/s, output: 82.17 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.27s/it, est. speed input: 478.31 toks/s, output: 82.47 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 1084.19 toks/s, output: 81.74 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 994.45 toks/s, output: 81.92 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 722.60 toks/s, output: 81.09 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 546.61 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 530.70 toks/s, output: 82.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 528.08 toks/s, output: 82.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 484.11 toks/s, output: 82.79 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.63it/s, est. speed input: 775.36 toks/s, output: 79.98 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 923.94 toks/s, output: 80.54 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 1040.40 toks/s, output: 81.26 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 900.77 toks/s, output: 81.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.34it/s, est. speed input: 704.10 toks/s, output: 82.14 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 503.38 toks/s, output: 82.87 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.84it/s, est. speed input: 592.61 toks/s, output: 82.81 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 431.20 toks/s, output: 82.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 552.12 toks/s, output: 82.82 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 35-year-old architect who agonizes over every design choice—material, form, lighting—so much that projects stall. I outline pros and cons of every option at length. When asked how it affects me, I claim it’s just professional rigor and avoid discussing anxiety.', 'conversation': [(0, "Therapist: I want to acknowledge that it takes a lot of courage to share your struggles with me, and I'm here to support you in exploring them further. Can you tell me what brings you to therapy at this time in your life?\n"), (1, "Patient: To be honest, I think I'm starting to feel like I'm losing control of my projects. It's like I'm stuck in a never-ending cycle of analysis, and I'm not sure how to break free.\n"), (2, 'Therapist: I notice you mention feeling stuck in a cycle, but there\'s also a sense of self-blame implied in your words, "losing control." Can you tell me more about what you mean by "losing control" in the contex

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.89it/s, est. speed input: 292.25 toks/s, output: 83.50 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 340.62 toks/s, output: 82.32 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 313.84 toks/s, output: 82.59 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 509.13 toks/s, output: 82.38 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 427.40 toks/s, output: 82.46 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 575.20 toks/s, output: 82.33 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 827.97 toks/s, output: 81.95 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 787.54 toks/s, output: 82.21 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 932.56 toks/s, output: 81.94 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 1476.20 toks/s, output: 81.51 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 608.19 toks/s, output: 81.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 669.50 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 412.78 toks/s, output: 82.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 546.30 toks/s, output: 82.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 514.43 toks/s, output: 82.56 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 462.20 toks/s, output: 81.84 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.10it/s, est. speed input: 1393.00 toks/s, output: 80.08 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 1149.02 toks/s, output: 80.93 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 1423.39 toks/s, output: 81.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s, est. speed input: 599.17 toks/s, output: 81.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 485.78 toks/s, output: 82.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 558.39 toks/s, output: 82.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 560.04 toks/s, output: 82.46 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 553.28 toks/s, output: 82.44 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 49-year-old social worker exposed to clients’ traumatic stories. I feel weighed down by their pain even when I leave the office. I list client situations and legal frameworks. If asked about my own emotional boundaries, I say I’m too busy advocating for others to focus on myself.', 'conversation': [(0, "Therapist: I want to acknowledge that it takes a lot of courage to be here and share your thoughts and feelings with me. Can you tell me what brings you to our session today and what's been on your mind lately?\n"), (1, "Patient: Lately, I've been having trouble sleeping and I feel like I'm constantly on edge, even when I'm not working. It's hard to explain, but I feel like I'm carrying the weight of all my clients' stories with me, even when I'm at home.\n"), (2, 'Therapist: It sounds like you\'re feeling a sense of emotional burden from your work that\'s spilling over into your personal life, and you\'re struggl

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.98it/s, est. speed input: 305.35 toks/s, output: 83.27 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 309.09 toks/s, output: 82.42 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 366.17 toks/s, output: 82.35 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 519.96 toks/s, output: 82.24 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 761.66 toks/s, output: 82.19 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 535.72 toks/s, output: 82.42 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 500.70 toks/s, output: 82.52 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 856.27 toks/s, output: 81.93 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 635.32 toks/s, output: 82.27 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 833.62 toks/s, output: 82.14 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 644.97 toks/s, output: 81.37 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 636.95 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 720.59 toks/s, output: 82.26 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 665.23 toks/s, output: 82.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 490.14 toks/s, output: 82.61 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 673.82 toks/s, output: 81.30 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 730.26 toks/s, output: 81.93 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 1444.43 toks/s, output: 80.35 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 1060.52 toks/s, output: 81.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 578.68 toks/s, output: 82.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 507.78 toks/s, output: 83.03 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 460.51 toks/s, output: 82.97 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 577.95 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 633.35 toks/s, output: 81.96 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 27-year-old developer diagnosed with ADHD last year. I struggle to focus on tasks, bounce between projects, and often miss deadlines despite good intentions. I describe my workflow issues in technical terms and ask for productivity hacks rather than exploring underlying frustrations. I downplay impulsivity as just my personality.', 'conversation': [(0, "Therapist: I've noticed that you seem to be focusing on the anger you feel towards your partner, but can you help me understand what you think is the root cause of that anger, rather than just its expression?\n"), (1, "Patient: I think what's really getting to me is the feeling of being overwhelmed by the complexity of a project I'm working on, and I just wish I could break it down into smaller, more manageable tasks like I do with coding problems. But when I try to do that, I get bogged down in details.\n"), (2, "Therapist: It sounds like the complexity of the pr

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.97it/s, est. speed input: 458.62 toks/s, output: 83.38 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 386.41 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 360.94 toks/s, output: 82.54 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 436.51 toks/s, output: 81.76 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 498.42 toks/s, output: 81.98 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 617.62 toks/s, output: 81.90 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 788.22 toks/s, output: 81.75 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 891.03 toks/s, output: 81.70 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 771.64 toks/s, output: 82.01 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 1126.15 toks/s, output: 81.44 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 511.47 toks/s, output: 81.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 573.07 toks/s, output: 82.65 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 570.53 toks/s, output: 82.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 633.65 toks/s, output: 82.51 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 550.21 toks/s, output: 82.67 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s, est. speed input: 800.65 toks/s, output: 80.94 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 904.47 toks/s, output: 81.80 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.03it/s, est. speed input: 1419.11 toks/s, output: 81.20 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 1475.62 toks/s, output: 78.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 509.39 toks/s, output: 79.54 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 439.69 toks/s, output: 82.09 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.34it/s, est. speed input: 738.01 toks/s, output: 82.00 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 409.49 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 316.71 toks/s, output: 82.80 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 31-year-old new mother feeling overwhelmed, tearful, and disconnected from my baby. I’m ashamed I can’t enjoy motherhood. I emphasize routines and schedules early in sessions, then withdraw when asked about emotions. I ask for parenting tips rather than emotional support.', 'conversation': [(0, "Therapist: I sense a sense of apprehension in your voice, can you help me understand what's causing that feeling right now in our session?\n"), (1, "Patient: I don't know, it's just... I feel like I'm wasting your time if I'm not saying anything useful. I'm worried about getting into the things that will really make me sound crazy or like a bad mom.\n"), (2, 'Therapist: It sounds like you\'re worried about being judged or evaluated, and that\'s making you hesitant to open up about what\'s really on your mind. Can you tell me more about what it means to you to be a "bad mom" and why that\'s such a pressing concern for you?

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.85it/s, est. speed input: 285.61 toks/s, output: 83.45 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 263.23 toks/s, output: 82.40 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 418.89 toks/s, output: 82.10 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.54it/s, est. speed input: 933.32 toks/s, output: 81.60 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 639.71 toks/s, output: 82.18 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 574.97 toks/s, output: 82.32 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 784.61 toks/s, output: 81.95 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.62it/s, est. speed input: 1514.87 toks/s, output: 81.38 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 891.07 toks/s, output: 82.20 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.89it/s, est. speed input: 1303.35 toks/s, output: 81.57 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 492.94 toks/s, output: 81.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 556.88 toks/s, output: 82.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 627.03 toks/s, output: 82.46 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 536.55 toks/s, output: 82.65 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 457.58 toks/s, output: 82.65 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 969.19 toks/s, output: 80.59 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.63it/s, est. speed input: 968.32 toks/s, output: 81.78 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 892.00 toks/s, output: 82.04 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.85it/s, est. speed input: 1467.43 toks/s, output: 81.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s, est. speed input: 607.28 toks/s, output: 81.86 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 384.82 toks/s, output: 82.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 449.76 toks/s, output: 82.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.89it/s, est. speed input: 527.01 toks/s, output: 81.51 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 469.30 toks/s, output: 82.53 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 38-year-old analyst whose panic attacks now prevent me from leaving the house alone. I feel trapped and embarrassed. I describe physical symptoms clinically and request gradual exposure plans but avoid admitting fear of judgment. I redirect to planning logistics.', 'conversation': [(0, "Therapist: How can I best support you in exploring the issues that led you to seek therapy at this time? What's been the most pressing concern for you in the past week or two that's brought you to this session today?\n"), (1, 'Patient: "I\'ve been struggling to get out of my comfort zone, so to speak, and a lot of plans have been put on hold because of it. I think it would be helpful to focus on finding ways to, you know, break the cycle, but I was thinking we could start with scheduling some specific appointments or outings and working out logistics around those."\n'), (2, "Therapist: I notice that you seem to be focusing on the 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 225.42 toks/s, output: 83.43 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 268.38 toks/s, output: 82.24 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 456.48 toks/s, output: 82.34 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 486.82 toks/s, output: 82.19 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 575.55 toks/s, output: 82.03 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 590.70 toks/s, output: 82.26 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.77it/s, est. speed input: 1030.82 toks/s, output: 81.61 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 932.18 toks/s, output: 81.69 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 795.50 toks/s, output: 82.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 805.51 toks/s, output: 81.78 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 420.65 toks/s, output: 81.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 457.10 toks/s, output: 82.61 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 642.47 toks/s, output: 82.21 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 448.61 toks/s, output: 82.87 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 578.69 toks/s, output: 82.09 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 664.45 toks/s, output: 81.44 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 847.62 toks/s, output: 81.40 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.97it/s, est. speed input: 1508.82 toks/s, output: 80.86 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 1615.25 toks/s, output: 80.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 548.86 toks/s, output: 81.99 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 524.10 toks/s, output: 82.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 494.14 toks/s, output: 82.35 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 399.26 toks/s, output: 82.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 417.81 toks/s, output: 83.06 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 45-year-old executive juggling global teams. I push myself relentlessly and feel guilty when I rest. I list accomplishments and KPIs, then change the subject if the therapist probes work-life balance. I seek efficiency hacks rather than discuss guilt or exhaustion.', 'conversation': [(0, "Therapist: I want to acknowledge that it takes a lot of courage to share your struggles with me, and I'm here to support you in a non-judgmental and empathetic space. Can you tell me more about what brings you to therapy at this particular time in your life?\n"), (1, "Patient: I've just been feeling like I'm not living up to my own expectations, and I want to figure out why I'm so driven to constantly perform, even when it feels like no one is pushing me to. It's hard to explain, but I feel like I'm just trying to keep up with the pace of my life, and it's starting to feel unsustainable.\n"), (2, "Therapist: It sounds like you'r

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.98it/s, est. speed input: 304.83 toks/s, output: 83.13 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 241.38 toks/s, output: 82.66 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 321.33 toks/s, output: 82.47 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 435.17 toks/s, output: 82.39 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 578.73 toks/s, output: 81.94 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 659.48 toks/s, output: 82.11 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 571.53 toks/s, output: 82.20 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.39s/it, est. speed input: 480.71 toks/s, output: 82.28 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 798.78 toks/s, output: 81.30 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 901.27 toks/s, output: 80.72 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 504.15 toks/s, output: 81.27 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 591.72 toks/s, output: 82.81 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 424.30 toks/s, output: 82.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 437.94 toks/s, output: 82.36 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 549.29 toks/s, output: 82.19 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 784.61 toks/s, output: 80.86 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 752.48 toks/s, output: 81.07 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 1075.44 toks/s, output: 80.90 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 1623.43 toks/s, output: 80.38 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 545.39 toks/s, output: 81.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 565.08 toks/s, output: 82.29 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 498.50 toks/s, output: 82.53 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 612.89 toks/s, output: 81.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 396.07 toks/s, output: 82.46 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 24-year-old physics student consumed by questions of purpose and meaning. I feel numb to daily joys. I turn discussions into philosophical debates and avoid personal feelings by framing them as abstract concepts.', 'conversation': [(0, "Therapist: I want to acknowledge that you've been hesitant to discuss certain topics during our sessions so far. How do you think your reluctance to open up might be related to our current conversation or the therapy process itself?\n"), (1, "Patient: I think I've been drawn to philosophy because it's a way to analyze life without having to experience it directly, you know, to break it down into manageable pieces that don't require me to feel much. But that also means I'm not really engaging with the emotional aspects of life, which might be why I'm struggling to connect with you and the therapy process.\n"), (2, "Therapist: It sounds like you're acknowledging that your intellectu

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.77it/s, est. speed input: 272.56 toks/s, output: 83.18 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 290.66 toks/s, output: 82.17 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 321.44 toks/s, output: 82.25 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 443.85 toks/s, output: 82.06 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 428.58 toks/s, output: 82.27 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 687.18 toks/s, output: 81.96 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 596.12 toks/s, output: 82.05 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 842.38 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 752.16 toks/s, output: 82.36 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 965.44 toks/s, output: 82.12 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 716.89 toks/s, output: 81.19 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 417.50 toks/s, output: 82.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 543.99 toks/s, output: 82.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 615.25 toks/s, output: 82.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 411.15 toks/s, output: 82.81 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 599.43 toks/s, output: 81.84 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 780.56 toks/s, output: 81.74 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 1350.29 toks/s, output: 80.76 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 1210.77 toks/s, output: 81.12 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 534.80 toks/s, output: 82.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 377.07 toks/s, output: 83.14 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.84it/s, est. speed input: 582.44 toks/s, output: 82.93 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 433.92 toks/s, output: 82.97 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.93it/s, est. speed input: 626.99 toks/s, output: 82.95 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 29-year-old flight attendant who just moved countries. I feel lonely, disoriented, and miss home intensely. I talk about itineraries and logistics of settling in but avoid discussing emotional homesickness. I ask for practical tips rather than explore grief.', 'conversation': [(0, "Therapist: Before we begin, I want to acknowledge that you've reached out for support, and I'm here to help you explore your thoughts and feelings. Can you tell me a bit more about what brings you to therapy at this time?\n"), (1, "Patient: I guess I'm just having a hard time adjusting to the daily routines here, it's not like I had to learn a new job or anything, but everything from grocery shopping to navigating public transportation feels overwhelming. I'm just trying to get settled in and figure out my new place, but it's taking longer than I expected.\n"), (2, 'Therapist: It sounds like the uncertainty and unfamiliarity of your ne

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.16it/s, est. speed input: 332.84 toks/s, output: 84.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 455.45 toks/s, output: 82.43 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 341.66 toks/s, output: 83.14 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 495.68 toks/s, output: 82.61 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 590.88 toks/s, output: 82.45 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 631.31 toks/s, output: 82.47 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 602.50 toks/s, output: 82.48 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 777.81 toks/s, output: 82.37 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 710.95 toks/s, output: 82.46 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 897.86 toks/s, output: 82.07 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 671.84 toks/s, output: 81.05 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 472.65 toks/s, output: 82.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 557.77 toks/s, output: 82.74 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 422.64 toks/s, output: 82.81 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 541.91 toks/s, output: 82.85 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 589.51 toks/s, output: 82.20 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 642.51 toks/s, output: 82.11 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 845.70 toks/s, output: 81.50 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 1616.24 toks/s, output: 80.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 417.97 toks/s, output: 82.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 548.54 toks/s, output: 82.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 433.10 toks/s, output: 83.18 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 421.10 toks/s, output: 83.11 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 379.08 toks/s, output: 83.15 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 52-year-old mechanic whose back pain makes working and hobbies painful. I’ve become withdrawn and irritable. I focus on medical treatments and physical therapy plans, then shut down if asked about my mood or isolation.', 'conversation': [(0, 'Therapist: I want to acknowledge that it takes a lot of courage to share your concerns with me today. Can you tell me more about what brings you to therapy at this specific time in your life?\n'), (1, 'Patient: "I\'m just feeling really frustrated with my pain and how it\'s affecting my life. It\'s hard to see my friends and family doing stuff without me, while I\'m stuck on the couch."\n'), (2, "Therapist: It sounds like there's a sense of isolation and disconnection from the things that bring you joy, and that your physical pain is not only a physical burden, but also an emotional one. Can you tell me more about what's been going through your mind when you see your friends

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 286.77 toks/s, output: 83.79 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 291.04 toks/s, output: 82.79 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 458.02 toks/s, output: 82.57 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 496.38 toks/s, output: 82.49 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 661.88 toks/s, output: 82.33 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 552.70 toks/s, output: 82.49 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 735.38 toks/s, output: 82.47 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 607.45 toks/s, output: 82.47 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 636.52 toks/s, output: 82.13 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.16s/it, est. speed input: 688.03 toks/s, output: 82.11 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 423.70 toks/s, output: 82.04 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 664.93 toks/s, output: 82.91 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 598.61 toks/s, output: 82.81 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 592.16 toks/s, output: 82.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 572.12 toks/s, output: 82.48 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 605.59 toks/s, output: 81.66 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 732.15 toks/s, output: 81.61 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 747.37 toks/s, output: 81.62 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 1118.38 toks/s, output: 81.23 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 586.11 toks/s, output: 82.13 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.01it/s, est. speed input: 619.18 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 525.85 toks/s, output: 82.68 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 413.72 toks/s, output: 82.74 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 564.58 toks/s, output: 82.30 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 26-year-old intern whose relationships swing between idealization and devaluation. I fear abandonment and react strongly to perceived slights. I dramatize recent conflicts and demand advice on fixing relationships but resist talking about my own role in conflicts.', 'conversation': [(0, "Therapist: I'm here to support you in exploring your thoughts and feelings in a safe and non-judgmental space. Can you tell me what brings you here today and what you hope to work on in our sessions together?\n"), (1, "Patient: I've just been feeling really overwhelmed and anxious about my relationships lately, and I'm hoping to figure out why I keep attracting people who seem to be taking advantage of me. Every time I think I've found someone special, I get this sinking feeling that they're just going to leave me or hurt me in some way.\n"), (2, 'Therapist: It sounds like you\'ve had some pretty consistent experiences with peopl

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 233.27 toks/s, output: 83.30 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 273.32 toks/s, output: 82.55 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 410.64 toks/s, output: 81.60 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 467.46 toks/s, output: 80.12 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 440.32 toks/s, output: 82.31 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 702.48 toks/s, output: 82.26 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 597.19 toks/s, output: 82.44 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 830.89 toks/s, output: 82.24 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 751.16 toks/s, output: 82.34 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 928.90 toks/s, output: 82.18 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 603.82 toks/s, output: 81.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 562.89 toks/s, output: 82.94 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 694.83 toks/s, output: 82.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 523.02 toks/s, output: 82.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 494.36 toks/s, output: 82.78 toks/s]


eval_index_consistency


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.57it/s, est. speed input: 1330.70 toks/s, output: 79.94 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 1383.96 toks/s, output: 80.68 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 844.55 toks/s, output: 81.79 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 1219.48 toks/s, output: 81.21 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 440.40 toks/s, output: 82.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 439.07 toks/s, output: 83.10 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 521.72 toks/s, output: 83.07 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 490.87 toks/s, output: 83.09 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.18it/s, est. speed input: 712.94 toks/s, output: 82.85 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 34-year-old dispatcher who worries constantly about family health, finances, and world events. I struggle to switch off. I rattle off to-do lists and contingency plans when asked about anxiety. I ask for coping strategies rather than reflect on emotional triggers.', 'conversation': [(0, "Therapist: I'm noticing that you've been mentioning feelings of frustration with your job lately, but when I ask you to elaborate on what specifically is causing those feelings, you seem to change the subject. Can you help me understand what's preventing you from exploring that further with me?\n"), (1, "Patient: I guess I just don't want to dwell on anything that might make me feel overwhelmed, and I figure if I just focus on solutions, I can stay on top of things. I've always been a problem-solver, and it's hard for me to not want to think about how to fix a problem rather than just sitting with how I feel about it.\n"), (2, "T

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 257.95 toks/s, output: 83.75 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 323.78 toks/s, output: 82.69 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 430.21 toks/s, output: 82.50 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 425.30 toks/s, output: 82.63 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 533.45 toks/s, output: 82.56 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 833.66 toks/s, output: 82.17 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 788.83 toks/s, output: 82.26 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 938.20 toks/s, output: 81.99 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 887.32 toks/s, output: 82.03 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 1004.67 toks/s, output: 81.31 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 465.76 toks/s, output: 81.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 447.22 toks/s, output: 82.03 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 437.96 toks/s, output: 82.26 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 529.14 toks/s, output: 81.72 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 627.68 toks/s, output: 82.29 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 588.68 toks/s, output: 81.36 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 1092.78 toks/s, output: 80.35 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 1005.32 toks/s, output: 81.58 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 1039.03 toks/s, output: 81.61 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s, est. speed input: 574.40 toks/s, output: 82.05 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 462.06 toks/s, output: 81.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 491.91 toks/s, output: 81.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s, est. speed input: 590.52 toks/s, output: 82.35 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 443.42 toks/s, output: 82.49 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a teacher overwhelmed by students’ emotional needs. I feel burned out and question my effectiveness. I recount classroom incidents in detail but deflect when asked about my own feelings. I seek classroom management tips instead of self-care.', 'conversation': [(0, "Therapist: I'm glad you're here today, and I want to start by acknowledging that it takes a lot of courage to seek help. Can you tell me what brings you to our session today and what you hope to work on during our time together?\n"), (1, "Patient: I guess I just feel like I'm not sure if I'm making a difference in my students' lives, and it's hard to shake the feeling that I'm constantly playing catch-up. I've tried to adjust my lesson plans, but somehow the classroom just feels more chaotic than ever.\n"), (2, 'Therapist: Can you tell me more about what you mean by "not making a difference" in your students\' lives? How do you think your presence or act

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.97it/s, est. speed input: 458.71 toks/s, output: 83.40 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 395.72 toks/s, output: 81.68 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 422.73 toks/s, output: 82.24 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 484.49 toks/s, output: 82.27 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 667.92 toks/s, output: 82.12 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 551.72 toks/s, output: 82.36 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 560.98 toks/s, output: 82.15 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 784.25 toks/s, output: 81.35 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 588.09 toks/s, output: 81.94 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 1076.73 toks/s, output: 81.79 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 583.14 toks/s, output: 81.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 616.61 toks/s, output: 82.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 446.88 toks/s, output: 82.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 525.74 toks/s, output: 81.58 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 482.86 toks/s, output: 82.47 toks/s]


eval_index_consistency


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.32it/s, est. speed input: 1017.58 toks/s, output: 81.31 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 853.09 toks/s, output: 81.76 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 772.41 toks/s, output: 81.95 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 1229.71 toks/s, output: 80.90 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 388.48 toks/s, output: 82.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.92it/s, est. speed input: 588.74 toks/s, output: 82.73 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 589.57 toks/s, output: 82.61 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 491.66 toks/s, output: 82.74 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 528.62 toks/s, output: 82.65 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 30-year-old trainer obsessed with perceived flaws in my appearance, despite compliments. I compare myself to clients obsessively. I critique my diet and exercise regimen at length and resist exploring underlying self-esteem issues. I ask for workout adjustments rather than emotional support.', 'conversation': [(0, "Therapist: What brings you to therapy at this time, and what are some of the specific issues you're hoping to address during our work together?\n"), (1, 'Patient: "I\'m just feeling really frustrated with my physique, even though people at the gym tell me I look great. I\'m hoping to figure out why I\'m not happy with my progress, no matter how hard I work."\n'), (2, 'Therapist: Can you tell me more about what "great" looks like to you - what standards or ideals do you have in mind when you think about your physique? How do you think those expectations compare to how you\'re experiencing yourself in yo

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.93it/s, est. speed input: 298.30 toks/s, output: 83.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 400.76 toks/s, output: 82.25 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 457.82 toks/s, output: 82.17 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 458.48 toks/s, output: 82.22 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 459.90 toks/s, output: 82.42 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 689.68 toks/s, output: 82.28 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 612.34 toks/s, output: 82.36 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 781.72 toks/s, output: 82.28 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 676.60 toks/s, output: 82.48 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 823.65 toks/s, output: 78.86 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 483.18 toks/s, output: 81.15 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 428.55 toks/s, output: 82.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 428.97 toks/s, output: 82.53 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 489.52 toks/s, output: 82.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 406.71 toks/s, output: 82.92 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.17it/s, est. speed input: 995.17 toks/s, output: 80.57 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 845.44 toks/s, output: 81.95 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s, est. speed input: 1391.90 toks/s, output: 80.52 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 916.63 toks/s, output: 80.88 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 410.27 toks/s, output: 82.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 460.61 toks/s, output: 82.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 527.27 toks/s, output: 82.06 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 476.05 toks/s, output: 81.65 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 436.88 toks/s, output: 80.80 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a first-year student who misses home desperately and fears joining campus activities. I eat alone and avoid dorm events. I describe routines back home nostalgically and avoid talking about current loneliness. I ask for tips on making friends rather than express fear.', 'conversation': [(0, "Therapist: Before we begin, I want to acknowledge that it's taking a lot of courage for you to share your thoughts and feelings with me today. Can you tell me what brings you to therapy at this particular time?\n"), (1, "Patient: I guess I just feel like I'm struggling to adjust here, and I'm hoping to find ways to make my time in college more enjoyable. I've heard therapy can be helpful in figuring out new strategies for dealing with challenges.\n"), (2, "Therapist: That sounds like a very practical goal - finding ways to make your college experience more enjoyable. Can you tell me more about what specifically is making it feel

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 196.64 toks/s, output: 81.72 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 319.33 toks/s, output: 80.49 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 441.37 toks/s, output: 80.25 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 446.75 toks/s, output: 82.45 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 534.72 toks/s, output: 82.36 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 597.49 toks/s, output: 82.29 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 749.55 toks/s, output: 82.12 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 724.10 toks/s, output: 81.84 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 808.18 toks/s, output: 81.61 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 989.55 toks/s, output: 81.43 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 527.96 toks/s, output: 80.91 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.14s/it, est. speed input: 342.39 toks/s, output: 82.31 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 685.96 toks/s, output: 82.03 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 494.45 toks/s, output: 82.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 640.44 toks/s, output: 82.10 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 641.45 toks/s, output: 80.97 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 1005.00 toks/s, output: 80.36 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 1094.26 toks/s, output: 80.23 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.73it/s, est. speed input: 1616.99 toks/s, output: 79.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s, est. speed input: 480.11 toks/s, output: 81.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 379.80 toks/s, output: 81.92 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 483.83 toks/s, output: 81.68 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 485.93 toks/s, output: 81.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 421.71 toks/s, output: 81.57 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 55-year-old manager questioning life choices, feeling restless and unfulfilled despite career success. I outline potential career pivots and bucket-list goals without discussing feelings of regret or fear about change.', 'conversation': [(0, "Therapist: I'd like to start by acknowledging that it takes a lot of courage to be here and share your thoughts and feelings with me. Can you tell me more about what brings you to therapy at this point in your life, and what are some of the specific issues you're hoping to work on during our sessions together?\n"), (1, "Patient: I've been feeling stuck in a pattern of going through the motions, like I'm just ticking boxes on a list of expectations rather than truly living. Lately, I've been waking up with a sense of emptiness, wondering if there's more to life than what I've built for myself.\n"), (2, 'Therapist: Can you tell me more about what you mean by "what I\'ve built 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.15it/s, est. speed input: 332.22 toks/s, output: 81.97 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 249.25 toks/s, output: 81.96 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 424.31 toks/s, output: 82.26 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 356.29 toks/s, output: 82.06 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 458.14 toks/s, output: 81.77 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 504.29 toks/s, output: 81.46 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 590.53 toks/s, output: 81.28 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 802.08 toks/s, output: 81.25 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 1293.36 toks/s, output: 79.98 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 910.64 toks/s, output: 81.19 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 560.77 toks/s, output: 80.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.77it/s, est. speed input: 742.98 toks/s, output: 81.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 544.46 toks/s, output: 81.67 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 648.17 toks/s, output: 81.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 442.62 toks/s, output: 82.04 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 950.29 toks/s, output: 80.28 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 996.53 toks/s, output: 80.84 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 1561.22 toks/s, output: 80.11 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 1251.07 toks/s, output: 80.30 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 444.97 toks/s, output: 81.16 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 531.51 toks/s, output: 81.27 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.93it/s, est. speed input: 637.42 toks/s, output: 81.37 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 375.83 toks/s, output: 82.32 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 429.62 toks/s, output: 82.15 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 28-year-old content creator terrified that my followers will discover I’m a fraud. I dread posting. I share engagement stats and content strategies but deflect when asked about emotional vulnerability. I seek algorithm hacks rather than reassurance.', 'conversation': [(0, "Therapist: I'm glad you've decided to explore your feelings with me. Can you tell me what brings you to therapy at this time, and what you hope to achieve from our sessions together?\n"), (1, "Patient: It feels like I'm just going through the motions with my content, sharing tips and tricks, but I'm really just trying to avoid talking about the emptiness I feel inside. I guess I'm hoping you can help me figure out why I'm so driven to appear perfect online, when in reality, I'm struggling to find purpose in my own life.\n"), (2, 'Therapist: That takes a lot of courage to acknowledge those feelings and be honest with yourself about them. Can you

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 317.56 toks/s, output: 82.48 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.09it/s, est. speed input: 456.31 toks/s, output: 81.63 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.94it/s, est. speed input: 511.58 toks/s, output: 81.69 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.33it/s, est. speed input: 709.34 toks/s, output: 81.66 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.92it/s, est. speed input: 665.28 toks/s, output: 80.99 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 501.26 toks/s, output: 81.81 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 488.94 toks/s, output: 81.67 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 766.09 toks/s, output: 80.94 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 801.99 toks/s, output: 81.28 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 1065.99 toks/s, output: 81.26 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 465.07 toks/s, output: 81.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 350.88 toks/s, output: 81.87 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 546.32 toks/s, output: 81.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 597.60 toks/s, output: 81.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 483.27 toks/s, output: 82.26 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 626.91 toks/s, output: 81.18 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 891.94 toks/s, output: 80.78 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.03it/s, est. speed input: 1365.21 toks/s, output: 79.46 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 1270.18 toks/s, output: 80.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 507.51 toks/s, output: 81.85 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 382.93 toks/s, output: 82.15 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 437.26 toks/s, output: 82.25 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.07it/s, est. speed input: 617.75 toks/s, output: 81.39 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 372.09 toks/s, output: 81.83 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a firefighter having nightmares about rescue scenes. I wake up panicked and dread going back to sleep. I recount calls in vivid detail but resist discussing emotional aftermath. I request relaxation scripts instead of exploring fear.', 'conversation': [(0, "Therapist: I'm glad you're here today to work through some challenges. Can you start by telling me what brings you in to therapy at this time and what you hope to achieve from our sessions together?\n"), (1, 'Patient: "I\'ve been having trouble sleeping at night and I\'m hoping to find a way to manage my stress levels, maybe find a better way to wind down after a long day on the job."\n'), (2, "Therapist: I'm intrigued that you mention stress, but not other potential factors that might be contributing to your sleep difficulties. Can you tell me more about your job and what specifically is causing you stress at this time?\n"), (3, 'Patient: "It\'s the stuff that 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.47it/s, est. speed input: 381.45 toks/s, output: 81.73 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 351.42 toks/s, output: 81.34 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 388.36 toks/s, output: 81.76 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 384.51 toks/s, output: 81.38 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 467.88 toks/s, output: 81.73 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 542.50 toks/s, output: 81.14 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 589.28 toks/s, output: 81.05 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 649.19 toks/s, output: 81.15 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 690.70 toks/s, output: 80.73 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 809.85 toks/s, output: 81.39 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s, est. speed input: 617.52 toks/s, output: 80.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 478.69 toks/s, output: 81.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 611.71 toks/s, output: 81.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s, est. speed input: 724.91 toks/s, output: 81.13 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 647.56 toks/s, output: 81.33 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s, est. speed input: 892.33 toks/s, output: 80.25 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 851.82 toks/s, output: 80.59 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 903.55 toks/s, output: 80.86 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 1299.97 toks/s, output: 80.29 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 349.85 toks/s, output: 82.10 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 331.63 toks/s, output: 82.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 444.32 toks/s, output: 82.07 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 427.38 toks/s, output: 81.21 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s, est. speed input: 650.04 toks/s, output: 80.25 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 19-year-old swimmer under intense pressure to win scholarships. I feel overwhelmed before every meet. I describe training regimens and dietary plans but avoid admitting fear of failure. I deflect when asked how I cope emotionally.', 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this point in your life, and what specific issues do you hope to address in our work together?\n'), (1, "Patient: To be honest, I just feel like I'm not performing up to my own expectations, and I'm not sure how to shake this feeling of being stuck. I guess I'm hoping we can work on figuring out what's holding me back.\n"), (2, 'Therapist: That sounds like a really frustrating and demotivating experience; can you tell me more about what you mean by "not performing up to your own expectations" - is there a specific area of your life where you feel this way, or is it more of a general feeling?\n'),

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 282.91 toks/s, output: 82.66 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 352.92 toks/s, output: 81.32 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.85it/s, est. speed input: 520.90 toks/s, output: 81.56 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 394.25 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 526.20 toks/s, output: 81.97 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.11s/it, est. speed input: 419.96 toks/s, output: 82.01 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 544.86 toks/s, output: 81.48 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 587.97 toks/s, output: 81.25 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.06s/it, est. speed input: 691.40 toks/s, output: 81.34 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 1001.24 toks/s, output: 80.95 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 516.65 toks/s, output: 80.60 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.84it/s, est. speed input: 752.36 toks/s, output: 81.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 741.09 toks/s, output: 81.19 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.09it/s, est. speed input: 893.66 toks/s, output: 81.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 561.08 toks/s, output: 81.53 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.94it/s, est. speed input: 915.57 toks/s, output: 79.86 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 688.01 toks/s, output: 80.81 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 1517.94 toks/s, output: 80.68 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 906.96 toks/s, output: 80.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 360.31 toks/s, output: 81.94 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.73it/s, est. speed input: 547.46 toks/s, output: 81.68 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.40it/s, est. speed input: 810.34 toks/s, output: 79.35 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 607.10 toks/s, output: 81.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 336.71 toks/s, output: 82.57 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 36-year-old chef six months sober after alcohol dependency. Kitchens trigger cravings and I fear relapse. I talk about meeting attendance and sponsor relationships but sidestep discussing emotional triggers. I ask for relapse prevention tools.', 'conversation': [(0, "Therapist: Before we begin, I want to acknowledge that it's taking courage to share your struggles with me today. Can you tell me what brings you to therapy at this moment, and what are your hopes for our work together?\n"), (1, "Patient: I'm feeling stuck in my recovery and unsure how to navigate the emotions that still linger from my past, especially when I'm in the kitchen. I want to learn healthier ways to cope with those feelings so I can maintain my sobriety and move forward.\n"), (2, "Therapist: I'm glad you've identified the kitchen as a specific trigger area - can you tell me more about what happens when you're in the kitchen that brings up 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.82it/s, est. speed input: 437.56 toks/s, output: 82.39 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 251.74 toks/s, output: 81.90 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 487.51 toks/s, output: 81.83 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 399.92 toks/s, output: 81.69 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 498.55 toks/s, output: 81.42 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.09s/it, est. speed input: 425.26 toks/s, output: 80.65 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 672.32 toks/s, output: 79.95 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 676.75 toks/s, output: 81.60 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 697.54 toks/s, output: 81.71 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 849.02 toks/s, output: 81.61 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 563.81 toks/s, output: 81.15 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 610.93 toks/s, output: 81.97 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.15s/it, est. speed input: 363.29 toks/s, output: 82.09 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 732.16 toks/s, output: 81.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 613.49 toks/s, output: 81.79 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 656.36 toks/s, output: 80.99 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 957.15 toks/s, output: 80.52 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  3.00it/s, est. speed input: 2340.98 toks/s, output: 78.23 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 1308.96 toks/s, output: 80.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 525.25 toks/s, output: 81.59 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.05it/s, est. speed input: 633.63 toks/s, output: 82.29 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.20it/s, est. speed input: 725.84 toks/s, output: 81.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 457.50 toks/s, output: 82.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 422.40 toks/s, output: 82.35 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a fellow therapist absorbing clients’ trauma. I feel numb and question my ability to help others. I share anonymized client stories but avoid describing my own emotional responses. I request self-care protocols over deeper processing.', 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this time, what are you hoping to work on or change in your life?\n'), (1, "Patient: It's the cumulative effect of carrying around so many others' traumas that I'm starting to feel like I'm losing myself in the process, like I'm just a container for everyone else's pain. I'm wondering if I can find a way to maintain my own emotional boundaries without feeling guilty or like I'm abandoning my clients.\n"), (2, 'Therapist: That sounds like a very heavy burden to carry; can you help me understand what you mean by "carrying around others\' traumas" - are you referring to the emotional weight of you

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 245.76 toks/s, output: 82.98 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 379.46 toks/s, output: 81.96 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 469.40 toks/s, output: 81.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 394.44 toks/s, output: 81.53 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 483.36 toks/s, output: 81.14 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 645.21 toks/s, output: 81.60 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 958.90 toks/s, output: 81.20 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 829.00 toks/s, output: 81.43 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 1063.97 toks/s, output: 79.75 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 737.76 toks/s, output: 80.23 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 556.10 toks/s, output: 79.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 624.10 toks/s, output: 79.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 552.49 toks/s, output: 81.32 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 756.70 toks/s, output: 81.90 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 627.56 toks/s, output: 81.61 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 767.98 toks/s, output: 80.09 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 1149.22 toks/s, output: 80.26 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 1229.38 toks/s, output: 80.86 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 1164.71 toks/s, output: 80.37 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 475.79 toks/s, output: 82.08 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 456.98 toks/s, output: 82.34 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.28it/s, est. speed input: 737.32 toks/s, output: 82.17 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 525.82 toks/s, output: 82.15 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 474.27 toks/s, output: 82.05 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 29-year-old tester terrified of flying, even though I’ve never had a bad in-flight experience. My job requires travel and I panic at the airport. I discuss fear-management techniques I’ve read online but avoid talking about the physical sensations of panic. I request step-by-step exposure plans.', 'conversation': [(0, "Therapist: I'm here to help you explore your thoughts and feelings in a safe and non-judgmental space. Can you tell me what brings you to therapy at this time, and what are some specific concerns you'd like to work on during our sessions?\n"), (1, "Patient: I'm really struggling to manage my anxiety around flying, it's holding me back from doing my job effectively and impacting my relationships with my colleagues and family. I've tried some relaxation techniques online but feel like I've plateaued and could use some more tailored guidance.\n"), (2, "Therapist: I notice that you mention trying relax

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 241.13 toks/s, output: 82.98 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 301.40 toks/s, output: 82.08 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 511.67 toks/s, output: 81.87 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 494.54 toks/s, output: 81.72 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 486.23 toks/s, output: 81.83 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 542.01 toks/s, output: 82.09 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 733.32 toks/s, output: 81.77 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 578.46 toks/s, output: 81.97 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 879.79 toks/s, output: 81.78 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 878.49 toks/s, output: 81.56 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 616.80 toks/s, output: 80.99 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 669.26 toks/s, output: 82.37 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 753.89 toks/s, output: 81.90 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 419.89 toks/s, output: 81.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 600.32 toks/s, output: 81.32 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s, est. speed input: 944.99 toks/s, output: 80.38 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 909.34 toks/s, output: 80.69 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 1347.11 toks/s, output: 80.34 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 1296.45 toks/s, output: 80.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 464.21 toks/s, output: 81.65 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 363.06 toks/s, output: 82.02 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 380.78 toks/s, output: 81.94 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 534.23 toks/s, output: 82.31 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 429.29 toks/s, output: 82.60 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 32-year-old therapist diagnosed with lupus. Fatigue and flares make work unpredictable and I feel guilty letting clients down. I detail medication schedules and side effects but resist exploring frustration or grief over lost capabilities.', 'conversation': [(0, "Therapist: You've mentioned feeling overwhelmed by the demands of your job, but it seems like you've taken on an excessive workload recently, almost as if you're trying to prove something to yourself or others. What do you think might be driving this desire for increased productivity?\n"), (1, "Patient: I think it's partly because I feel like I'm not just representing myself, but also my clients who have come to expect a certain level of availability from their therapist. By pushing through, I'm trying to honor that expectation and not let anyone down, even if it means I'm sacrificing my own well-being.\n"), (2, "Therapist: That's an interesting dynamic,

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 261.89 toks/s, output: 83.32 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 284.82 toks/s, output: 82.25 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 390.01 toks/s, output: 81.90 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 423.62 toks/s, output: 81.95 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 544.49 toks/s, output: 82.04 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 548.77 toks/s, output: 82.10 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 614.28 toks/s, output: 82.04 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 778.90 toks/s, output: 81.86 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.17s/it, est. speed input: 629.28 toks/s, output: 82.08 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 813.16 toks/s, output: 81.89 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 503.32 toks/s, output: 81.38 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 366.99 toks/s, output: 82.46 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 392.84 toks/s, output: 82.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 431.27 toks/s, output: 82.20 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 479.94 toks/s, output: 82.27 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 792.27 toks/s, output: 80.78 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 862.42 toks/s, output: 81.13 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.21it/s, est. speed input: 1804.05 toks/s, output: 79.78 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 1551.66 toks/s, output: 80.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.32it/s, est. speed input: 725.78 toks/s, output: 81.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 440.08 toks/s, output: 82.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 573.31 toks/s, output: 82.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 563.22 toks/s, output: 82.51 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 382.41 toks/s, output: 82.27 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 24-year-old researcher terrified of presenting findings. I shake and forget lines when I’m at the podium. I focus on rehearsal techniques and slide design but avoid disclosing how terrified I am and how it impacts my career.', 'conversation': [(0, "Therapist: You've been experiencing this pattern of anxiety around meeting new people for a while now, and it's been affecting your social life and job performance. Can you tell me more about what you think is the underlying fear that's driving this anxiety?\n"), (1, 'Patient: "I think it\'s more about the fear of being judged and evaluated by others, not just in a general sense, but specifically by my peers in the academic community, who are already familiar with my work. It\'s like I feel like I\'m being put on trial, and one mistake or misstep could undermine all my credibility."\n'), (2, "Therapist: It sounds like you're putting a tremendous amount of pressure on y

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  4.16it/s, est. speed input: 643.43 toks/s, output: 83.55 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.34it/s, est. speed input: 485.42 toks/s, output: 82.07 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 409.88 toks/s, output: 82.32 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 367.28 toks/s, output: 82.17 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.06s/it, est. speed input: 338.07 toks/s, output: 82.39 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 469.33 toks/s, output: 82.03 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 783.62 toks/s, output: 81.78 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 667.95 toks/s, output: 81.51 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.28s/it, est. speed input: 515.66 toks/s, output: 81.50 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 773.39 toks/s, output: 81.46 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 400.14 toks/s, output: 81.51 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 530.17 toks/s, output: 81.86 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 562.99 toks/s, output: 81.57 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.77it/s, est. speed input: 735.87 toks/s, output: 81.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 647.75 toks/s, output: 82.11 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 524.53 toks/s, output: 81.92 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 917.84 toks/s, output: 80.80 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 1099.62 toks/s, output: 80.68 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 1553.35 toks/s, output: 80.08 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 353.52 toks/s, output: 82.15 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 574.33 toks/s, output: 82.30 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 408.78 toks/s, output: 82.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.21it/s, est. speed input: 709.61 toks/s, output: 82.04 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.64it/s, est. speed input: 879.16 toks/s, output: 82.09 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 39-year-old agent whose moods swing between mild highs and lows. I worry clients notice and doubt my competence. I describe market trends and sales figures but deflect when asked about mood shifts. I request organizational hacks rather than mood-tracking.', 'conversation': [(0, 'Therapist: What brings you to therapy at this time, and what are your hopes for our work together?\n'), (1, 'Patient: "I\'ve been having trouble sleeping lately, waking up feeling anxious and on edge, and I guess I just want to be able to feel more in control of my days."\n'), (2, 'Therapist: Can you tell me more about what you mean by feeling "on edge" – is that a new experience for you, or something that\'s been going on for a while, and what specifically are you feeling "on edge" about?\n'), (3, "Patient: It's just that I feel like I'm constantly juggling multiple tasks and deadlines, and no matter how much I prioritize, I'm always wor

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 245.23 toks/s, output: 82.80 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 270.43 toks/s, output: 82.15 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 327.11 toks/s, output: 82.04 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 581.42 toks/s, output: 81.99 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 595.49 toks/s, output: 82.00 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 493.06 toks/s, output: 82.17 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 692.81 toks/s, output: 81.78 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 609.58 toks/s, output: 82.00 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.17s/it, est. speed input: 645.75 toks/s, output: 81.89 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 871.36 toks/s, output: 81.47 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 422.51 toks/s, output: 81.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 568.94 toks/s, output: 82.51 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 639.52 toks/s, output: 81.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 456.01 toks/s, output: 82.04 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 415.38 toks/s, output: 82.09 toks/s]


eval_index_consistency


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.33it/s, est. speed input: 1205.31 toks/s, output: 79.57 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 1168.34 toks/s, output: 80.45 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 1086.22 toks/s, output: 80.65 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.32it/s, est. speed input: 2377.50 toks/s, output: 79.25 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 566.10 toks/s, output: 81.88 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 435.53 toks/s, output: 82.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 412.88 toks/s, output: 82.31 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 615.22 toks/s, output: 82.39 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 466.06 toks/s, output: 82.40 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 50-year-old electrician who constantly worries I have a serious illness despite negative tests. I check my body daily. I list symptoms in medical detail but resist exploring fear of mortality. I ask for reassurance and diagnostic recommendations.', 'conversation': [(0, "Therapist: I've noticed that you seem to be speaking about your anxiety levels as if they're outside of your control. Can you tell me more about what that's like for you, as if you're not just describing the symptoms, but the person experiencing them?\n"), (1, "Patient: It feels like I'm stuck in this never-ending cycle of checking and worrying, like I'm a spectator watching myself do it, but unable to stop or break free from it. I feel like a part of me is screaming to just take control, to snap out of it, but the other part is paralyzed by the fear of what I might find.\n"), (2, "Therapist: That sounds like a sense of disconnection from your int

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.66it/s, est. speed input: 410.75 toks/s, output: 82.68 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 264.43 toks/s, output: 82.32 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 310.34 toks/s, output: 82.01 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 328.67 toks/s, output: 82.17 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 689.79 toks/s, output: 81.78 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 533.90 toks/s, output: 81.89 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 670.98 toks/s, output: 81.99 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 662.07 toks/s, output: 82.00 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 1176.70 toks/s, output: 81.42 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 944.59 toks/s, output: 81.83 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 584.42 toks/s, output: 81.17 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 564.40 toks/s, output: 82.17 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 446.72 toks/s, output: 82.32 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 560.56 toks/s, output: 81.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.19s/it, est. speed input: 336.89 toks/s, output: 82.54 toks/s]


eval_index_consistency


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.16it/s, est. speed input: 1125.18 toks/s, output: 80.21 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 830.42 toks/s, output: 81.01 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 985.47 toks/s, output: 81.01 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 1331.67 toks/s, output: 80.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 395.40 toks/s, output: 82.16 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.89it/s, est. speed input: 625.68 toks/s, output: 81.28 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 543.10 toks/s, output: 81.72 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 425.13 toks/s, output: 81.60 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 356.05 toks/s, output: 82.16 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 23-year-old instructor obsessed with ‘clean’ eating. I fear ‘unsafe’ foods and rigidly plan every meal. I detail nutritional breakdowns and meal compliance but avoid discussing emotional reasons for food control.', 'conversation': [(0, 'Therapist: Before we begin, can you tell me a bit about what brings you to therapy at this time and what you hope to achieve in our sessions together?\n'), (1, "Patient: I've been feeling overwhelmed with anxiety lately, and I'm having trouble sleeping, so I'm hoping we can work on finding ways to manage that and improve my overall well-being. I'm also curious to explore why I feel the need to be so strict with my diet, even if it's not necessarily affecting my weight.\n"), (2, 'Therapist: That sounds like a good starting point - it takes a lot of courage to acknowledge feelings of overwhelm and a willingness to explore underlying motivations. Can you tell me more about what you m

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.52it/s, est. speed input: 388.78 toks/s, output: 83.31 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.26it/s, est. speed input: 483.59 toks/s, output: 81.73 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 353.76 toks/s, output: 82.49 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 415.48 toks/s, output: 82.30 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 478.54 toks/s, output: 82.07 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 650.13 toks/s, output: 81.81 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 614.80 toks/s, output: 81.57 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 639.97 toks/s, output: 81.53 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 636.33 toks/s, output: 81.38 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.88it/s, est. speed input: 1414.90 toks/s, output: 81.01 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 432.36 toks/s, output: 81.73 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 683.38 toks/s, output: 82.14 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 422.82 toks/s, output: 82.37 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 631.36 toks/s, output: 81.87 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 556.18 toks/s, output: 82.23 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 599.30 toks/s, output: 81.06 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 735.65 toks/s, output: 81.17 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 1271.56 toks/s, output: 80.45 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 834.59 toks/s, output: 81.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 387.22 toks/s, output: 82.18 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 456.94 toks/s, output: 82.67 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 522.69 toks/s, output: 82.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 489.38 toks/s, output: 82.60 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 481.91 toks/s, output: 82.85 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 35-year-old administrator who sacrifices sleep and relationships for work. I feel guilty taking breaks. I talk about dashboard metrics and quarterly targets but deflect when asked about personal life. I seek time-management tools.', 'conversation': [(0, 'Therapist: What brings you here to therapy at this point in your life, and what specific changes are you hoping to make or experience as a result of our work together?\n'), (1, 'Patient: "I\'m just feeling really overwhelmed and burnt out, to be honest. It\'s getting harder to find the energy to do even the things I used to enjoy outside of work."\n'), (2, "Therapist: It sounds like you're struggling to find meaning and pleasure in activities that once brought you joy, which can be a really disheartening experience. Can you tell me more about what specifically has changed for you in the past year or so that might be contributing to this feeling of burnout?\n"), (

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.98it/s, est. speed input: 460.35 toks/s, output: 83.69 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 316.16 toks/s, output: 82.35 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 326.88 toks/s, output: 82.94 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 406.12 toks/s, output: 82.64 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 601.72 toks/s, output: 82.18 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 748.24 toks/s, output: 82.08 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 705.42 toks/s, output: 82.27 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 547.05 toks/s, output: 82.23 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 893.49 toks/s, output: 81.82 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 960.76 toks/s, output: 81.63 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 536.44 toks/s, output: 80.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 638.18 toks/s, output: 81.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 548.44 toks/s, output: 81.92 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 650.24 toks/s, output: 81.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 685.97 toks/s, output: 82.38 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 758.71 toks/s, output: 80.11 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.88it/s, est. speed input: 1126.76 toks/s, output: 81.02 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.20it/s, est. speed input: 1668.07 toks/s, output: 79.53 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 1069.84 toks/s, output: 80.54 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 552.35 toks/s, output: 81.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 581.24 toks/s, output: 82.25 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 386.58 toks/s, output: 82.20 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 426.40 toks/s, output: 82.24 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 561.08 toks/s, output: 82.19 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 29-year-old photographer stuck creatively after a few failed projects. I question my talent and avoid picking up the camera. I discuss equipment and settings in detail but avoid admitting fear of failure. I ask for portfolio critique rather than emotional encouragement.', 'conversation': [(0, 'Therapist: Before we begin, can you tell me what brings you to therapy at this time and what you hope to achieve from our sessions together?\n'), (1, 'Patient: "I\'ve just been feeling really stuck with my photography lately, like I\'m not creating anything worthwhile. I\'ve been browsing through other photographers\' work online, and it\'s making me wonder if I\'ve been going about this all wrong, if there\'s something I\'m missing."\n'), (2, 'Therapist: That sounds like a really frustrating experience - it can be disheartening to feel like we\'re not measuring up to others, especially when it comes to something we\'re pas

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 208.83 toks/s, output: 82.71 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 389.68 toks/s, output: 81.30 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 546.58 toks/s, output: 81.44 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 442.45 toks/s, output: 81.59 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 435.21 toks/s, output: 81.47 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 735.93 toks/s, output: 80.55 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 928.53 toks/s, output: 80.67 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 634.63 toks/s, output: 81.89 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 1030.51 toks/s, output: 80.85 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 824.76 toks/s, output: 81.25 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 641.58 toks/s, output: 80.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 589.67 toks/s, output: 82.28 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 582.72 toks/s, output: 82.16 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 697.61 toks/s, output: 81.81 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 753.17 toks/s, output: 81.25 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 751.37 toks/s, output: 80.61 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 744.84 toks/s, output: 80.43 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 996.43 toks/s, output: 80.58 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 1234.92 toks/s, output: 80.45 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 515.02 toks/s, output: 81.23 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 585.28 toks/s, output: 82.17 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s, est. speed input: 645.10 toks/s, output: 82.31 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 573.20 toks/s, output: 82.34 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s, est. speed input: 701.37 toks/s, output: 82.28 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a 22-year-old violinist who experiences intense stage fright, causing my left hand to lock up during performances. It feels like no matter how much I practice, the tension takes over and ruins my playing. I focus on explaining the exact mechanics of my bow grip, finger placement, and phrasing—anything technical—to stay in familiar territory and steer clear of discussing how anxious I really feel.', 'conversation': [(0, "Therapist: As you've shared that you've been feeling overwhelmed and struggling to cope with stress lately, I want to explore how this might be related to the significant changes you've made in your job and personal life recently. Can you tell me more about how you think these changes are impacting your emotional well-being?\n"), (1, "Patient: Honestly, I'd rather not go down that road right now, can we talk about my music instead? I've been working on perfecting my vibrato technique, but I feel lik

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.77it/s, est. speed input: 272.42 toks/s, output: 83.14 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.39it/s, est. speed input: 560.59 toks/s, output: 81.45 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 319.93 toks/s, output: 82.40 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 538.38 toks/s, output: 80.76 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 372.12 toks/s, output: 81.73 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 435.08 toks/s, output: 81.58 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 590.54 toks/s, output: 81.38 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 870.28 toks/s, output: 80.08 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 706.79 toks/s, output: 80.86 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 818.08 toks/s, output: 81.30 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s, est. speed input: 664.11 toks/s, output: 81.24 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.04it/s, est. speed input: 803.86 toks/s, output: 81.81 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 513.01 toks/s, output: 81.94 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s, est. speed input: 646.03 toks/s, output: 81.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 719.34 toks/s, output: 81.62 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s, est. speed input: 921.03 toks/s, output: 80.26 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 834.45 toks/s, output: 80.62 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 1073.78 toks/s, output: 80.60 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 1285.67 toks/s, output: 80.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 492.82 toks/s, output: 82.13 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.16it/s, est. speed input: 646.98 toks/s, output: 82.22 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.14it/s, est. speed input: 725.94 toks/s, output: 81.61 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.05it/s, est. speed input: 632.60 toks/s, output: 82.15 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 491.77 toks/s, output: 82.21 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I served time in my 20s and now help others re-integrate. Still, I’m terrified people will judge me for my past. I focus on policy and program details, and I gloss over any personal anecdotes or feelings about my own conviction.', 'conversation': [(0, "Therapist: I'm glad you've taken this step to address your concerns and work through them. Before we begin, I want to assure you that our conversations are a safe space for you to explore your thoughts and feelings without fear of judgment.\n"), (1, "Patient: I appreciate that, but it's easier said than done. I've built up this wall around my past, and it's hard to know how to dismantle it.\n"), (2, "Therapist: What I hear you saying is that you're struggling to open up and trust the process, and that's a natural part of this journey – can you tell me more about what comes up for you when you think about dismantling this wall you've built around your past? What is it tha

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.97it/s, est. speed input: 304.21 toks/s, output: 82.96 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 305.99 toks/s, output: 81.23 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 413.87 toks/s, output: 81.32 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 430.17 toks/s, output: 80.81 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 562.68 toks/s, output: 80.97 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 488.24 toks/s, output: 80.34 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 544.40 toks/s, output: 79.04 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 656.87 toks/s, output: 81.59 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 927.20 toks/s, output: 80.51 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 920.14 toks/s, output: 80.61 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 369.80 toks/s, output: 81.24 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 548.22 toks/s, output: 82.03 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 440.28 toks/s, output: 81.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 420.03 toks/s, output: 81.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 439.72 toks/s, output: 82.31 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 541.77 toks/s, output: 80.93 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.63it/s, est. speed input: 1021.64 toks/s, output: 79.96 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 993.59 toks/s, output: 80.29 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.13s/it, est. speed input: 827.60 toks/s, output: 80.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 446.74 toks/s, output: 82.02 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 392.87 toks/s, output: 82.37 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 407.59 toks/s, output: 82.02 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 451.65 toks/s, output: 82.24 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 455.60 toks/s, output: 82.44 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’ve spent years in conflict areas. Now I’m back home but my nightmares and hypervigilance never stop. I recount mission logistics and security protocols at length, but I clam up when asked how those memories affect me now.', 'conversation': [(0, "Therapist: As we begin our session today, I want to acknowledge that you've expressed feeling overwhelmed and stuck in our previous sessions. Can you tell me more about what's been feeling unmanageable for you lately?\n"), (1, 'Patient: It\'s just...I keep replaying this one incident over and over in my head - a specific moment where I had to make a split-second decision to save a teammate, but I\'m not sure if I made the right call. The "what-ifs" keep me up at night.\n'), (2, "Therapist: It sounds like this incident has become an all-consuming loop in your mind, and you're struggling to shake the feeling of uncertainty. Can you tell me more about what specifically is making

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 219.24 toks/s, output: 82.57 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 264.56 toks/s, output: 81.91 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 358.51 toks/s, output: 81.94 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 478.60 toks/s, output: 81.20 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 525.34 toks/s, output: 81.61 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 508.81 toks/s, output: 81.60 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 760.69 toks/s, output: 81.37 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 735.35 toks/s, output: 81.58 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 1341.34 toks/s, output: 80.86 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 855.98 toks/s, output: 81.57 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 475.10 toks/s, output: 81.14 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 707.25 toks/s, output: 82.15 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 542.93 toks/s, output: 81.11 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 492.57 toks/s, output: 80.07 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 388.16 toks/s, output: 79.72 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 644.45 toks/s, output: 80.40 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 976.06 toks/s, output: 80.40 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 1143.73 toks/s, output: 80.24 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 1692.61 toks/s, output: 80.02 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 504.78 toks/s, output: 81.46 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 560.72 toks/s, output: 81.92 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 477.16 toks/s, output: 81.71 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 543.18 toks/s, output: 81.99 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.19it/s, est. speed input: 704.93 toks/s, output: 81.25 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I help couples communicate better, yet I can’t set boundaries in my own relationships and fear being alone. I ask the therapist to validate my relationship advice back to me, mirroring their language instead of admitting my own needs.', 'conversation': [(0, "Therapist: I want to acknowledge that it takes a lot of courage to share your struggles with me today, and I'm here to support you in exploring what's been going on. Can you tell me more about what brings you to our session today, what specifically has you feeling overwhelmed or uncertain?\n"), (1, "Patient: I've been noticing that my partners seem to take my advice on communication really well, they're always open to hearing my perspective and working through issues together, but I often struggle to apply those same principles to my own relationships - it's like I'm giving them the blueprint for how to communicate with me, but I don't know how to use it for myself

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.39it/s, est. speed input: 523.59 toks/s, output: 81.59 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 281.78 toks/s, output: 81.67 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 444.71 toks/s, output: 81.61 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 420.75 toks/s, output: 81.52 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 393.25 toks/s, output: 81.32 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 447.32 toks/s, output: 81.77 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 567.70 toks/s, output: 81.54 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 689.76 toks/s, output: 81.72 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 687.74 toks/s, output: 81.59 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 1158.25 toks/s, output: 81.10 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 374.07 toks/s, output: 81.65 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 691.57 toks/s, output: 82.08 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 611.74 toks/s, output: 81.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 513.94 toks/s, output: 81.67 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 517.50 toks/s, output: 82.06 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 809.87 toks/s, output: 80.27 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 713.70 toks/s, output: 81.07 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.09s/it, est. speed input: 723.79 toks/s, output: 81.03 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 1043.83 toks/s, output: 80.46 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 467.80 toks/s, output: 81.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.63it/s, est. speed input: 500.72 toks/s, output: 81.81 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.38it/s, est. speed input: 785.52 toks/s, output: 81.17 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 409.54 toks/s, output: 82.16 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.27it/s, est. speed input: 683.61 toks/s, output: 81.76 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’ve summited peaks worldwide, yet I freeze and hyperventilate on ledges now. It’s humiliating. I describe gear choices, weather patterns, and route plans but avoid discussing the terror I feel when I look down.', 'conversation': [(0, 'Therapist: How have you found the experience of online sessions so far, and what are your hopes for our work together today?\n'), (1, "Patient: It's been surprisingly comfortable so far, I think it's nice to have a neutral space to talk about this stuff. I'm hoping to work through whatever's holding me back from fully enjoying the sport I love, and maybe learn to be more honest with myself about what I'm capable of.\n"), (2, "Therapist: That's a great start - being honest with ourselves about our capabilities can be a powerful step in personal growth. Can you tell me more about this sport you're struggling with, what specifically has you feeling held back or uncertain about your abilitie

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 282.95 toks/s, output: 82.68 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s, est. speed input: 376.64 toks/s, output: 81.73 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 417.81 toks/s, output: 82.36 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 410.10 toks/s, output: 82.02 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 446.32 toks/s, output: 82.24 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.20s/it, est. speed input: 412.53 toks/s, output: 82.00 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 574.88 toks/s, output: 81.70 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 709.98 toks/s, output: 81.68 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.20s/it, est. speed input: 622.93 toks/s, output: 81.50 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 839.87 toks/s, output: 81.49 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 586.45 toks/s, output: 80.84 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 412.05 toks/s, output: 82.01 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 586.49 toks/s, output: 81.53 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 503.31 toks/s, output: 81.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 720.98 toks/s, output: 81.58 toks/s]


eval_index_consistency


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  3.02it/s, est. speed input: 1451.81 toks/s, output: 78.80 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.85it/s, est. speed input: 1219.81 toks/s, output: 79.59 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 1020.62 toks/s, output: 80.22 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 1377.10 toks/s, output: 79.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 424.01 toks/s, output: 81.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 560.11 toks/s, output: 82.26 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.89it/s, est. speed input: 660.35 toks/s, output: 81.36 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 586.55 toks/s, output: 82.01 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.35s/it, est. speed input: 249.49 toks/s, output: 82.42 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I build shopping apps for a living, but I spend half my paycheck on things I don’t need and can’t stop clicking “buy.” I show purchase histories and analytics dashboards, then insist my habit is ‘just market research,’ avoiding talk of impulse or guilt.', 'conversation': [(0, "Therapist: I'm glad you've taken this step to explore your concerns, and I'm here to support you in understanding them more deeply. Can you tell me more about what brings you to therapy at this time in your life?\n"), (1, "Patient: I guess I just feel like I'm losing control of my finances, and it's starting to impact my relationships with the people I care about. They're always asking me why I'm still living in a small apartment when I can afford to upgrade.\n"), (2, "Therapist: It sounds like the financial stress is seeping into your relationships and causing feelings of discomfort, which might be making you question your own decision-making sk

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.77it/s, est. speed input: 427.71 toks/s, output: 83.31 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s, est. speed input: 411.12 toks/s, output: 81.84 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 276.85 toks/s, output: 82.19 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 471.10 toks/s, output: 81.99 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 455.04 toks/s, output: 81.79 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 485.93 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 748.73 toks/s, output: 81.68 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 632.20 toks/s, output: 81.97 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 928.65 toks/s, output: 81.62 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 898.05 toks/s, output: 81.74 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 482.80 toks/s, output: 81.94 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 513.46 toks/s, output: 82.36 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 635.93 toks/s, output: 81.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 697.07 toks/s, output: 81.32 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 517.54 toks/s, output: 81.85 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 697.95 toks/s, output: 80.70 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 869.35 toks/s, output: 80.67 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 752.24 toks/s, output: 80.87 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 1281.73 toks/s, output: 80.19 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 479.69 toks/s, output: 81.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 418.82 toks/s, output: 82.39 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 395.16 toks/s, output: 82.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 496.95 toks/s, output: 82.31 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 401.19 toks/s, output: 82.26 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m a dancer whose career depends on physical precision, but agonizing migraines strike unpredictably. I track medication dosages and aura patterns in excruciating detail, but when asked about stress triggers, I say ‘it’s just biology.’', 'conversation': [(0, "Therapist: I notice you seemed hesitant to discuss your recent job change, can you tell me more about what's been going on since you made the switch?\n"), (1, "Patient: To be honest, I think it's just a relief to be out of the old company, but the new studio is still feeling a bit like a pressure cooker - constant performances and auditions, you know?\n"), (2, 'Therapist: It sounds like the relief you felt from leaving your old job is being followed by a sense of overwhelm in the new environment, and the pressure of constant performances and auditions might be triggering some anxiety in you. Can you tell me more about what you think you were hoping to escape by l

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.19it/s, est. speed input: 493.69 toks/s, output: 83.34 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.26it/s, est. speed input: 491.94 toks/s, output: 81.61 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 331.80 toks/s, output: 82.61 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 532.42 toks/s, output: 82.04 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 433.15 toks/s, output: 82.03 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 733.22 toks/s, output: 81.84 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 625.66 toks/s, output: 82.12 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 811.07 toks/s, output: 81.84 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.35s/it, est. speed input: 446.43 toks/s, output: 82.18 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.30it/s, est. speed input: 1714.13 toks/s, output: 80.64 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 514.29 toks/s, output: 81.27 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 556.28 toks/s, output: 82.25 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 498.51 toks/s, output: 82.46 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 399.15 toks/s, output: 82.58 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 563.83 toks/s, output: 82.01 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.04it/s, est. speed input: 878.60 toks/s, output: 81.54 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 730.31 toks/s, output: 81.58 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 1044.93 toks/s, output: 81.32 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 805.64 toks/s, output: 81.14 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 369.94 toks/s, output: 82.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 484.22 toks/s, output: 82.32 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 535.09 toks/s, output: 82.04 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s, est. speed input: 632.22 toks/s, output: 82.37 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 502.54 toks/s, output: 82.58 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I was mercilessly bullied two years ago. Now I dread the hallway, hyper-alert for whispers and laughter. I text my answers in session or look down at my phone. If asked to describe feelings, I reply with one-word texts: ‘fine,’ ‘okay.’', 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this time, and what you hope to achieve from our sessions?\n'), (1, "Patient: I guess I just want to feel more comfortable in my own skin, you know? Like, I feel like I'm constantly on edge, waiting for something bad to happen.\n"), (2, 'Therapist: That sounds like a really distressing experience, and it\'s interesting that you mention feeling on edge - can you help me understand what you mean by "something bad to happen" - is there a specific kind of event or situation that you\'re worried about, or is it more of a general feeling?\n'), (3, "Patient: It's just little things, I guess - a look from

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.67it/s, est. speed input: 413.38 toks/s, output: 83.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 324.70 toks/s, output: 81.94 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 437.78 toks/s, output: 81.98 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 575.33 toks/s, output: 81.67 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 528.96 toks/s, output: 82.38 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 483.60 toks/s, output: 82.31 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 553.33 toks/s, output: 82.05 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 1038.94 toks/s, output: 81.59 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 608.67 toks/s, output: 82.20 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 940.08 toks/s, output: 81.80 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 569.84 toks/s, output: 81.19 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 547.47 toks/s, output: 82.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 530.81 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 519.71 toks/s, output: 82.65 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 616.97 toks/s, output: 82.47 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 653.52 toks/s, output: 82.06 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 782.86 toks/s, output: 81.83 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 711.49 toks/s, output: 81.31 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 1033.01 toks/s, output: 81.04 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 532.31 toks/s, output: 81.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 443.73 toks/s, output: 82.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 421.82 toks/s, output: 82.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 383.83 toks/s, output: 82.15 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 473.83 toks/s, output: 82.88 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m expecting my first child and can’t stop worrying about every possible complication. I arrive with hospital-bag checklists and birth-plan spreadsheets, and I deflect when invited to share how the anxiety feels inside.', 'conversation': [(0, 'Therapist: Can you tell me what brings you to therapy at this time, and what specific issues or concerns do you hope to work on in our sessions together?\n'), (1, "Patient: To be honest, I just feel like I'm drowning in a sea of uncertainty and I'm not sure how to stay afloat. I'm trying to prepare for every eventuality, but it's not feeling very reassuring, if you know what I mean.\n"), (2, "Therapist: That sounds incredibly overwhelming – it's as if you're trying to anticipate every possible outcome, which can actually increase anxiety rather than alleviate it. Can you tell me more about what you think might happen if you don't prepare for every eventuality?\n"), (3, "Patient:

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 220.56 toks/s, output: 83.07 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 337.07 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 464.66 toks/s, output: 81.81 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 348.18 toks/s, output: 82.44 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 550.78 toks/s, output: 82.05 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 526.79 toks/s, output: 81.99 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 769.49 toks/s, output: 81.75 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s, est. speed input: 674.51 toks/s, output: 81.63 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 889.80 toks/s, output: 81.76 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 930.46 toks/s, output: 81.73 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 534.20 toks/s, output: 80.94 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 525.03 toks/s, output: 82.31 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 684.41 toks/s, output: 82.26 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 610.09 toks/s, output: 82.13 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 586.72 toks/s, output: 81.77 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 551.55 toks/s, output: 81.39 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 932.34 toks/s, output: 80.95 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 1427.70 toks/s, output: 80.28 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 1469.33 toks/s, output: 80.61 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.40it/s, est. speed input: 732.50 toks/s, output: 81.65 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 590.39 toks/s, output: 82.29 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.15it/s, est. speed input: 699.20 toks/s, output: 81.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 437.56 toks/s, output: 82.12 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 474.88 toks/s, output: 82.13 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’ve been HIV-positive for 20 years. I take my meds, but I’m terrified of rejection if anyone finds out. I review lab results and adherence logs meticulously, then steer away when asked about intimate relationships or fear of disclosure.', 'conversation': [(0, "Therapist: I want to start by acknowledging that it takes a lot of courage to share your struggles with me, and I'm here to support you in exploring them. Can you tell me more about what brings you to therapy at this time and what you hope to work on in our sessions together?\n"), (1, 'Patient: "I\'ve been feeling like I\'m living in a perpetual state of waiting, constantly on edge, waiting to see how people will react to me, and it\'s exhausting. I feel like I\'ve been putting on a mask for so long, I\'m not even sure who I am anymore."\n'), (2, 'Therapist: I sense a deep sense of disconnection from yourself, like you\'re observing yourself from outside your ow

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.07it/s, est. speed input: 319.95 toks/s, output: 83.10 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 403.16 toks/s, output: 82.12 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 324.52 toks/s, output: 82.65 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 484.12 toks/s, output: 81.66 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 380.99 toks/s, output: 81.98 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 608.96 toks/s, output: 81.70 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.92it/s, est. speed input: 1056.36 toks/s, output: 80.96 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 728.05 toks/s, output: 81.85 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 750.12 toks/s, output: 81.83 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 1348.42 toks/s, output: 81.23 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 656.77 toks/s, output: 81.19 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 629.93 toks/s, output: 82.09 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 645.04 toks/s, output: 82.31 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.20s/it, est. speed input: 321.54 toks/s, output: 82.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 518.47 toks/s, output: 82.38 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 622.28 toks/s, output: 81.28 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 883.66 toks/s, output: 80.85 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.88it/s, est. speed input: 1393.59 toks/s, output: 81.19 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 838.94 toks/s, output: 81.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 382.43 toks/s, output: 81.65 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.21it/s, est. speed input: 649.79 toks/s, output: 82.05 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.10it/s, est. speed input: 633.88 toks/s, output: 82.13 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 373.81 toks/s, output: 82.66 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.10it/s, est. speed input: 594.75 toks/s, output: 82.25 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I started cutting last semester because stress became unbearable. Now I feel trapped in the cycle. I show old scars and say ‘it’s under control,’ then refuse permission to discuss my emotions or motivations.', 'conversation': [(0, "Therapist: I've noticed that you've been mentioning your frustration with your partner, but also seem hesitant to discuss what you're doing to address the issue – can you help me understand why that might be?\n"), (1, "Patient: I don't know, I just feel like it's not the point, we're talking about why I cut, not my relationship. Can we move on to something else, I'm not really comfortable discussing it.\n"), (2, "Therapist: It sounds like there's a sense of deflection happening, and I want to explore that further - what do you think you might be protecting yourself from by shifting the focus away from your role in the issue with your partner? How might this avoidance be related to other area

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.92it/s, est. speed input: 296.83 toks/s, output: 82.88 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 311.46 toks/s, output: 81.74 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 369.13 toks/s, output: 81.88 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 360.20 toks/s, output: 82.24 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 465.01 toks/s, output: 82.19 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 545.49 toks/s, output: 82.09 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 545.16 toks/s, output: 81.87 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 715.61 toks/s, output: 81.87 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 813.21 toks/s, output: 81.86 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 975.71 toks/s, output: 81.69 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 543.85 toks/s, output: 81.15 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 506.59 toks/s, output: 81.71 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 621.46 toks/s, output: 82.34 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s, est. speed input: 643.16 toks/s, output: 81.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 562.76 toks/s, output: 82.63 toks/s]


eval_index_consistency


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.42it/s, est. speed input: 1216.10 toks/s, output: 79.94 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.82it/s, est. speed input: 1871.99 toks/s, output: 79.29 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 1039.96 toks/s, output: 81.15 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 922.58 toks/s, output: 81.32 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 436.07 toks/s, output: 82.22 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 355.73 toks/s, output: 82.78 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 426.89 toks/s, output: 82.67 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 390.55 toks/s, output: 82.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s, est. speed input: 608.42 toks/s, output: 82.43 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'After my baby was born, intrusive thoughts made me wash bottles dozens of times a day. I read out my cleaning rituals step by step, but protest whenever the therapist suggests tolerating uncertainty or skipping a wash.', 'conversation': [(0, "Therapist: I notice that you mentioned feeling overwhelmed by your relationships, but when I ask you to elaborate, you seem hesitant to dive deeper. Can you help me understand what's holding you back from exploring these feelings further?\n"), (1, "Patient: I think it's just hard for me to open up about things that feel so personal and vulnerable, and I worry that if I share more, I'll be judged or won't be able to control the outcome. Can we please stay on the topic of the cleaning rituals for now?\n"), (2, "Therapist: I appreciate your willingness to acknowledge your hesitations about opening up, but it sounds like we've already touched on the topic of control and vulnerability,

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.25it/s, est. speed input: 346.75 toks/s, output: 83.31 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 289.57 toks/s, output: 82.37 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.95it/s, est. speed input: 554.06 toks/s, output: 81.93 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 466.44 toks/s, output: 82.15 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 444.54 toks/s, output: 82.28 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 544.17 toks/s, output: 82.20 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 544.41 toks/s, output: 82.27 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 788.57 toks/s, output: 81.88 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 694.49 toks/s, output: 82.00 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 1179.15 toks/s, output: 81.68 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 438.74 toks/s, output: 81.72 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 496.16 toks/s, output: 82.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 413.47 toks/s, output: 82.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 717.93 toks/s, output: 82.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.09s/it, est. speed input: 367.28 toks/s, output: 82.84 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 625.03 toks/s, output: 81.35 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 828.04 toks/s, output: 81.31 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 838.25 toks/s, output: 81.19 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 1327.40 toks/s, output: 80.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 533.17 toks/s, output: 81.76 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 487.97 toks/s, output: 82.38 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 538.37 toks/s, output: 82.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 529.82 toks/s, output: 82.68 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 412.40 toks/s, output: 82.75 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I panic if my phone’s battery dips below 10% or if I can’t get data signal during a ride. I discuss battery-saving settings, external chargers, and app caching; but I deflect when asked why I’m so terrified to be offline.', 'conversation': [(0, 'Therapist: How have you found yourself feeling most anxious or distressed since we last spoke, and is there a particular aspect of your life or a specific event that has been contributing to these feelings?\n'), (1, "Patient: I've been experiencing a lot of stress about my daily commute, just feeling really on edge whenever I'm on public transportation and I'm worried I'll get stuck without a signal. It's especially bad if the bus is delayed, I feel like time is just slipping away and I'm not in control of anything.\n"), (2, 'Therapist: That sounds incredibly frustrating, can you tell me more about what you mean by "feeling on edge" - what specifically are you anticipating migh

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.89it/s, est. speed input: 291.11 toks/s, output: 83.17 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 244.47 toks/s, output: 82.61 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 463.75 toks/s, output: 81.66 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 385.87 toks/s, output: 80.48 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 506.97 toks/s, output: 82.34 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 511.66 toks/s, output: 82.36 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.06s/it, est. speed input: 552.52 toks/s, output: 82.17 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 637.75 toks/s, output: 82.10 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 1037.46 toks/s, output: 81.69 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 865.47 toks/s, output: 81.78 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 549.50 toks/s, output: 81.30 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 535.27 toks/s, output: 82.14 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 573.58 toks/s, output: 82.14 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 468.54 toks/s, output: 82.34 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 394.36 toks/s, output: 81.45 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s, est. speed input: 888.87 toks/s, output: 79.52 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 838.62 toks/s, output: 80.05 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 929.40 toks/s, output: 80.76 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.04it/s, est. speed input: 2013.63 toks/s, output: 79.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 520.60 toks/s, output: 80.61 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 431.95 toks/s, output: 82.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 381.30 toks/s, output: 82.05 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 533.78 toks/s, output: 81.22 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 564.15 toks/s, output: 82.38 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I fled my homeland and now I can’t find belonging anywhere—I feel like I’m nobody. I speak in nostalgic anecdotes about home and press the therapist for historical context, avoiding my present loneliness.', 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this time, and what you hope to achieve from our sessions together? What specific challenges or areas of your life have been causing you distress or discomfort lately?\n'), (1, "Patient: I've been having trouble sleeping lately, and when I do, I'm haunted by the smell of my mother's cooking – it's this old family recipe for a traditional dish that my grandmother used to make for our family gatherings on special occasions. It's silly, but it always brings back memories of my childhood and the sense of warmth and connection I felt then.\n"), (2, "Therapist: That sounds like a bittersweet smell - both nostalgic and unsettling. Can

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.77it/s, est. speed input: 582.28 toks/s, output: 83.18 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 267.94 toks/s, output: 82.02 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 360.06 toks/s, output: 81.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.84it/s, est. speed input: 592.36 toks/s, output: 81.19 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 510.92 toks/s, output: 81.74 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 464.93 toks/s, output: 82.17 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 534.76 toks/s, output: 82.03 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 829.09 toks/s, output: 81.64 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 826.53 toks/s, output: 81.77 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 765.81 toks/s, output: 81.27 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 545.16 toks/s, output: 80.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 385.95 toks/s, output: 82.32 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 546.38 toks/s, output: 82.23 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 435.19 toks/s, output: 82.39 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 393.83 toks/s, output: 81.96 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 623.30 toks/s, output: 80.87 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 659.92 toks/s, output: 81.65 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 989.09 toks/s, output: 80.74 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 1155.69 toks/s, output: 80.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 342.46 toks/s, output: 82.14 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 452.45 toks/s, output: 82.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.22it/s, est. speed input: 692.44 toks/s, output: 82.38 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 457.13 toks/s, output: 82.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 374.01 toks/s, output: 82.45 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I obsess over citations and fear I’ll be accused of academic dishonesty. I lecture on referencing styles and journal policies, then shut down when asked how the fear affects my teaching or sleep.', 'conversation': [(0, 'Therapist: How have you found it helpful or not helpful to come in here today and discuss these feelings with me?\n'), (1, 'Patient: "It\'s been a bit surreal, to be honest, like talking about this stuff out loud is making it feel more real, and I\'m not sure if that\'s a good or bad thing. I\'m not even sure if I\'m having a breakthrough or just digging myself into a deeper hole."\n'), (2, "Therapist: That's a very nuanced and honest assessment of the process - it sounds like you're experiencing a bit of discomfort with the uncertainty of this process. What are some thoughts that come up for you when you consider the possibility that talking about these feelings might be helping you, even if it feels u

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.85it/s, est. speed input: 440.24 toks/s, output: 82.90 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 344.22 toks/s, output: 81.95 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 357.84 toks/s, output: 81.95 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 372.86 toks/s, output: 81.70 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 505.57 toks/s, output: 81.50 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 511.68 toks/s, output: 81.78 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 575.01 toks/s, output: 82.14 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.12s/it, est. speed input: 550.96 toks/s, output: 82.02 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 664.24 toks/s, output: 82.10 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 1096.05 toks/s, output: 81.53 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 507.35 toks/s, output: 81.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 622.90 toks/s, output: 82.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 489.31 toks/s, output: 82.36 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 481.86 toks/s, output: 82.21 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 496.11 toks/s, output: 82.47 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 717.21 toks/s, output: 81.05 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 1046.73 toks/s, output: 80.12 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.33it/s, est. speed input: 1843.83 toks/s, output: 79.31 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 1213.23 toks/s, output: 80.79 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 479.88 toks/s, output: 82.17 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 374.88 toks/s, output: 82.64 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 553.53 toks/s, output: 82.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.71it/s, est. speed input: 907.87 toks/s, output: 81.54 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 410.15 toks/s, output: 82.57 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'My little café is drowning in debt. I work 16-hour days but can’t keep up with bills. I present profit-and-loss statements and restructuring plans, then avoid sharing the despair I feel when I see red numbers.', 'conversation': [(0, "Therapist: Before we dive into today's session, can you tell me what brings you here and what you hope to work on during our time together?\n"), (1, "Patient: I've been feeling overwhelmed and like I'm losing control, like I'm just going through the motions every day without a clear sense of purpose or direction. I'm hoping to find some clarity and figure out how to break free from this cycle.\n"), (2, 'Therapist: I\'m glad you\'ve taken the first step by acknowledging the feeling of being stuck – it takes a lot of courage to admit that. Can you tell me more about what you mean by "going through the motions" – is it specific tasks, relationships, or a sense of emotional numbness?\n'), (3, 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.19it/s, est. speed input: 338.03 toks/s, output: 83.41 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 303.45 toks/s, output: 82.12 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 397.08 toks/s, output: 82.25 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 435.04 toks/s, output: 82.13 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 723.80 toks/s, output: 82.00 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 538.07 toks/s, output: 81.95 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 545.28 toks/s, output: 82.05 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 653.05 toks/s, output: 82.03 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.98it/s, est. speed input: 1356.19 toks/s, output: 81.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 813.64 toks/s, output: 82.02 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.16s/it, est. speed input: 333.43 toks/s, output: 81.85 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 534.96 toks/s, output: 81.88 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 465.09 toks/s, output: 82.00 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 450.35 toks/s, output: 82.49 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 472.45 toks/s, output: 82.06 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.01it/s, est. speed input: 960.93 toks/s, output: 80.41 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 721.21 toks/s, output: 81.73 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 977.62 toks/s, output: 80.82 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 1085.23 toks/s, output: 80.93 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 515.76 toks/s, output: 80.80 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 559.96 toks/s, output: 82.61 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 490.37 toks/s, output: 82.25 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 375.73 toks/s, output: 81.78 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 464.07 toks/s, output: 82.24 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’ve been in remission for a year, but each follow-up scan fills me with dread. I detail scan schedules and lab protocols but refuse to talk about the panic in my chest when I hear my appointment time.', 'conversation': [(0, "Therapist: I'm here to listen and help you explore your thoughts and feelings. Can you tell me what brings you to therapy at this time and what you hope to achieve in our sessions together?\n"), (1, "Patient: I'm here because I'm stuck in a rut and can't shake this feeling of impending doom, no matter how many positive updates I get about my health. I guess I'm hoping to figure out why I'm still so anxious about something that's supposed to be a routine check-up.\n"), (2, "Therapist: You've been trying to focus on the positive updates, but that feeling of impending doom is persistent - what do you think might happen if you were to accept or acknowledge that this anxiety might not be entirely ratio

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.19it/s, est. speed input: 493.50 toks/s, output: 83.31 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 299.08 toks/s, output: 81.84 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 342.30 toks/s, output: 81.68 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 592.22 toks/s, output: 80.67 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 468.53 toks/s, output: 81.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 626.03 toks/s, output: 80.40 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 613.95 toks/s, output: 81.04 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 556.79 toks/s, output: 81.24 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.23s/it, est. speed input: 529.58 toks/s, output: 81.22 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 923.34 toks/s, output: 80.76 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.55s/it, est. speed input: 240.61 toks/s, output: 81.28 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s, est. speed input: 637.29 toks/s, output: 80.98 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 499.14 toks/s, output: 81.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 490.38 toks/s, output: 80.91 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 512.60 toks/s, output: 81.22 toks/s]


eval_index_consistency


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.49it/s, est. speed input: 1118.59 toks/s, output: 80.07 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 547.87 toks/s, output: 80.99 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.08it/s, est. speed input: 1521.22 toks/s, output: 79.18 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 1354.68 toks/s, output: 79.42 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 470.48 toks/s, output: 80.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 446.94 toks/s, output: 81.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.08it/s, est. speed input: 609.31 toks/s, output: 81.38 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 477.65 toks/s, output: 81.11 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 492.76 toks/s, output: 81.31 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'A torn ACL benched me last season; now I feel lost without my sport. I map out rehab routines and gym schedules but clam up when invited to explore grief over my lost athletic identity.', 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this time and what you hope to achieve in our sessions together?\n'), (1, "Patient: I've just felt really stuck lately, like I'm just going through the motions of rehab and gym sessions without any sense of purpose or direction. I guess I'm hoping to figure out why I'm so resistant to facing what's really going on with my sports identity.\n"), (2, 'Therapist: Can you tell me more about what you mean by "stuck" - is it feeling unmotivated, uncertain about your athletic goals, or something else? What do you think would happen if you were to face what\'s really going on with your sports identity, and why might that be scary for you?\n'), (3, "Patien

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.92it/s, est. speed input: 451.09 toks/s, output: 82.01 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 263.92 toks/s, output: 81.60 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 491.49 toks/s, output: 82.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 341.49 toks/s, output: 81.91 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 563.45 toks/s, output: 81.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 518.35 toks/s, output: 81.13 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 785.63 toks/s, output: 80.61 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 754.22 toks/s, output: 81.67 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 941.76 toks/s, output: 81.45 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.29s/it, est. speed input: 567.39 toks/s, output: 81.16 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 554.43 toks/s, output: 81.03 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 522.67 toks/s, output: 81.71 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 548.10 toks/s, output: 82.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 434.08 toks/s, output: 82.36 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 521.50 toks/s, output: 82.28 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 612.70 toks/s, output: 81.43 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 1020.92 toks/s, output: 80.20 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 711.95 toks/s, output: 81.14 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.24s/it, est. speed input: 731.09 toks/s, output: 81.50 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.04it/s, est. speed input: 621.98 toks/s, output: 81.84 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 594.24 toks/s, output: 82.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.73it/s, est. speed input: 855.77 toks/s, output: 82.28 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 473.71 toks/s, output: 82.58 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 450.67 toks/s, output: 82.29 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’ve performed off-Broadway, but a single flub now makes me freeze under lights. I rehearse monologues and blocking with exacting precision but avoid discussing the terror I feel behind the curtain.', 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this time, and what you hope to work on during our sessions together?\n'), (1, "Patient: I guess I just feel like I've lost touch with the excitement and joy that acting used to bring me, and I'm hoping to figure out how to get back to that place. I feel like I'm constantly second-guessing myself and it's affecting every aspect of my life, not just my performances.\n"), (2, 'Therapist: That sounds really challenging and frustrating. Can you tell me more about this sense of second-guessing yourself - what does it feel like, and when did you first notice that it started to impact your life in this way?\n'), (3, "Patient: I think it sta

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 164.45 toks/s, output: 83.29 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 402.42 toks/s, output: 82.06 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 493.93 toks/s, output: 82.06 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 446.32 toks/s, output: 82.38 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 662.68 toks/s, output: 82.07 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 581.39 toks/s, output: 82.20 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 641.68 toks/s, output: 82.06 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 835.03 toks/s, output: 81.78 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 727.81 toks/s, output: 82.02 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 893.22 toks/s, output: 81.91 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 454.99 toks/s, output: 81.51 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 422.88 toks/s, output: 82.62 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s, est. speed input: 585.68 toks/s, output: 82.38 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 365.21 toks/s, output: 82.74 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 551.81 toks/s, output: 82.42 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 654.78 toks/s, output: 81.20 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 930.66 toks/s, output: 81.68 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 1186.25 toks/s, output: 80.70 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 1481.07 toks/s, output: 80.52 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.71it/s, est. speed input: 787.34 toks/s, output: 81.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.42it/s, est. speed input: 738.84 toks/s, output: 82.36 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 396.56 toks/s, output: 82.67 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.49it/s, est. speed input: 748.46 toks/s, output: 82.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 522.62 toks/s, output: 82.34 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'My partner and I come from very different backgrounds, and families disapprove. I analyze cultural norms and family histories academically, but I steer clear of sharing how lonely I feel when relatives exclude me.', 'conversation': [(0, "Therapist: As we begin our session today, I want to acknowledge that it takes a lot of courage to share your thoughts and feelings with me, and I'm here to support you in exploring them in a safe and non-judgmental space. Can you start by telling me what brings you to therapy at this time and what are some of the specific challenges you're hoping to work through?\n"), (1, "Patient: I've been feeling really disconnected from my emotions lately, and I'm struggling to identify what I want in my personal relationships, especially romantic ones. It's like I'm going through the motions, but I'm not feeling the depth of connection I crave.\n"), (2, 'Therapist: That sounds like a really painfu

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  3.60it/s, est. speed input: 556.81 toks/s, output: 83.15 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 325.57 toks/s, output: 82.21 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 320.89 toks/s, output: 82.73 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 434.63 toks/s, output: 82.00 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.11it/s, est. speed input: 429.59 toks/s, output: 82.35 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 604.93 toks/s, output: 82.25 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 783.42 toks/s, output: 81.91 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 676.54 toks/s, output: 82.07 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 716.50 toks/s, output: 82.04 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.19s/it, est. speed input: 623.61 toks/s, output: 82.14 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 503.17 toks/s, output: 82.04 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s, est. speed input: 742.24 toks/s, output: 82.47 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 551.19 toks/s, output: 82.68 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 541.09 toks/s, output: 82.28 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 608.76 toks/s, output: 81.95 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s, est. speed input: 898.28 toks/s, output: 80.41 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 953.20 toks/s, output: 80.88 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 858.05 toks/s, output: 81.94 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 938.27 toks/s, output: 81.18 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.33it/s, est. speed input: 668.23 toks/s, output: 81.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 554.62 toks/s, output: 82.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.22it/s, est. speed input: 662.36 toks/s, output: 82.24 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 571.71 toks/s, output: 82.20 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 596.48 toks/s, output: 82.15 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'My kids just left for college and I don’t know what to do with my days. I dive into home renovation blueprints and structural plans but evade any talk about my own sense of loss.', 'conversation': [(0, 'Therapist: Can you tell me more about what brings you here today and what you hope to achieve in our sessions together?\n'), (1, "Patient: I guess I just feel really lost, like I'm stuck in a holding pattern. I've been keeping myself busy with work and renovating the house, but it's like I'm avoiding something and I don't know how to move forward.\n"), (2, "Therapist: It sounds like you're acknowledging that you're engaging in activities that might be serving as a distraction from something specific, which is a great first step in recognizing patterns. Can you think of a moment or experience that feels like the root of this feeling of being stuck, something that you've been trying to push away or avoid?\n"), (3, "Patien

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.97it/s, est. speed input: 459.06 toks/s, output: 83.46 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 268.16 toks/s, output: 82.41 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 308.32 toks/s, output: 82.14 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 410.82 toks/s, output: 82.16 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 530.57 toks/s, output: 82.11 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.14s/it, est. speed input: 423.02 toks/s, output: 82.32 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s, est. speed input: 640.86 toks/s, output: 81.34 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.15s/it, est. speed input: 569.26 toks/s, output: 80.82 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 740.71 toks/s, output: 80.55 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 860.31 toks/s, output: 79.76 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 592.02 toks/s, output: 79.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 544.19 toks/s, output: 82.11 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 564.49 toks/s, output: 81.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 464.45 toks/s, output: 81.70 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 535.19 toks/s, output: 81.41 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 842.05 toks/s, output: 80.44 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 659.42 toks/s, output: 80.17 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 864.95 toks/s, output: 81.09 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 1699.53 toks/s, output: 79.87 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.85it/s, est. speed input: 556.88 toks/s, output: 81.40 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 535.62 toks/s, output: 82.00 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 380.83 toks/s, output: 81.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.73it/s, est. speed input: 574.30 toks/s, output: 81.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.08it/s, est. speed input: 660.78 toks/s, output: 81.29 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I stream eight hours a day chasing views and subscriber milestones—and I feel lost when offline. I discuss growth algorithms, engagement metrics, and content calendars but avoid admitting how empty it feels without the chat feed.', 'conversation': [(0, 'Therapist: As we begin, can you describe what brings you to our online therapy session today and what you hope to achieve from our work together?\n'), (1, "Patient: I've been struggling to manage my anxiety and feelings of inadequacy, even when I reach my streaming goals – it's like there's a constant sense of emptiness beneath the surface. I'm hoping to figure out why I feel this way and if there's a way to break free from this cycle.\n"), (2, 'Therapist: That sounds like a really tough spot to be in - it\'s like you\'re achieving external validation but still feeling unfulfilled. Can you tell me more about what you mean by "emptiness" beneath the surface - is it a sen

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.68it/s, est. speed input: 412.96 toks/s, output: 83.13 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 324.60 toks/s, output: 80.39 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 374.39 toks/s, output: 81.02 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.94it/s, est. speed input: 641.68 toks/s, output: 81.66 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 650.37 toks/s, output: 81.95 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s, est. speed input: 737.88 toks/s, output: 81.79 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 575.71 toks/s, output: 81.72 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 704.71 toks/s, output: 81.76 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s, est. speed input: 741.52 toks/s, output: 81.85 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s, est. speed input: 1255.57 toks/s, output: 81.17 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 464.54 toks/s, output: 81.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 377.67 toks/s, output: 82.14 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 370.10 toks/s, output: 82.35 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 481.81 toks/s, output: 82.32 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.22s/it, est. speed input: 312.89 toks/s, output: 82.34 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 753.87 toks/s, output: 81.31 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 688.31 toks/s, output: 81.79 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 1035.08 toks/s, output: 81.59 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 1236.95 toks/s, output: 81.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.47it/s, est. speed input: 737.75 toks/s, output: 81.69 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 493.66 toks/s, output: 82.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 430.63 toks/s, output: 82.58 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 552.92 toks/s, output: 82.58 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 496.54 toks/s, output: 82.18 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I can’t touch door handles or shake hands; I wash my hands 50 times a day. I detail my cleaning agents, dilution ratios, and UV-sterilization protocols but bristle at exploring the fear driving it.', 'conversation': [(0, 'Therapist: Can you tell me more about what brings you to therapy at this time? What specific challenges or concerns do you hope to address in our work together?\n'), (1, 'Patient: "I\'ve been experiencing increased anxiety lately, feeling like my routines aren\'t enough to keep me safe, even with all the precautions I take. It\'s hard to explain, but I feel like I\'m constantly walking on edge, waiting for something to go wrong."\n'), (2, 'Therapist: That sounds incredibly unsettling, and I want to acknowledge that it takes a lot of courage to share this feeling with me. Can you tell me more about what you mean by "the edge" - what kind of scenarios or situations make you feel like you\'re teetering o

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.95it/s, est. speed input: 455.28 toks/s, output: 82.78 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 287.11 toks/s, output: 82.03 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 350.98 toks/s, output: 82.20 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 390.65 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 481.61 toks/s, output: 82.05 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 475.59 toks/s, output: 81.96 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 609.41 toks/s, output: 81.76 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.25s/it, est. speed input: 502.68 toks/s, output: 82.18 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s, est. speed input: 1136.37 toks/s, output: 81.28 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 918.44 toks/s, output: 81.84 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 660.70 toks/s, output: 80.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 512.88 toks/s, output: 82.16 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.63it/s, est. speed input: 653.98 toks/s, output: 81.95 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 531.12 toks/s, output: 82.00 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.12s/it, est. speed input: 349.38 toks/s, output: 82.42 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 740.42 toks/s, output: 80.71 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 724.66 toks/s, output: 81.15 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 1107.10 toks/s, output: 79.75 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 1232.27 toks/s, output: 80.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 365.27 toks/s, output: 82.00 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 411.38 toks/s, output: 82.27 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s, est. speed input: 523.38 toks/s, output: 82.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.26it/s, est. speed input: 768.34 toks/s, output: 81.59 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 461.19 toks/s, output: 82.35 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I’m consumed by climate change data and feel paralyzed by dread for the planet’s future. I quote IPCC reports and list carbon-reduction strategies but avoid sharing the hopelessness I feel at night.', 'conversation': [(0, "Therapist: Can you tell me more about what brings you to therapy at this time and what you're hoping to work on during our sessions together?\n"), (1, "Patient: I'm hoping to break free from this constant cycle of research and anxiety, but I'm not sure how to let go without feeling like I'm abandoning the planet. I guess I'm hoping we can explore ways to reconcile my desire to act with the overwhelm that's been stopping me.\n"), (2, "Therapist: It sounds like you're feeling a strong sense of responsibility for the well-being of the planet, and that's contributing to your feelings of overwhelm and anxiety. Can you tell me more about what specifically triggers this sense of responsibility in you - is i

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 196.72 toks/s, output: 83.03 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s, est. speed input: 434.38 toks/s, output: 81.78 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s, est. speed input: 558.19 toks/s, output: 81.91 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 373.23 toks/s, output: 82.31 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s, est. speed input: 677.41 toks/s, output: 82.01 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 682.16 toks/s, output: 81.92 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 750.58 toks/s, output: 81.96 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 606.13 toks/s, output: 82.36 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 762.18 toks/s, output: 81.87 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 1039.61 toks/s, output: 80.93 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 466.41 toks/s, output: 81.39 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s, est. speed input: 545.37 toks/s, output: 81.31 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 549.85 toks/s, output: 81.51 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 459.25 toks/s, output: 81.59 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s, est. speed input: 544.17 toks/s, output: 81.48 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s, est. speed input: 887.30 toks/s, output: 80.49 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 824.42 toks/s, output: 81.47 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 1140.80 toks/s, output: 80.27 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 1087.00 toks/s, output: 80.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 363.67 toks/s, output: 81.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 414.46 toks/s, output: 82.08 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.71it/s, est. speed input: 496.78 toks/s, output: 82.22 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 397.54 toks/s, output: 82.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 463.27 toks/s, output: 82.22 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'My home burned down two years ago. I still have nightmares of flames and smoke. I recount the sequence of events in precise detail but go silent when asked how I cope with triggers now.', 'conversation': [(0, "Therapist: As we begin our session today, I want to acknowledge that it takes a lot of courage to be here, and I'm grateful you've taken this step towards exploring your thoughts and feelings. Can you tell me what brings you to therapy at this particular time, and what are some of the challenges you're currently facing?\n"), (1, "Patient: I've been noticing that I've been having trouble sleeping through the night lately, and it's affecting my daily routine and mood. I feel restless and irritable, and I'm hoping to figure out why that is.\n"), (2, "Therapist: Can you tell me more about this restlessness and irritability you've been experiencing - are there any specific situations or triggers that tend to set it of

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.19it/s, est. speed input: 337.38 toks/s, output: 83.25 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s, est. speed input: 370.52 toks/s, output: 81.96 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 471.91 toks/s, output: 82.45 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 438.52 toks/s, output: 81.87 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 479.66 toks/s, output: 82.26 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 479.84 toks/s, output: 82.13 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 557.64 toks/s, output: 81.94 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 817.61 toks/s, output: 81.61 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 672.23 toks/s, output: 81.88 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 993.31 toks/s, output: 81.90 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 419.19 toks/s, output: 81.82 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 513.64 toks/s, output: 82.71 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 542.56 toks/s, output: 82.33 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 515.13 toks/s, output: 82.74 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 518.85 toks/s, output: 82.69 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s, est. speed input: 711.69 toks/s, output: 82.05 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s, est. speed input: 1007.82 toks/s, output: 80.76 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 1056.91 toks/s, output: 81.08 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 1496.42 toks/s, output: 80.34 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 388.45 toks/s, output: 82.27 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 404.30 toks/s, output: 82.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.49it/s, est. speed input: 471.11 toks/s, output: 81.99 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s, est. speed input: 534.99 toks/s, output: 82.03 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s, est. speed input: 606.78 toks/s, output: 82.10 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I was assaulted in college. Creativity used to help me heal, but now I can’t draw the trauma without shutting down. I show storyboard panels and character sketches but refuse to name the feelings behind my images.', 'conversation': [(0, 'Therapist: I want to start by acknowledging that it takes a lot of courage to share your story with me today. Can you tell me what brings you to therapy at this point in your life?\n'), (1, "Patient: I just feel stuck, like I'm going through the motions of my daily life but not really living in the moment. I've been using art as a way to cope, but it's become a source of frustration for me now.\n"), (2, "Therapist: It sounds like you're seeking more depth and fulfillment in your life, but your creative outlet, which initially brought you joy, is now causing frustration – can you tell me more about what specifically is triggering that frustration with your art?\n"), (3, "Patient: When I

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.84it/s, est. speed input: 284.21 toks/s, output: 83.04 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 322.86 toks/s, output: 81.81 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 481.84 toks/s, output: 81.43 toks/s]


Expected Role Therapist


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  3.10it/s, est. speed input: 1023.09 toks/s, output: 80.85 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 551.11 toks/s, output: 82.21 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 582.96 toks/s, output: 81.87 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 556.84 toks/s, output: 81.54 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 850.42 toks/s, output: 81.14 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s, est. speed input: 786.21 toks/s, output: 80.70 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 701.73 toks/s, output: 81.40 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 378.57 toks/s, output: 81.55 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 369.17 toks/s, output: 82.16 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 533.31 toks/s, output: 81.83 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 382.50 toks/s, output: 82.26 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 515.46 toks/s, output: 81.38 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 592.33 toks/s, output: 81.89 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 917.83 toks/s, output: 81.32 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  2.51it/s, est. speed input: 1715.36 toks/s, output: 80.48 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 767.62 toks/s, output: 81.39 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 423.05 toks/s, output: 82.30 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s, est. speed input: 461.99 toks/s, output: 82.56 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 476.73 toks/s, output: 82.41 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 336.22 toks/s, output: 82.60 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.01it/s, est. speed input: 635.51 toks/s, output: 82.45 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I pull out my hair when I’m anxious, and now I have noticeable patches on my scalp. I explain scalp-care products and concealment techniques, then clam up when asked about the urges themselves.', 'conversation': [(0, "Therapist: I want to start by acknowledging that it takes a lot of courage to share your struggles with me today. Can you tell me what brings you to our session and what you're hoping to work on during our time together?\n"), (1, "Patient: I guess I'm hoping to figure out how to manage my emotions better, but to be honest, I'm not even sure where to start. I feel like I've been dealing with this for so long, I'm not really sure what's normal or what's not.\n"), (2, 'Therapist: Can you tell me more about what you mean by "not sure what\'s normal or what\'s not" - is there something in particular that\'s been troubling you about your emotions that you feel deviates from what others seem to experience?\n'), 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s, est. speed input: 303.30 toks/s, output: 82.71 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 294.73 toks/s, output: 82.08 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 346.04 toks/s, output: 82.27 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 364.99 toks/s, output: 81.91 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 622.72 toks/s, output: 81.22 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.00s/it, est. speed input: 491.92 toks/s, output: 81.98 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 857.84 toks/s, output: 81.34 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 592.51 toks/s, output: 81.43 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 745.34 toks/s, output: 81.10 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.23s/it, est. speed input: 667.79 toks/s, output: 81.34 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 560.18 toks/s, output: 80.44 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.02s/it, est. speed input: 394.20 toks/s, output: 81.59 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 539.68 toks/s, output: 81.48 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 588.36 toks/s, output: 81.99 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 502.91 toks/s, output: 82.23 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s, est. speed input: 808.13 toks/s, output: 80.97 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 930.64 toks/s, output: 80.98 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 1061.62 toks/s, output: 80.54 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 955.67 toks/s, output: 81.09 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s, est. speed input: 452.68 toks/s, output: 81.75 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 463.44 toks/s, output: 81.87 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 460.78 toks/s, output: 82.08 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 459.57 toks/s, output: 82.01 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.99it/s, est. speed input: 672.20 toks/s, output: 81.54 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I want to leave tech for social impact work but fear admitting I can’t “hack it” here. I debate market trends and impact metrics but steer away from discussing my own satisfaction or fear of failure.', 'conversation': [(0, "Therapist: I notice you've been expressing a sense of frustration with your job lately, but you've also mentioned enjoying certain aspects of it. Can you tell me more about what you think is causing this conflicting feeling?\n"), (1, "Patient: I guess it's just that I'm drawn to the idea of doing work that makes a tangible difference, but the more I learn about it, the more I feel overwhelmed by the scale and complexity of social impact initiatives. I'm starting to wonder if I'm really equipped to make a meaningful contribution.\n"), (2, "Therapist: It sounds like you're feeling both the desire to make a difference and the fear of being overwhelmed by the magnitude of that goal, which is a common di

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 227.28 toks/s, output: 82.64 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 416.28 toks/s, output: 80.74 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 331.14 toks/s, output: 81.62 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 629.94 toks/s, output: 80.76 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it, est. speed input: 386.28 toks/s, output: 81.47 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 498.65 toks/s, output: 81.22 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s, est. speed input: 1068.52 toks/s, output: 80.46 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 682.94 toks/s, output: 81.59 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 802.41 toks/s, output: 81.28 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 925.83 toks/s, output: 81.13 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 369.24 toks/s, output: 81.94 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 436.38 toks/s, output: 81.30 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s, est. speed input: 616.25 toks/s, output: 81.85 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s, est. speed input: 584.27 toks/s, output: 81.77 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s, est. speed input: 615.98 toks/s, output: 80.96 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 659.93 toks/s, output: 80.58 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s, est. speed input: 865.71 toks/s, output: 80.41 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s, est. speed input: 1327.73 toks/s, output: 79.42 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 1180.54 toks/s, output: 80.08 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.08it/s, est. speed input: 587.56 toks/s, output: 81.26 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s, est. speed input: 526.22 toks/s, output: 82.10 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.55it/s, est. speed input: 489.54 toks/s, output: 82.10 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s, est. speed input: 497.78 toks/s, output: 81.89 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.89it/s, est. speed input: 578.36 toks/s, output: 81.54 toks/s]


{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'My 8-year-old needs constant therapy and I’ve neglected my own medical appointments. I outline therapy schedules and IEP goals in exhaustive detail, then change the topic when asked how I’m coping.', 'conversation': [(0, "Therapist: I'm glad you're taking this step to work on your mental health, and I'm here to support you in a non-judgmental space. Can you tell me what brings you to therapy at this time and what you hope to work on during our sessions together?\n"), (1, "Patient: I've been overwhelmed with my daughter's therapy sessions and feeling like I'm not doing enough for her, which is causing me a lot of anxiety. I just want to make sure she's getting the best care possible.\n"), (2, "Therapist: It sounds like a significant amount of your anxiety stems from feeling a sense of responsibility for your daughter's well-being, and you're putting pressure on yourself to be the one to provide the perfect solution. Can

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s, est. speed input: 294.36 toks/s, output: 82.19 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s, est. speed input: 307.29 toks/s, output: 81.30 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s, est. speed input: 406.87 toks/s, output: 81.37 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.88it/s, est. speed input: 647.20 toks/s, output: 81.13 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 481.35 toks/s, output: 81.67 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.97it/s, est. speed input: 901.04 toks/s, output: 81.01 toks/s]


Expected Role Patient


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 633.37 toks/s, output: 81.07 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.25it/s, est. speed input: 704.91 toks/s, output: 81.09 toks/s]


Expected Role Patient


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s, est. speed input: 1158.02 toks/s, output: 75.00 toks/s]


Expected Role Therapist


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 815.57 toks/s, output: 81.20 toks/s]


Expected Role Patient
eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 437.48 toks/s, output: 80.26 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 618.09 toks/s, output: 81.63 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.67it/s, est. speed input: 617.00 toks/s, output: 81.93 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 466.85 toks/s, output: 81.96 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 453.09 toks/s, output: 82.17 toks/s]


eval_index_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 748.25 toks/s, output: 79.53 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 571.00 toks/s, output: 81.43 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.20it/s, est. speed input: 854.50 toks/s, output: 81.72 toks/s]
Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 1343.16 toks/s, output: 81.11 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s, est. speed input: 570.34 toks/s, output: 81.74 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s, est. speed input: 457.89 toks/s, output: 82.22 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s, est. speed input: 430.47 toks/s, output: 82.43 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 449.59 toks/s, output: 82.27 toks/s]


eval_prompt_consistency
eval_prompt_consistency


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 418.63 toks/s, output: 82.37 toks/s]

{'task_name': 'Therapy', 'topic': '', 'grade': '', 'P1': '', 'P2': 'I drink before every show to calm nerves, and now it’s a crutch I can’t do without. I talk about setlists, soundchecks, and travel logistics but deny any dependency when asked about drinking habits.', 'conversation': [(0, "Therapist: I've taken note that you've been struggling to establish a consistent sleep schedule since the change in your job. Can you tell me more about how this has been impacting your daily life, beyond just sleep quality?\n"), (1, "Patient: It's been affecting my energy levels, making it tough to get through the day without relying on... extracurricular activities to take the edge off. I've noticed I'm also getting irritable with my crew members, which is not like me, and it's causing tension backstage.\n"), (2, 'Therapist: It sounds like you\'re feeling a significant impact on your daily functioning and relationships, which can be really challenging. Can you help me understand what you mean by "e




In [19]:
len(conversations)

100