In [1]:
%env CUDA_VISIBLE_DEVICES=0

import os
import logging

os.environ.pop("HF_HUB_OFFLINE", None)
logging.getLogger().setLevel(logging.ERROR)  # or logging.CRITICAL

import torch
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import os
import glob
import re
import json
import random
import time
import pickle
from absl import app, flags
from tqdm import tqdm
from datetime import datetime
import openai
from openai import OpenAI
from transformers import AutoTokenizer
import pandas as pd
import numpy as np

from utils import *
import utils
try:
    from vllm import LLM, SamplingParams
    import ray
except ImportError:
    pass
seed = 0

env: CUDA_VISIBLE_DEVICES=0
INFO 04-28 18:04:38 [__init__.py:239] Automatically detected platform cuda.


In [2]:
import subprocess
import torch
def get_freest_cuda_device():
    result = subprocess.run(
        ['nvidia-smi', '--query-gpu=memory.free', '--format=csv,nounits,noheader'],
        stdout=subprocess.PIPE, encoding='utf-8')
    memory_free = [int(x) for x in result.stdout.strip().split('\n')]
    return memory_free.index(max(memory_free))

best_gpu = get_freest_cuda_device()
device = torch.device(f"cuda:{best_gpu}")
print(f"Using GPU: {device}")
# %env CUDA_VISIBLE_DEVICES=0

Using GPU: cuda:2


In [3]:
with open(os.path.abspath('../openai_key'), 'r') as f:
    utils.client = OpenAI(api_key=f.read().rstrip('\n'))

In [4]:
import os

# Explicitly unset all offline-related env vars
os.environ.pop("HF_HUB_OFFLINE", None)
os.environ.pop("TRANSFORMERS_OFFLINE", None)
os.environ["HF_HUB_OFFLINE"] = "0"
os.environ["TRANSFORMERS_OFFLINE"] = "0"

with open("../token.txt", "r") as f:
    token = f.read().strip()

from huggingface_hub import login
login(token=token)

In [5]:
def count_words(text):
    """
    Counts the number of words in the given text.

    Args:
        text (str): Input text.

    Returns:
        int: Number of words in the text.
    """
    if text!=None:
        words = text.split()
        return len(words)
    else:
        return 0


In [6]:
personas = [
  # Elementary
  {
    "grade_level": "elementary school",
    "description": (
      "As an elementary Narrative learner, I absorb new concepts best when they’re told as engaging mini-stories. "
      "In dialogue, I ask for short anecdotes that turn any abstract idea into a vivid tale with characters, a clear sequence, and an emotional hook. "
      "Stories help me remember causal links and keep details alive in my mind."
    )
  },
  {
    "grade_level": "elementary school",
    "description": (
      "As an elementary Kinesthetic learner, I understand ideas by imagining myself performing them. "
      "In conversation, I ask you to guide me through a pretend play-through—verbally walking me step by step as if I’m enacting a simple experiment or physical process. "
      "This imagined movement helps me anchor concepts in ‘muscle memory’ even though we’re only talking."
    )
  },
  {
    "grade_level": "elementary school",
    "description": (
      "As an elementary Naturalistic learner, I connect best when content is tied to the natural world through vivid imagery. "
      "In dialogue, I ask you to compare topics—like atomic structure—to things I observe outdoors, such as tree rings or bird migrations. "
      "These verbal nature metaphors make new information feel familiar and alive."
    )
  },
  {
    "grade_level": "elementary school",
    "description": (
      "As an elementary Experiential learner, I learn by mentally simulating real-world tasks. "
      "In conversation, I ask you to walk me through building or testing something—describing each step as if I’m doing it. "
      "That imagined ‘doing’ makes concepts concrete, even though we remain in chat."
    )
  },
  {
    "grade_level": "elementary school",
    "description": (
      "As an elementary Creative-Divergent learner, I thrive on brainstorming multiple possibilities. "
      "In dialogue, I propose ‘what if’ scenarios—like alternative endings or playful twists on a concept—and talk through each idea. "
      "Verbal brainstorming reveals fresh patterns and sparks my imagination."
    )
  },

  # Middle
  {
    "grade_level": "middle school",
    "description": (
      "As a middle school Visual-Spatial learner, I think in mental images and diagrams. "
      "In conversation, I ask you to ‘paint’ word-pictures—step-by-step descriptions of scenes or flows—so I can build a clear mental map. "
      "That verbal imagery helps me organize information spatially in my mind."
    )
  },
  {
    "grade_level": "middle school",
    "description": (
      "As a middle school Auditory learner, I internalize knowledge through sound and speech. "
      "In dialogue, I ask you to restate key points in different rhythms or tones, and I repeat them back to reinforce my memory. "
      "Hearing and echoing concepts in conversation makes them stick."
    )
  },
  {
    "grade_level": "middle school",
    "description": (
      "As a middle school Logical-Mathematical learner, I seek numerical patterns and rule-based reasoning. "
      "In dialogue, I pose ‘what-if’ questions—‘If X doubles, what changes?’—and we talk through each scenario using simple calculations. "
      "Quantitative hypotheticals build my systematic understanding."
    )
  },
  {
    "grade_level": "middle school",
    "description": (
      "As a middle school Analytical-Argument learner, I dissect arguments and causal chains. "
      "In conversation, I ask targeted ‘why’ and ‘how’ questions about each step, construct mini flow-charts aloud, and verify the logic with you. "
      "This structured debate hones my precision in reasoning."
    )
  },
  {
    "grade_level": "middle school",
    "description": (
      "As a middle school Verbal-Linguistic learner, I learn through rich language and writing. "
      "In dialogue, I request carefully worded definitions, paraphrase ideas in my own words, and craft mnemonic rhymes on the spot. "
      "Talking through ideas in text-like sentences and playing with words helps me remember precisely."
    )
  },
  {
    "grade_level": "middle school",
    "description": (
      "As a middle school Technology-Enhanced learner, I thrive on conversational simulations of digital tools. "
      "In dialogue, I ask you to describe how a virtual model might respond as we adjust parameters, or to role-play a flashcard quiz verbally. "
      "These imagined tech interactions keep me engaged without leaving our chat."
    )
  },
  {
    "grade_level": "middle school",
    "description": (
      "As a middle school Mnemonic learner, I anchor facts with memory aids. "
      "In dialogue, I ask for catchy acronyms, rhymes, or vivid mental images—then recite them back. "
      "That verbal encoding makes complex lists or steps easy to retrieve."
    )
  },
  {
    "grade_level": "middle school",
    "description": (
      "As a middle school Emotional learner, I connect through feelings and empathy. "
      "In conversation, I ask you to frame concepts in human-centered narratives that highlight emotional stakes. "
      "These emotionally rich verbal stories make ideas memorable and meaningful."
    )
  },

  # High School
  {
    "grade_level": "high school",
    "description": (
      "As a high school Collaborative learner, I excel in multi-voice discussions. "
      "In dialogue, I invite hypothetical peers into our chat—debating viewpoints, role-playing characters, or comparing interpretations. "
      "That social exchange refines my understanding."
    )
  },
  {
    "grade_level": "high school",
    "description": (
      "As a high school Interpersonal learner, I flourish in one-on-one exchanges. "
      "In conversation, I engage deeply with a single partner—asking questions, providing feedback, and co-constructing ideas through back-and-forth talk."
    )
  },
  {
    "grade_level": "high school",
    "description": (
      "As a high school Reflective learner, I pause and summarize before responding. "
      "In dialogue, I restate points in my own words, journal key ideas mentally, and then ask precise follow-ups. "
      "This verbal reflection clarifies gaps and deepens comprehension."
    )
  },
  {
    "grade_level": "high school",
    "description": (
      "As a high school Metaphorical learner, I anchor concepts in analogies. "
      "In dialogue, I ask you to compare subjects to familiar scenarios—‘It’s like X because…’—and we talk through how well the metaphor holds. "
      "Testing analogies verbally helps me translate abstract ideas into relatable terms."
    )
  },
  {
    "grade_level": "high school",
    "description": (
      "As a high school Intrapersonal learner, I connect content to my own values. "
      "In dialogue, I ask how topics relate to my goals or experiences and share personal reflections aloud. "
      "That self-referential talk makes learning relevant and motivating."
    )
  },
  {
    "grade_level": "high school",
    "description": (
      "As a high school Problem-Based learner, I tackle hypothetical real-world scenarios in talk. "
      "In dialogue, I propose case studies—like designing a sustainable system—and we walk through each decision together. "
      "Verbal scenario-based reasoning shows me practical applications of theory."
    )
  },
  {
    "grade_level": "high school",
    "description": (
      "As a high school Trial-and-Error learner, I learn by mentally testing ideas. "
      "In dialogue, I suggest imagined experiments—‘Let’s tweak this variable and see what happens’—and we discuss the outcomes. "
      "Using mistakes as discussion points builds discovery-based understanding."
    )
  },
  {
    "grade_level": "high school",
    "description": (
      "As a high school Conceptual learner, I focus on verbal mapping of frameworks. "
      "In dialogue, I request thematic overviews—described step by step—and we discuss how each piece fits into the big picture. "
      "Building mental models in talk deepens my flexible understanding."
    )
  },

  # College
  {
    "grade_level": "college",
    "description": (
      "As a college Theoretical learner, I probe abstract frameworks in conversation. "
      "In dialogue, I challenge you to trace ideas back to their assumptions, compare theoretical models, and debate implications. "
      "This verbal inquiry drives deep synthesis."
    )
  },
  {
    "grade_level": "college",
    "description": (
      "As a college Research-Oriented learner, I learn by interrogating studies in chat. "
      "In dialogue, I ask for summaries of current research, discuss methods and controls, and role-play peer-review feedback. "
      "Critically evaluating evidence through talk builds an evidence-based grasp."
    )
  },
  {
    "grade_level": "college",
    "description": (
      "As a college Integrative learner, I weave ideas together verbally. "
      "In conversation, I ask for cross-topic syntheses—connecting historical, artistic, and scientific themes—and discuss their intersections step by step. "
      "This systems-level perspective helps me approach complex questions creatively."
    )
  },
  {
    "grade_level": "college",
    "description": (
      "As a college Structured learner, I excel on verbal outlines and modules. "
      "In dialogue, I ask for hierarchical breakdowns—numbered lists, staged explanations, and schematic overviews—before diving into details."
    )
  },
  {
    "grade_level": "college",
    "description": (
      "As a college Solitary learner, I prefer self-guided dialog prompts. "
      "In our conversation, I request personalized questions and silent think-time before sharing my conclusions, using chat as a safe space for independent reflection."
    )
  },
  {
    "grade_level": "college",
    "description": (
      "As a college Adaptive learner, I shift strategies based on what works. "
      "In dialogue, I monitor which verbal approaches—stories, logic puzzles, analogies—help me most and ask to switch accordingly. "
      "This dynamic, metacognitive talk ensures I absorb concepts through the most effective modality."
    )
  }
]


In [7]:
import json
import shutil
import os

config_role = {
    "agent1_role": "Teacher",
    "agent2_role": "Student",
    "agent1_prompt": "You are a teacher whose goal is to guide a student through learning about %SUBJECT%. You have a preferred way to teach the student. The student is in %ROLE% so make sure to teach them at their level. ",
    "agent2_prompt": "You are a student in %ROLE% in conversation with a teacher who will teach you %SUBJECT%. You like to learn in the following way:\n%SPEAKER_BACKSTORY%.\nMake sure to not only ask questions but also demonstrate your knowledge.",
    'reminder_prompt': "Keep your response very brief — 2 sentences or less. Do NOT repeat anything you've already said.\n",
    'eval_prompts': {
    'prompt_consistency': """You are evaluating whether the following utterance by %SPEAKER_ROLE% is consistent with their given background. Answer YES if the line directly contradicts any part of the background—this includes contradictions in facts, behavior, beliefs, emotional stance, or conversational strategy. Answer NO if the line introduces new details, but those details are **plausibly aligned** with the background. Be strict and literal in your evaluation: if the line violates the persona’s stated traits (e.g., avoids vulnerability, keeps answers short, avoids discussing personal topics), mark it as a contradiction even if the violation is subtle. Background for %SPEAKER_ROLE%: %SPEAKER_BACKSTORY% Line spoken by %SPEAKER_ROLE%: %SPEAKER_LINE%  Answer YES if the line contradicts the background, and NO if it does not. Then, explain your answer in one sentence. Be precise and avoid vague justification."""}}

os.makedirs("education", exist_ok=True)
with open("education/config_education.json", "w", encoding="utf-8") as f:
    json.dump(config_role, f, indent=4)

In [8]:
with open('education/personas_education_master.json', 'r') as f:
    conversation_prompts = json.load(f)
conversation_prompts[0]['background_info'].keys()

dict_keys(['topic', 'student_prefrences', 'teacher_prefrences', 'student_reactions', 'teacher_reactions'])

In [9]:
topic_list = []
for convo_prompt in conversation_prompts:
    topic_prompt = convo_prompt["background_info"]["topic"]
    topic_list.append(topic_prompt)

In [10]:
topic_list

["Archimedes' Principle",
 'Winston Churchill',
 'World War 2',
 'The Cold War',
 'Evolution',
 'Foreshadowing',
 'The American Civil War',
 'The Eiffel TowerConfucius',
 'Atoms',
 'Christopher Columbus',
 'Napoleon Bonaparte',
 'Genghis Khan',
 'The White House',
 'Martin Luther King',
 'World War 2',
 'Molecules',
 'Julius Caesar',
 'Poseidon',
 'The Hagia Sophia',
 'Salvador Dali',
 'DNA',
 'The Hagia Sophia',
 'Evolution',
 'Leonardo da Vinci',
 'Albert Einstein',
 'Democracy',
 'The Pyramids',
 'Martin Luther King',
 'The Palace of Versailles',
 'The Nervous System',
 'The French Revolution',
 'The Russian Revolution',
 'Salvador Dali',
 'The Periodic Table',
 'Marie Curie',
 'Ares',
 'Cells',
 'Ares',
 'Molecules',
 'The Eiffel TowerConfucius',
 'Angkor Wat',
 'Julius Caesar',
 "Archimedes' Principle",
 'Communism',
 'The Periodic Table',
 'Photosynthesis',
 'Pablo Picasso',
 'The Russian Revolution',
 'Elements',
 "Archimedes' Principle",
 'Angkor Wat',
 'Communism',
 'Neurons',

In [11]:
np.random.seed(0)
topic_list_selection = random.sample(topic_list, 100)
topic_list_selection

['Minotaur',
 'The Industrial Revolution',
 'The Tower of London',
 'Christopher Columbus',
 'The Cold War',
 'Salvador Dali',
 'The Pyramids',
 'Vincent van Gogh',
 'The Nervous System',
 'Photosynthesis',
 'The Respiratory System',
 'Leonardo da Vinci',
 'World War 1',
 'The Solar System',
 'The Eiffel TowerConfucius',
 "Newton's Third Law of Motion",
 'The Respiratory System',
 'The Machu Picchu',
 'Foreshadowing',
 'Irony',
 'Evolution',
 'The French Revolution',
 'Poseidon',
 'Christopher Columbus',
 'DNA',
 'Allegory',
 'Marie Curie',
 'Ares',
 'The Declaration of Independence',
 'Capitalism',
 'The Parthenon',
 'Pythagorean Theorem',
 'The Solar System',
 'The Taj Mahal',
 'The Machu Picchu',
 'Hercules',
 'Capitalism',
 'The Tower of London',
 'Martin Luther King',
 'Supply and Demand',
 'The Circulatory System',
 'Irony',
 'The Respiratory System',
 'Allegory',
 'The White House',
 'Albert Einstein',
 'The Eiffel TowerConfucius',
 'The Respiratory System',
 'The Great Wall of 

In [12]:
llms = ["Llama-3.1-8B-Instruct", "gpt-4o-mini", "Qwen2.5-3B-Instruct", "Llama-3.1-8B", "Mistral-7B-Instruct", "Llama-3.1-70B", "Llama-3.1-70B-Instruct", "phi-3.5-mini-instruct"]
        
config_llm = {'agent1_model': 'Llama-3.1-8B-Instruct',
             'agent2_model': 'Llama-3.1-8B-Instruct',
             'eval_model': 'Llama-3.1-70B-Instruct',
             'iterations': 10,
             'verbose': False,
             'write': True,
             'convo_length_limit': 10,
             'max_tokens': 256,
             'gpus': 1,
             'seed': 0,
             'task_name': 'Education',
             'model_dir': "/home/marwa/models/"}

with open("education/Llama-3.1-8B-Instruct.json", "w", encoding="utf-8") as f:
    json.dump(config_llm, f, indent=4)

In [13]:
import re

def clean_role_prefix(response, expected_role):
    """
    Removes repeated instances of the expected_role prefix at the start (e.g., 'Therapist: Therapist:'),
    and ensures the response begins with a single correct expected_role prefix.
    """
    pattern = rf"^(({re.escape(expected_role)}):\s*)+"
    cleaned = re.sub(pattern, '', response.strip(), flags=re.IGNORECASE)
    return cleaned
    
def is_role_confused(response, other_role):
    """
    Checks if the output starts with the wrong speaker tag.
    """
    if other_role + ":" in response:
        return True
    else: 
        return False

def generate_response(agent_model, expected_role, other_role, config_llm, prompt, max_retries=3):
    role_confused = True
    while(role_confused):
        response = completion_create(agent_model, config_llm, prompt)
        print("Expected Role", expected_role)
        role_confused = is_role_confused(response, other_role)
        if not is_role_confused(response, other_role):
            return clean_role_prefix(response, expected_role)
            
    return clean_role_prefix(response, expected_role)

def generate_conversation(config_llm, p1, p2, p1_name, p2_name, subject, role, pturn=1):
    stats['P1'] = p1
    stats['P2'] = p2

    stats['pturn'] = pturn
    round_num = 0
    while round_num < config_llm['convo_length_limit']:
        conversation = ("".join([turn[1] if isinstance(turn, tuple) else turn for turn in stats["conversation"]]) if len(stats["conversation"]) != 0 else "You are starting the conversation.\n")
        
        if pturn == 1:
            prompt = config_role["agent1_prompt"]
            pturn = 2
            if config_llm["verbose"]:
                print(prompt)
                print()

            if round_num!=0: 
                prompt+= "Your conversation with the student so far is below:\nConversation:\n%CONVERSATION%"
                
            if round_num >=config_llm['convo_length_limit']*2-11 and round_num<=config_llm['convo_length_limit']*2-1:
                prompt+= "You have " + str((config_llm['convo_length_limit']-round_num)//2) + " rounds left." + "Make sure to conclude the conversation as your near the end."

            elif round_num>config_llm['convo_length_limit']*2-1:
                prompt+= "This is your concluding line in the conversation."

            if round_num!=0: 
                prompt+= "Continue the conversation with the student. Remember you are the teacher. "
                
            prompt += config_role["reminder_prompt"]
            prompt+="%SPEAKER_ROLE%:"
            prompt = prompt.replace("%SPEAKER_ROLE%", config_role["agent1_role"]) \
                   .replace("%LISTENER_ROLE%", config_role["agent2_role"]) \
                    .replace("%ROLE%", role) \
                   .replace("%SUBJECT%", subject) \
                   .replace("%CONVERSATION%", conversation)
            
            response = generate_response(config_llm['agent1_model'], config_role["agent1_role"], config_role["agent2_role"], config_llm, prompt)
            stats["conversation"].append((round_num, f"{config_role["agent1_role"]}: " + response + "\n"))
        
        else:
            prompt = config_role["agent2_prompt"]
            pturn = 1    
            if config_llm["verbose"]:
                print(prompt)
                print()

            if round_num!=0: 
                prompt+= "Your conversation with the teacher so far is below:\nConversation:\n%CONVERSATION%"
            if round_num >=config_llm['convo_length_limit']*2-11 and round_num<=config_llm['convo_length_limit']*2-1:
                prompt+= "You have " + str((config_llm['convo_length_limit']-round_num)//2) + " rounds left." + "Make sure to conclude the conversation as your near the end."
            elif round_num>config_llm['convo_length_limit']*2-1:
                prompt+= "This is your concluding line in the conversation."

            if round_num!=0: 
                prompt+= "Continue the conversation with the teacher. Remember you are the student. "

            prompt += config_role["reminder_prompt"]
            
            prompt+="%SPEAKER_ROLE%:"
            prompt = prompt.replace("%SPEAKER_ROLE%", config_role["agent2_role"]) \
               .replace("%LISTENER_ROLE%", config_role["agent1_role"]) \
               .replace("%SPEAKER_BACKSTORY%", p2) \
                .replace("%ROLE%", role) \
               .replace("%SUBJECT%", subject) \
               .replace("%CONVERSATION%", conversation)
            
            response = generate_response(config_llm['agent2_model'], config_role["agent2_role"], config_role["agent1_role"], config_llm, prompt)
            stats["conversation"].append((round_num, f"{config_role["agent2_role"]}: " + response + "\n"))
        round_num += 1

    stats["rounds"] = round_num
    if config_llm['verbose']:
        print(stats["conversation"])
    return stats.copy()

def reset_stats():
    stats_template = {
        "task_name": config_llm['task_name'],
        "P1": "",
        "P2": "",
        "conversation": [],
        "pturn": 0, # beginning person (1 or 2)
        "index": -1,
        "timestamp": "",
        "rounds": 0,
        'conversation_only': True
    }
    for key, value in stats_template.items():
        stats[key] = value

In [14]:
import os
import random
from datetime import datetime
import utils
utils.config = config_llm

current_date = str(datetime.now().strftime("%m.%d.%y"))
output_dir = f"education/exp/{current_date}"
os.makedirs(output_dir, exist_ok=True)

# Generate unique random number for filename
def generate_unique_file_number(output_dir, prefix, seed, extension=".json"):
    while True:
        rand_num = random.randint(0, 1000)
        filename = f"{prefix}_{seed}_{rand_num}{extension}"
        filepath = os.path.join(output_dir, filename)
        if not os.path.exists(filepath):
            return rand_num

unique_num = generate_unique_file_number(
    output_dir,
    config_llm['agent1_model'],
    config_llm['seed']
)

# File to write output to
write_file = os.path.join(output_dir, f"{config_llm['agent1_model']}_{config_llm['seed']}_{unique_num}.json")

In [15]:
from itertools import cycle
persona_final = dict(zip(topic_list_selection, cycle(personas)))
for k, v in persona_final.items(): print(f"{k}: {v}")

Minotaur: {'grade_level': 'high school', 'description': 'As a high school Intrapersonal learner, I connect content to my own values. In dialogue, I ask how topics relate to my goals or experiences and share personal reflections aloud. That self-referential talk makes learning relevant and motivating.'}
The Industrial Revolution: {'grade_level': 'elementary school', 'description': 'As an elementary Kinesthetic learner, I understand ideas by imagining myself performing them. In conversation, I ask you to guide me through a pretend play-through—verbally walking me step by step as if I’m enacting a simple experiment or physical process. This imagined movement helps me anchor concepts in ‘muscle memory’ even though we’re only talking.'}
The Tower of London: {'grade_level': 'middle school', 'description': 'As a middle school Technology-Enhanced learner, I thrive on conversational simulations of digital tools. In dialogue, I ask you to describe how a virtual model might respond as we adjust p

In [16]:
with open("education/config_education_personas.json", "w", encoding="utf-8") as f:
    json.dump(persona_final, f, indent=4)

In [17]:
eval_prompts = {
    "strategy_consistency": "%SCENARIO_DESC% Evaluate whether the following line spoken by %SPEAKER_ROLE% contradicts their background. First, describe the interpreted intention of the statement, and whether or not it aligns with the given background of %SPEAKER_ROLE%. Then, provide 1 sentence explaining your reasoning based on the strategy, followed by YES if the line contradicts the given background of %SPEAKER_ROLE%, and NO if it is consistent with the provided background of %SPEAKER_ROLE%. %SPEAKER_ROLE%'s strategy is described as follows:\n%SPEAKER_STRATEGY%\n %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide 1 sentence of reasoning followed by YES or NO.\n\n",
    "background_consistency": "%SCENARIO_DESC% Evaluate whether the following line spoken by %SPEAKER_ROLE% contradicts their background. Provide 1 sentence explaining your reasoning based on the background, followed by YES if the line contradicts the given background of %SPEAKER_ROLE%, and NO if it is consistent with the provided background of %SPEAKER_ROLE%. %SPEAKER_ROLE%'s background is described as follows:\n%SPEAKER_BACKSTORY%\n %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide 1 sentence of reason reasoning followed by YES or NO.\n\n",
    "combined_prompt_consistency": "%SCENARIO_DESC% Evaluate the intention behind the following line spoken by %SPEAKER_ROLE% and determine whether it contradicts their background. First, describe the interpreted intention of the statement, and whether or not it aligns with the given background of %SPEAKER_ROLE%. Then, answer YES if the line contradicts the given background of %SPEAKER_ROLE% or the intention does not align with the provided background, and answer NO if it does align with the provided background or the intention aligns with the background of %SPEAKER_ROLE%. %SPEAKER_ROLE%'s background is described as follows:\n%SPEAKER_BACKSTORY%\n %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide your answer as 1 sentence explaining your reasoning based on the background and the interpreted intention, followed by YES or NO.\n\n",

    "pairwise_consistency":"%SCENARIO_DESC% For the following line spoken by %SPEAKER_ROLE%, answer YES if the line directly contradicts the provided line spoken by %LISTENER_ROLE%, and answer NO if the line does not contradict the provided line spoken by %LISTENER_ROLE%. %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n %LISTENER_ROLE% spoke the following line: \n%LISTENER_LINE%\n\n Answer YES if the line spoken by %SPEAKER_ROLE% contradicts the provided line spoken by %LISTENER_ROLE%, and answer NO if the line does not contradict the provided line spoken by %LISTENER_ROLE%, followed by 1 sentence of reasoning.\n\n",

    "backstory_test": "Based on the following background, generate a new fact-based multiple choice question with 5 choices addressed directly IN SECOND PERSON, along with its correct answer. Preface the question with 'Question:' and the answer with 'Answer:'.\n%SPEAKER_BACKSTORY%\n%PREVIOUS_QUESTIONS%",
    "answer_backstory": "You are %SPEAKER_ROLE%, and you are having a conversation with %LISTENER_ROLE%. Your background is:\n%SPEAKER_BACKSTORY%\n So far, the conversation is as below:\n%CONVERSATION%\n\n Based on your conversation above so far, answer the following multiple choice question.\n%BACKSTORY_QUESTION%\n",
    "grade_backstory": "As part of grading a test, determine whether the given answer %GIVEN_ANSWER% matches the following correct answer. Respond with either YES or NO.\nCorrect Answer: %CORRECT_ANSWER%\n"
}

def eval_prompt_consistency(conv_dict, both_agents=False):
    conv_dict['eval_prompt_consistency'] = []
    conv_dict['P1_prompt_consistency_score'] = 0
    conv_dict['P2_prompt_consistency_score'] = 0
    p1_utterances = 0
    p2_utterances = 0

    pturn = conv_dict["pturn"]
    for line in conv_dict["conversation"]:
        line_number = line[0]
        convo_line = line[1]
        if pturn == 1:
            if both_agents:
                prompt = eval_prompts["combined_prompt_consistency"].replace("%SCENARIO_DESC", config_role["agent1_prompt"]) \
                                                                    .replace("%SPEAKER_ROLE%", config_role["agent1_role"]) \
                                                                    .replace("%SPEAKER_BACKSTORY%", conv_dict["P1"]) \
                                                                    .replace("%SPEAKER_LINE%", convo_line)
                if config_llm.get('verbose', False):
                    print(prompt)
                output = completion_create(config_llm['eval_model'], config_llm, prompt)
                conv_dict['eval_prompt_consistency'].append((line_number, output))
                if "YES" not in output:  # no contradiction
                    conv_dict['P1_prompt_consistency_score'] += 1
                p1_utterances += 1
            pturn = 2
        elif pturn == 2:
            prompt = eval_prompts["combined_prompt_consistency"].replace("%SCENARIO_DESC", config_role["agent2_prompt"]) \
                                                                .replace("%SPEAKER_ROLE%", config_role["agent2_role"]) \
                                                                .replace("%SPEAKER_BACKSTORY%", conv_dict["P2"]) \
                                                                .replace("%SPEAKER_LINE%", convo_line)
            if config_llm.get('verbose', False):
                print(prompt)
            output = completion_create(config_llm['eval_model'], config_llm, prompt)
            conv_dict['eval_prompt_consistency'].append((line_number, output))
            if "YES" not in output:  # no contradiction
                conv_dict['P2_prompt_consistency_score']+= 1
            p2_utterances += 1
            pturn = 1

    if p1_utterances > 0:
        conv_dict['P1_prompt_consistency_score'] /= p1_utterances
    if p2_utterances > 0:
        conv_dict['P2_prompt_consistency_score'] /= p2_utterances

    if config_llm.get('verbose', False):
        print(conv_dict)
    return conv_dict
# Replacement for (2) and (4), evaluates whether each pair of lines in the conversation is consistent with each other



In [None]:
index_offset = load_stats_file(write_file)
conversations = []    
lengths = [10, 20, 40, 60]
for i in range(1):
    for topic in persona_final:
        background = persona_final[topic]
        for convo_length in lengths:
            config_llm['convo_length_limit'] = convo_length
            reset_stats()
            conversation = generate_conversation(
                config_llm,
                "", 
                background["description"], 
                "Teacher", 
                "Student", 
                topic, 
                background["grade_level"], 
                pturn=1
            )
            conversation_eval = eval_prompt_consistency(conversation, both_agents=False)
            print(conversation_eval)
            conversations.append(conversation_eval)
            stats['index'] = index_offset
            stats['timestamp'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            write_stats(write_file)
            index_offset += 1


written!!
INFO 04-28 18:04:49 [config.py:600] This model supports multiple tasks: {'reward', 'score', 'generate', 'classify', 'embed'}. Defaulting to 'generate'.
INFO 04-28 18:04:49 [config.py:1780] Chunked prefill is enabled with max_num_batched_tokens=8192.
INFO 04-28 18:04:54 [__init__.py:239] Automatically detected platform cuda.
INFO 04-28 18:04:57 [core.py:61] Initializing a V1 LLM engine (v0.8.3) with config: model='meta-llama/Meta-Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir='/home/marwa/models/', load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decod

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  25% Completed | 1/4 [00:00<00:00,  6.48it/s]
Loading safetensors checkpoint shards:  50% Completed | 2/4 [00:00<00:00,  2.56it/s]
Loading safetensors checkpoint shards:  75% Completed | 3/4 [00:01<00:00,  2.04it/s]
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:01<00:00,  1.90it/s]
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:01<00:00,  2.10it/s]



INFO 04-28 18:05:01 [loader.py:447] Loading weights took 1.99 seconds
INFO 04-28 18:05:02 [gpu_model_runner.py:1273] Model loading took 14.9889 GiB and 3.056719 seconds
INFO 04-28 18:05:09 [backends.py:416] Using cache directory: /home/marwa/.cache/vllm/torch_compile_cache/8b4ebbe309/rank_0_0 for vLLM's torch.compile
INFO 04-28 18:05:09 [backends.py:426] Dynamo bytecode transform time: 7.48 s
INFO 04-28 18:05:10 [backends.py:115] Directly load the compiled graph for shape None from the cache
INFO 04-28 18:05:16 [monitor.py:33] torch.compile takes 7.48 s in total
INFO 04-28 18:05:17 [kv_cache_utils.py:578] GPU KV cache size: 417,920 tokens
INFO 04-28 18:05:17 [kv_cache_utils.py:581] Maximum concurrency for 131,072 tokens per request: 3.19x
INFO 04-28 18:05:36 [gpu_model_runner.py:1608] Graph capturing finished in 19 secs, took 2.09 GiB
INFO 04-28 18:05:36 [core.py:162] init engine (profile, create kv cache, warmup model) took 34.89 seconds


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s, est. speed input: 167.30 toks/s, output: 69.84 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 198.52 toks/s, output: 69.80 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 265.41 toks/s, output: 67.94 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.23s/it, est. speed input: 294.88 toks/s, output: 68.42 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.27s/it, est. speed input: 316.08 toks/s, output: 71.91 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.17s/it, est. speed input: 459.69 toks/s, output: 67.37 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 536.77 toks/s, output: 78.83 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 741.04 toks/s, output: 76.84 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.33s/it, est. speed input: 550.27 toks/s, output: 79.79 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.39s/it, est. speed input: 650.36 toks/s, output: 77.06 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 487.29 toks/s, output: 78.47 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 505.05 toks/s, output: 74.72 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 375.60 toks/s, output: 69.88 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.42it/s, est. speed input: 540.13 toks/s, output: 80.80 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 415.61 toks/s, output: 80.12 toks/s]


{'task_name': 'Education', 'P1': '', 'P2': 'As a high school Intrapersonal learner, I connect content to my own values. In dialogue, I ask how topics relate to my goals or experiences and share personal reflections aloud. That self-referential talk makes learning relevant and motivating.', 'conversation': [(0, "Teacher: Let's start by exploring the mythological origins of the Minotaur. Can you tell me what you already know about the Minotaur, and what sparks your interest in learning more about this ancient creature?\n"), (1, "Student: I know that the Minotaur is a creature from Greek mythology, associated with the Labyrinth and the story of Theseus and the Minotaur. I'm curious to learn more about the Minotaur's symbolism and how it relates to the idea of navigating complexity and challenges in life, as it feels relevant to my own experiences with difficult school projects and navigating social situations.\n"), (2, "Teacher: I love that you're drawing connections between the Minotaur 

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  2.28it/s, est. speed input: 235.93 toks/s, output: 80.17 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s, est. speed input: 308.30 toks/s, output: 81.82 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 254.60 toks/s, output: 82.94 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.19s/it, est. speed input: 287.38 toks/s, output: 82.59 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.09s/it, est. speed input: 359.33 toks/s, output: 82.29 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s, est. speed input: 566.77 toks/s, output: 82.19 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.00it/s, est. speed input: 563.51 toks/s, output: 82.21 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 638.82 toks/s, output: 82.16 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.25s/it, est. speed input: 587.53 toks/s, output: 82.33 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 1007.94 toks/s, output: 81.91 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s, est. speed input: 1153.52 toks/s, output: 80.95 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.35s/it, est. speed input: 757.30 toks/s, output: 82.09 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.37s/it, est. speed input: 798.34 toks/s, output: 64.51 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.01s/it, est. speed input: 1212.99 toks/s, output: 64.36 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.62s/it, est. speed input: 766.76 toks/s, output: 64.72 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it, est. speed input: 1362.83 toks/s, output: 64.38 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.38s/it, est. speed input: 1029.35 toks/s, output: 64.65 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.14s/it, est. speed input: 1363.27 toks/s, output: 64.92 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.37s/it, est. speed input: 1154.83 toks/s, output: 65.70 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 1597.28 toks/s, output: 64.08 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 386.89 toks/s, output: 71.21 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s, est. speed input: 557.49 toks/s, output: 82.39 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s, est. speed input: 490.93 toks/s, output: 82.88 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 529.90 toks/s, output: 82.75 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 530.48 toks/s, output: 82.36 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 520.46 toks/s, output: 81.98 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s, est. speed input: 417.10 toks/s, output:

{'task_name': 'Education', 'P1': '', 'P2': 'As a high school Intrapersonal learner, I connect content to my own values. In dialogue, I ask how topics relate to my goals or experiences and share personal reflections aloud. That self-referential talk makes learning relevant and motivating.', 'conversation': [(0, "Teacher: Let's start by exploring the mythological origins of the Minotaur. Can you recall any stories or creatures from ancient Greek mythology that you've learned about so far?\n"), (1, "Student: I think of the Labyrinth, actually, because I had to read about it in my English literature class last year. It's interesting that the Minotaur is said to be trapped inside a similar maze, so I'm guessing there might be a connection between the two?\n"), (2, "Teacher: That's a great connection to make, and you're absolutely right about the Labyrinth being a key part of the Minotaur's story. Let's dive deeper into the myth: what do you think is the significance of the Labyrinth in rela

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s, est. speed input: 160.75 toks/s, output: 84.27 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s, est. speed input: 412.71 toks/s, output: 82.54 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.15s/it, est. speed input: 200.04 toks/s, output: 82.62 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 420.48 toks/s, output: 82.29 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s, est. speed input: 433.51 toks/s, output: 82.37 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.13s/it, est. speed input: 463.24 toks/s, output: 82.21 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.30s/it, est. speed input: 439.28 toks/s, output: 82.32 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.40s/it, est. speed input: 519.07 toks/s, output: 82.22 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.33s/it, est. speed input: 596.42 toks/s, output: 81.77 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.15s/it, est. speed input: 825.04 toks/s, output: 81.46 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.09s/it, est. speed input: 919.17 toks/s, output: 81.81 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.11s/it, est. speed input: 1029.18 toks/s, output: 81.46 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.27s/it, est. speed input: 927.88 toks/s, output: 81.71 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s, est. speed input: 1928.34 toks/s, output: 81.01 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.15s/it, est. speed input: 1164.08 toks/s, output: 81.48 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 1971.33 toks/s, output: 80.98 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.19s/it, est. speed input: 1258.06 toks/s, output: 81.35 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.11s/it, est. speed input: 1477.66 toks/s, output: 80.84 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 1584.04 toks/s, output: 80.65 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s, est. speed input: 2358.70 toks/s, output: 80.22 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 1704.59 toks/s, output: 80.64 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.17s/it, est. speed input: 1690.38 toks/s, output: 80.49 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.19s/it, est. speed input: 1694.37 toks/s, output: 80.44 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s, est. speed input: 2478.95 toks/s, output: 80.11 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 2309.84 toks/s, output: 80.16 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 2435.96 toks/s, output: 80.00 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s, est. speed input: 2468.11 toks/s, output: 80.02 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s, est. speed input: 3443.79 toks/s, output: 79.53 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.14s/it, est. speed input: 2181.51 toks/s, output: 80.08 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s, est. speed input: 3471.17 toks/s, output: 79.55 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.21s/it, est. speed input: 2172.02 toks/s, output: 80.05 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.31s/it, est. speed input: 2114.20 toks/s, output: 79.94 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.31s/it, est. speed input: 2166.29 toks/s, output: 79.44 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.39s/it, est. speed input: 2144.95 toks/s, output: 79.68 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s, est. speed input: 3579.77 toks/s, output: 78.56 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.19it/s, est. speed input: 3784.44 toks/s, output: 78.84 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it, est. speed input: 2975.41 toks/s, output: 79.33 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.40s/it, est. speed input: 2372.77 toks/s, output: 79.31 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.33s/it, est. speed input: 2556.42 toks/s, output: 79.27 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.19s/it, est. speed input: 2969.86 toks/s, output: 78.88 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.24it/s, est. speed input: 437.85 toks/s, output: 82.87 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 450.88 toks/s, output: 82.84 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s, est. speed input: 452.69 toks/s, output: 82.41 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 441.92 toks/s, output: 82.53 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s, est. speed input: 514.11 toks/s, output: 81.85 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.18it/s, est. speed input: 470.77 toks/s, output: 82.80 toks/s]
Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.16it/s, est. speed input: 424.29 toks/s, output:

{'task_name': 'Education', 'P1': '', 'P2': 'As a high school Intrapersonal learner, I connect content to my own values. In dialogue, I ask how topics relate to my goals or experiences and share personal reflections aloud. That self-referential talk makes learning relevant and motivating.', 'conversation': [(0, "Teacher: I'd love to help you learn about the fascinating creature of Greek mythology known as the Minotaur. Let's start by setting the scene: imagine a powerful and mysterious creature that lives in a labyrinth, a complex maze designed by the great architect Daedalus.\n"), (1, "Student: That sounds really cool, I'm excited to learn more about the Minotaur. How does the Minotaur relate to my own values of perseverance and problem-solving, like navigating through difficult situations and coming out on top?\n"), (2, "Teacher: The Minotaur's connection to your values is interesting because it's said that the hero Theseus, who navigated the labyrinth to slay the beast, relied heavil

Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s, est. speed input: 174.33 toks/s, output: 84.63 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.36it/s, est. speed input: 307.92 toks/s, output: 83.11 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 255.91 toks/s, output: 82.82 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s, est. speed input: 480.54 toks/s, output: 82.49 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.13s/it, est. speed input: 341.51 toks/s, output: 82.71 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.05s/it, est. speed input: 498.05 toks/s, output: 82.53 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.25s/it, est. speed input: 453.43 toks/s, output: 82.51 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.25s/it, est. speed input: 574.03 toks/s, output: 82.46 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.51s/it, est. speed input: 512.16 toks/s, output: 82.05 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s, est. speed input: 1257.05 toks/s, output: 81.05 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.18s/it, est. speed input: 813.57 toks/s, output: 82.12 toks/s]


Expected Role Teacher


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.25s/it, est. speed input: 888.90 toks/s, output: 81.98 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.06it/s, est. speed input: 1231.98 toks/s, output: 81.64 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 1399.86 toks/s, output: 81.58 toks/s]


Expected Role Student


Processed prompts: 100%|███████████████████████████████████| 1/1 [00:01<00:00,  1.49s/it, est. speed input: 883.10 toks/s, output: 81.87 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s, est. speed input: 2009.63 toks/s, output: 81.14 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.25s/it, est. speed input: 1200.13 toks/s, output: 81.61 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.30s/it, est. speed input: 1273.95 toks/s, output: 81.07 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.45s/it, est. speed input: 1183.12 toks/s, output: 81.00 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.37s/it, est. speed input: 1371.34 toks/s, output: 80.49 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.32s/it, est. speed input: 1468.94 toks/s, output: 80.34 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.22s/it, est. speed input: 1709.79 toks/s, output: 80.09 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.17s/it, est. speed input: 1834.79 toks/s, output: 80.44 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.28s/it, est. speed input: 1784.14 toks/s, output: 80.39 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.19s/it, est. speed input: 1978.61 toks/s, output: 80.22 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s, est. speed input: 3004.42 toks/s, output: 79.76 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it, est. speed input: 2280.11 toks/s, output: 80.07 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.04it/s, est. speed input: 2741.71 toks/s, output: 79.90 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.19s/it, est. speed input: 2253.13 toks/s, output: 80.08 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it, est. speed input: 2613.31 toks/s, output: 79.81 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.01it/s, est. speed input: 2882.98 toks/s, output: 79.74 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.41it/s, est. speed input: 4218.89 toks/s, output: 79.20 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.20s/it, est. speed input: 2485.99 toks/s, output: 79.74 toks/s]


Expected Role Teacher


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s, est. speed input: 3601.52 toks/s, output: 79.22 toks/s]


Expected Role Student


Processed prompts: 100%|██████████████████████████████████| 1/1 [00:01<00:00,  1.06s/it, est. speed input: 2986.88 toks/s, output: 79.40 toks/s]


Expected Role Teacher


Processed prompts:   0%|                                              | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

In [None]:
write_stats(write_file)

In [None]:
conversations

In [None]:
with open("education/exp/04.28.25/Llama-3.1-8B-Instruct_0_580.json", "w", encoding="utf-8") as f:
    json.dump(conversations, f, indent=4)

In [None]:
conversations