In [1]:
%env CUDA_VISIBLE_DEVICES=1,7
%env TMPDIR=/raid/users/ryan_cheng/tmp
import os
import glob
import re
import json
import random
import time
import pickle
from absl import app, flags
from tqdm import tqdm
from datetime import datetime
import openai
from openai import OpenAI
from transformers import AutoTokenizer
import pandas as pd
import numpy as np

np.random.seed(0)

from utils import *
import utils
from consistency_eval import *
from persona_chat_generation import *

try:
    from vllm import LLM, SamplingParams
    import ray
except ImportError:
    pass

env: CUDA_VISIBLE_DEVICES=1,7
env: TMPDIR=/raid/users/ryan_cheng/tmp
INFO 04-11 13:11:11 __init__.py:190] Automatically detected platform cuda.


In [2]:
# Read the OpenAI API key from a text file one directory above the working
# directory and install the resulting client on the shared utils module.
with open(os.path.abspath('../ryan_openai.txt'), 'r') as key_file:
    openai_client = OpenAI(api_key=key_file.read().rstrip('\n'))
utils.client = openai_client

In [5]:
# Pick the config file whose entries are merged into the global config
# dictionary in utils.py.
# with open("./config/persona_chat/Llama-3.1-70B-Instruct.json", 'r') as f:
#     config_llama = json.load(f)
with open("./config/education/gpt-4o-mini.json", 'r') as config_file:
    config_gpt4_mini = json.load(config_file)
config.update(config_gpt4_mini)

# Likewise merge the education prompts into the global prompts dictionary in utils.py.
with open('config/education/prompts.json', 'r') as prompts_file:
    new_prompts = json.load(prompts_file)
prompts.update(new_prompts)

In [6]:
# Set the seed entry of the global config for this run.
config['seed'] = 1

In [7]:
# Display the merged prompts dictionary to check what was loaded.
prompts

{'agent1_role': 'Teacher',
 'agent2_role': 'Student',
 'dialogue_prompt': 'Simulate a conversation between a teacher in school and a student. There is a small chance that the teacher is successful in teaching the student so he understands the topic. The conversation lasts roughly 10-15 turns but ends when either side says <END>. The teacher wants to teach the student about %TOPIC%. The student likes %STUDENT_PREFERENCE%. The teacher does not know that beforehand. The student prefers to learn this way, %STUDENT_REACTIONS%. The teacher likes %TEACHER_PREFERENCE%. He prefers to teach this way, %TEACHER_REACTION%. Output the conversation and the probability that the student understood the material, in the following format. Conversation: [ Teacher: "...", Student: "...", Teacher: "...", Student: "...",] Probability: "...",',
 'eval_prompts': {'prompt_consistency': "For the following line spoken by %SPEAKER_ROLE%, answer YES if the line contradicts the given preferences or reactions of %SPEA

In [8]:
# Load previously generated education conversations.
# They get their own variable: rebinding `config` to this data would replace the
# settings dictionary that other cells index (config['seed'], config['verbose'],
# config['agent1_model'], config['convo_length_limit'], ...).
with open('data/education/conversations_train1.json', 'r') as f:
    conversations_train = json.load(f)

In [None]:
def generate_conversation(student_preference, student_reactions, teacher_preference, teacher_reactions, topic, pturn=1):
    """Generate a teacher/student dialogue one utterance at a time.

    Speakers alternate, starting with agent 1 (the teacher) when ``pturn == 1``
    and with agent 2 (the student) otherwise. On every turn the placeholders of
    ``prompts["dialogue_prompt"]`` are filled in and the speaker's model is asked
    for the next line. Generation stops after ``config['convo_length_limit']``
    utterances, or as soon as ``<END>`` appears in the transcript.

    Results are accumulated in the global ``stats`` dictionary, whose
    ``"conversation"`` list is appended to without being initialised here, so the
    caller has to reset ``stats`` beforehand.

    Args:
        student_preference: What the student likes.
        student_reactions: How the student prefers to learn.
        teacher_preference: What the teacher likes.
        teacher_reactions: How the teacher prefers to teach.
        topic: Subject the teacher wants to teach.
        pturn: 1 if the teacher speaks first; any other value lets the student start.

    Returns:
        A shallow copy of ``stats`` holding the keys ``P1``, ``P2``, ``pturn``,
        ``conversation`` and ``rounds`` (plus whatever ``stats`` already contained).
    """
    teacher_role = prompts["agent1_role"]
    student_role = prompts["agent2_role"]

    # Background given to the teacher: only the teacher's own preferences.
    p1 = "The teacher likes %TEACHER_PREFERENCE%. The student does not know that beforehand. The teacher prefers to teach this way, %TEACHER_REACTION%. You are continuing the conversation as the teacher."
    p1 = p1.replace("%TEACHER_PREFERENCE%", teacher_preference).replace("%TEACHER_REACTION%", teacher_reactions)

    # NOTE(review): the student background mirrors the teacher one (own preferences
    # only). The original code never defined it -- confirm this is the intended wording.
    p2 = "The student likes %STUDENT_PREFERENCE%. The teacher does not know that beforehand. The student prefers to learn this way, %STUDENT_REACTIONS%. You are continuing the conversation as the student."
    p2 = p2.replace("%STUDENT_PREFERENCE%", student_preference).replace("%STUDENT_REACTIONS%", student_reactions)

    stats['P1'] = p1
    stats['P2'] = p2
    stats['pturn'] = pturn

    round_num = 0
    while round_num < config['convo_length_limit'] and '<END>' not in "".join(stats["conversation"]):
        conversation = ("".join(stats["conversation"]) if len(stats["conversation"]) != 0 else "[You are starting the conversation.]")

        if pturn == 1:
            speaker_role, listener_role, backstory, model_key = teacher_role, student_role, p1, 'agent1_model'
            pturn = 2
        else:
            speaker_role, listener_role, backstory, model_key = student_role, teacher_role, p2, 'agent2_model'
            pturn = 1

        # str.replace leaves the template untouched for placeholders it does not
        # contain, so every known placeholder is filled regardless of which
        # dialogue template is configured. The transcript goes last so that text
        # produced by the models is never itself scanned for placeholders.
        substitutions = {
            "%SPEAKER_ROLE%": speaker_role,
            "%LISTENER_ROLE%": listener_role,
            "%TOPIC%": topic,
            "%STUDENT_PREFERENCE%": student_preference,
            "%STUDENT_REACTIONS%": student_reactions,
            "%TEACHER_PREFERENCE%": teacher_preference,
            "%TEACHER_REACTION%": teacher_reactions,
            "%SPEAKER_BACKSTORY%": backstory,
            "%CONVERSATION%": conversation,
        }
        prompt = prompts["dialogue_prompt"]
        for placeholder, value in substitutions.items():
            prompt = prompt.replace(placeholder, value)

        if config["verbose"]:
            print(prompt)
            print()

        # Plain concatenation instead of an f-string with nested double quotes,
        # which only parses on Python 3.12+.
        stats["conversation"].append(speaker_role + ": " + completion_create(config[model_key], config, prompt) + "\n")
        round_num += 1

    stats["rounds"] = round_num
    if config['verbose']:
        print(stats["conversation"])
    return stats.copy()

In [None]:
def eval_prompt_consistency(conv_dict):
    """Score how consistently each speaker's lines follow that speaker's background.

    Walks ``conv_dict["conversation"]`` assuming the two speakers strictly
    alternate, starting with agent 1 when ``conv_dict["pturn"] == 1`` and with
    agent 2 otherwise. For every line the evaluation model is asked the
    ``prompt_consistency`` question against the speaker's background
    (``conv_dict["P1"]`` / ``conv_dict["P2"]``); an answer containing "YES" counts
    as a contradiction.

    The following keys of ``conv_dict`` are (over)written in place:
        eval_prompt_consistency: raw evaluator outputs, one per line.
        P1_prompt_consistency_score, P2_prompt_consistency_score: fraction of the
            speaker's lines without a contradiction, or 0 when that speaker has
            no lines (previously this raised ZeroDivisionError).

    Returns:
        None.
    """
    #assert 'eval_prompt_consistency' not in conv_dict # warn if we are replacing metrics we don't mean to overwrite
    conv_dict['eval_prompt_consistency'] = []
    conv_dict['P1_prompt_consistency_score'] = 0
    conv_dict['P2_prompt_consistency_score'] = 0
    utterances = {1: 0, 2: 0}

    pturn = conv_dict["pturn"]
    for line in conv_dict["conversation"]:
        # Any turn marker other than 1 is treated as agent 2.
        speaker = 1 if pturn == 1 else 2
        prompt = prompts["eval_prompts"]["prompt_consistency"].replace("%SPEAKER_ROLE%", prompts[f"agent{speaker}_role"]) \
                                                              .replace("%SPEAKER_BACKSTORY%", conv_dict[f"P{speaker}"]) \
                                                              .replace("%SPEAKER_LINE%", line)
        if config['verbose']:
            print(prompt)
        output = completion_create(config['eval_model'], config, prompt)
        conv_dict['eval_prompt_consistency'].append(output)
        if "YES" not in output: # no contradiction
            conv_dict[f'P{speaker}_prompt_consistency_score'] += 1
        utterances[speaker] += 1
        pturn = 2 if speaker == 1 else 1

    # Turn the per-speaker counts into fractions, skipping speakers without lines.
    for speaker, spoken in utterances.items():
        if spoken:
            conv_dict[f'P{speaker}_prompt_consistency_score'] /= spoken


# Anthology of Backstories

## Load Anthology Profiles

In [6]:
file_path = "data/anthology/Meta-Llama-3-70B_demographics_survey+political_affiliation_batch_1+2+3_no_word_cutoff.pkl"

# NOTE: unpickling can execute arbitrary code -- only load this file from a trusted source.
# pd.read_pickle opens the path itself, so no separate open() is needed.
try:
    df = pd.read_pickle(file_path)
except (EOFError, pickle.UnpicklingError) as e:
    print("Error loading pickle file:", e)
    # Stop here rather than continuing with `df` undefined or left over from an earlier run.
    raise

In [7]:
def count_sentences(paragraph):
    """Return a rough sentence count for ``paragraph``.

    The text is cut at every '.', '!' or '?' (commas are not separators) and the
    number of resulting pieces is returned. That is always one more than the
    number of those characters, so empty pieces -- after a final period, or
    between the dots of "..." -- are counted as well.
    """
    terminators = re.findall(r'[.!?]', paragraph)
    return len(terminators) + 1

In [8]:
# Markers around the free-text answer inside each parsing prompt.
target_phrase_start = "Answer:"
target_phrase_end = "Question:"

personas = []
for text in df["age_category_4_llm_parsing_prompt"]:
    # Locate the answer marker, then the first question marker at or after it.
    start_index = text.find(target_phrase_start)
    stop_index = text.find(target_phrase_end, start_index)
    # The extra +1 also drops the single character right after the start marker.
    persona = text[start_index + len(target_phrase_start) + 1:stop_index]
    count = count_sentences(persona)
    if count <= 20:
        continue  # keep only the longer backstories
    personas.append({"persona": persona, "len": count})

len(personas)

4895

In [38]:
# Show the raw text of the first extracted persona.
print(personas[0]['persona'])

I am in my late 30s, grew up in a small town in the mountains of southern Appalachia, left home at 18, was in the army, and worked for some time in the auto industry before starting my current job. What matters to me are my wife and children, faith, family, freedom, and the sanctity of life.

I didn’t have any exposure to science growing up. I grew up with the Bible and the world of make-believe. The first thing I remember being excited about was “The Little Engine That Could” which was about having a can-do attitude. Then I became interested in dinosaurs and started reading a lot about them. I read books by Sir David Attenborough and Carl Sagan. Then I got into Star Trek. That’s what I’m still most excited about. I have no idea why I’m not excited about sci-fi movies, and maybe I’m missing out on something, but I just don’t have the same enthusiasm for movies that I do for the science I read in books.

I’m interested in biology because it is so fundamental to all other sciences. I don

## Generate Details about Personas

This section only needs to be run once: when `./data/anthology/personas_updated.json` is missing, or when new persona backgrounds need to be sampled and extended.

In [39]:
# Reseed NumPy's global RNG so the same personas are drawn on every run.
np.random.seed(0)
# Draw 200 distinct positions in `personas` (sampling without replacement).
random_indices_anthology = np.random.choice(len(personas), size=200, replace=False).astype(int)
random_indices_anthology # random indices to generate profiles for

array([4029, 1755, 3834, 2078, 4129, 3819, 3722, 1974,  495, 3147, 3803,
       3837,  965, 1011,  142, 3371, 2955, 3127, 2744, 4813, 4158, 3699,
        311, 2814, 4128, 1393, 4841, 3566,  716,  891, 1877, 1934,   49,
        599,  556, 1428, 1979, 2443, 3568, 2574, 3756,  298,  861, 3011,
        651, 4509, 3073,  202, 3710,  148, 3170, 1489, 4761, 3460, 1272,
       4044,  898, 4548, 1687,  154,  158, 1800, 1515, 4475, 3071, 4073,
         39, 3615,  624, 1936, 2399, 4828, 3712, 2165, 2601, 4070, 2661,
       4371, 4555, 2884,  348, 4146, 1691,  873,  402, 4607, 3420,  130,
       4574, 4339, 2396, 3133, 3309,  541, 2724,  308, 2779,  933, 3582,
        562, 2519,  500, 3389, 1108,  534, 1692, 4481, 4883, 2471, 3608,
       1675, 1738, 4343, 4716, 3060, 3786,  113,  489,   98, 2290,  636,
       3570, 3328,  499, 4837, 2527, 3900,   42, 1437, 1051, 3975, 3320,
       1172, 4052, 4266, 4430, 2682, 2381, 1450, 4376, 3065,  394, 1900,
        125, 1997, 4061, 1794, 1419, 2509, 1326, 34

In [40]:
# Instruction placed in front of the source biography.
persona_prompt = (
    "Provide a biography that includes all of the following attributes. "
    "It should not be presented as a list, but as if someone was providing the information naturally in an introduction in the first person. "
    "It should be contained to one paragraph. "
    "Here is a biography that you can add details to contain all of the attributes below.\n"
)

# Checklist of attributes, one per line, placed after the source biography.
attributes_prompt = "\n".join([
    "Here are the attributions that must be in the biography:",
    "- Name",
    "- Age",
    "- Where they're from",
    "- Where their parents are from",
    "- Religion and extent it is adhered to",
    "- Socioeconomic status - current",
    "- Socioeconomic status - childhood",
    "- Siblings (including name, age, relationship)",
    "- Languages and dialects spoken",
    "- Sexual orientation",
    "- Gender identity",
    "- Relationship status",
    "- Significant past relationships",
    "- Occupation (current and past)",
    "- Education",
    "- Cultural influences",
    "- Political views",
    "- Health and wellness",
    "- Hobbies and interests",
    "- Values and beliefs outside religion",
    "- Fears and anxieties",
    "- Life goals and ambitions",
    "- Defining life experiences",
    "- Friendship circles",
    "- Daily routine and habits",
    "- Pet ownership (current and past)",
    "- Favorite media",
    "- Living situation",
    "- Places traveled to",
])


In [83]:
# Ask the model to expand each sampled anthology backstory into a biography
# that covers the attributes listed in `attributes_prompt`.
personas_updated = []
personas_small = np.array(personas)[random_indices_anthology]
for persona in tqdm(personas_small):
    # Each entry is a {"persona": text, "len": count} dict. Only the biography
    # text belongs in the prompt; str(persona) would embed the whole dict repr.
    agent_prompt = persona_prompt + "Biography:" + persona["persona"] + "\n" + attributes_prompt
    agent_prompt += "\nUpdated Biography:"
    # NOTE(review): the model name is passed a second time as the fourth
    # argument -- confirm what completion_create expects in that position.
    agent_answer = completion_create(
            config_gpt4_mini["agent1_model"],
            config_gpt4_mini,
            agent_prompt, 
            config_gpt4_mini["agent1_model"])
    personas_updated.append(agent_answer)


100%|██████████| 100/100 [03:53<00:00,  2.34s/it]


In [90]:
# Save every generated biography together with the position (in `personas`)
# of the anthology persona it was built from.
with open('data/anthology/personas_updated.json', 'w') as out_file:
    persona_records = [
        {"persona": biography, "index": int(random_indices_anthology[position])}
        for position, biography in enumerate(personas_updated)
    ]
    json.dump(persona_records, out_file, indent=4)

## Load Detailed Anthology Personas and Generate Conversations

In [5]:
# using old generated personas
# with open('data/anthology/personas_updated.json', 'r') as f:
#         personas_updated = json.load(f)

# directly using anthology personas
file_path = "data/anthology/Meta-Llama-3-70B_demographics_survey+political_affiliation_batch_1+2+3_no_word_cutoff.pkl"

# NOTE: unpickling can execute arbitrary code -- only load this file from a trusted source.
# pd.read_pickle opens the path itself, so no separate open() is needed.
try:
    df = pd.read_pickle(file_path)
except (EOFError, pickle.UnpicklingError) as e:
    print("Error loading pickle file:", e)
    # Stop here rather than continuing with `df` undefined or left over from an earlier run.
    raise

def count_sentences(paragraph):
    """Return a rough sentence count for ``paragraph``.

    The text is cut at every '.', '!' or '?' (commas are not separators) and the
    number of resulting pieces is returned. That is always one more than the
    number of those characters, so empty pieces -- after a final period, or
    between the dots of "..." -- are counted as well.
    """
    terminators = re.findall(r'[.!?]', paragraph)
    return len(terminators) + 1

# Markers around the free-text answer inside each parsing prompt.
target_phrase_start = "Answer:"
target_phrase_end = "Question:"

personas = []
for text in df["age_category_4_llm_parsing_prompt"]:
    # Locate the answer marker, then the first question marker at or after it.
    start_index = text.find(target_phrase_start)
    stop_index = text.find(target_phrase_end, start_index)
    # The extra +1 also drops the single character right after the start marker.
    persona = text[start_index + len(target_phrase_start) + 1:stop_index]
    count = count_sentences(persona)
    if count <= 20:
        continue  # keep only the longer backstories
    personas.append({"persona": persona, "len": count})

len(personas)

4895

In [6]:
# Reseed NumPy's global RNG so the same 200 anthology personas are selected on every run.
np.random.seed(0)
random_indices_anthology = np.random.choice(len(personas), size=200, replace=False).astype(int)
# NOTE: despite the name, these are the sampled entries of `personas` themselves
# ({"persona", "len"} dicts), not model-expanded biographies.
personas_updated = np.array(personas)[random_indices_anthology]

In [7]:
# Set the seed entry of the config; it is part of the output file name built from config['seed'].
config['seed'] = 10

In [12]:
# Generate conversations between pairs of the sampled anthology personas.

# file to write output to
write_file = (
        f"data/anthology/exp/"
        f"{config['agent1_model']}_{config['seed']}.json"
    )

# presumably the next free conversation index for this output file -- verify against load_stats_file
index_offset = load_stats_file(write_file)

conversations = []
# The first 20 personas are reshaped into 10 (speaker 1, speaker 2) pairs.
for p_dict1, p_dict2 in tqdm(np.array(personas_updated[:20]).reshape(-1, 2)):
    reset_stats()
    # NOTE(review): this two-persona call does not match the five-argument
    # generate_conversation defined earlier in this notebook; presumably it targets
    # the version imported from persona_chat_generation -- confirm which one is bound.
    conversations.append(generate_conversation(p_dict1["persona"],  p_dict2["persona"], pturn=1))
    # NOTE(review): index and timestamp are set on the global `stats` only after the
    # result was appended; if generate_conversation returns a copy, the entries in
    # `conversations` will not contain them -- confirm this is intended.
    stats['index'] = index_offset
    stats['timestamp'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    write_stats(write_file)
    index_offset += 1

written!!


100%|██████████| 10/10 [01:56<00:00, 11.61s/it]


In [13]:
# Run the pairwise consistency evaluation on every conversation and keep what each call returns.
debug_logs = [eval_pairwise_consistency(conversation) for conversation in tqdm(conversations)]

100%|██████████| 10/10 [07:03<00:00, 42.39s/it]


In [14]:
# Replace the contents of the experiment file with the collected conversations.
with open(write_file, 'w') as out_file:
    json.dump(conversations, out_file, indent=4)

In [None]:
# Inspect the first element of the first conversation's pairwise-consistency debug log.
debug_logs[0][0]

["For the following line spoken by Person A, answer YES if the line directly contradicts the provided line spoken by Person B, and answer NO if the line does not contradict the provided line spoken by Person B. Person A spoke the following line: \nPerson A: Person A: I'm curious, what are your thoughts on the importance of family in shaping one's identity, especially in today's fast-paced world?\n\n\n Person B spoke the following line: \nPerson B: I believe family plays a crucial role in shaping our values and sense of identity, providing a support system that helps us navigate life's challenges. In today's fast-paced world, maintaining those connections can offer grounding and a reminder of what truly matters. It's essential to balance personal ambitions with the nurturing of those relationships.\n\n\n Answer YES if the line spoken by Person A contradicts the provided line spoken by Person B, and answer NO if the line does not contradict the provided line spoken by Person B, followed 

In [6]:
exp_folder = './data/anthology/exp'
# config['filename'] = '/nfs/kun2/users/ryan_cheng/consistency_LLMs/data/persona/exp/gpt-4o-mini_1.json'
config['filename'] = '/nfs/kun2/users/ryan_cheng/consistency_LLMs/data/anthology/exp/gpt-4o-mini_10.json'

# Evaluate the single configured file if one is set; otherwise every JSON file
# in the experiment folder.
selected_file = config['filename']
metric_files = [selected_file] if selected_file else glob.glob(f'{exp_folder}/*.json')
for filename in metric_files:
    run_metrics(filename)

Begin metrics: /nfs/kun2/users/ryan_cheng/consistency_LLMs/data/anthology/exp/gpt-4o-mini_10.json




100%|██████████| 10/10 [16:34<00:00, 99.41s/it] 

End metrics: /nfs/kun2/users/ryan_cheng/consistency_LLMs/data/anthology/exp/gpt-4o-mini_10.json







## Summary prompt/answers

In [59]:
# Show the first entry of personas_updated.
print(personas_updated[0])

My name is James, and I’m in my late 30s, hailing from a small town in the mountains of southern Appalachia, where my parents, originally from the deep South, instilled strong values in me, centered around faith and family. I grew up in a modest household, the oldest of three siblings, with my sister Sarah, who is 34, and my brother Mike, who is 28; our upbringing was rich in stories and imagination, largely influenced by the Bible, but it wasn't until I discovered my passion for biology and the wonders of the universe through the words of great thinkers like Sir David Attenborough that I truly began to understand the world around me. Currently, I work in the tech industry after a diverse career path that included time in the army and the auto industry, while my wife and our two children remain my top priorities, and together we navigate our daily life filled with curiosity, exploration, and values that stress the importance of freedom and the sanctity of life.


In [45]:
# Prompt template asking for a one-sentence summary per attribute; each
# attribute line carries example sentences after the '#'. The biography and the
# output-format instructions are appended where the prompt is sent to the model.
summary_prompt = """
    From the following biography, generate one sentence summaries for each of the attributes with similar structure to the accompanying examples:
        name = ""  # "My name is Jane Doe."
        age = ""  # "I am 35 years old."
        location_from = ""  # "I am from Houston, Texas."
        location_mother_from = "" # "My mother is from Capetown, South Africa."
        location_father_from = "" # "My father is from Mumbai, India."
        religion = "" # "I am Christian.", "I was raised Jewish but don't practice."
        socioeconomic_status = "" # I am middle class.", "I am a wealthy homeowner.", "I am struggling to make ends meet."
        siblings = "" # "I am an only child." "I have a brother and a sister."
        languages_spoken = "" # "I speak only English.", "I speak Portuguese, English, and Bengali."
        sexual_orientation = "" # "I am heterosexual.", "I'm bisexual."
        gender_identity = "" # "I am a woman.", "I identify as nonbinary."
        relationship_status = "" # "I'm currently single.", "I have been married for 4 years."
        significant_past_relationships = "" # "I was engaged for 2 years but it was called off.", "I have never been in a long term relationship."
        occupation_current = "" # "I am a landscaper.", "I do small business consulting."
        occupation_past = "" # "I used to be a frycook in high school.", "In college I worked as a librarian's assistant."
        education = "" # "I have my G.E.D.", "I have a bachelors in Finance from Northeastern University."
        cultural_influences = "" # "I grew up in a devout Mormon community.", "I grew up in a Nigerian Igbo community."
        political_views = "" # "I don't follow politics.", "I am a democratic socialist."
        health = "" # "I am very healthy and exercise often, but have a bad knee.", "I am legally blind."
        hobbies_and_interests = "" # "I enjoy painting and hiking.", "I am a huge history buff."
        beliefs_and_values = "" # "I believe minimizing our carbon footprint is imperative.", "I am vegan for moral reasons."
        fears_and_anxieties = "" # "I am deathly afraid of heights.", "I have anxiety with personal conflict."
        life_goals_and_ambitions = "" # "I hope to retire at an early age.", "I want to start my own company.", "All I want out of life is a big family."
        defining_life_experiences = "" # "I was orphaned at a young age.", "I would bake with my grandma every Sunday growing up."
        friendship_circles = "" # "I've had the same few best friends for years.", "I have no close friends but run in a lot of social circles."
        daily_routine_and_habits = "" # "I do a morning yoga routine every morning before work.", "I spend an hour reading every night before I go to bed."
        pet_ownership = "" # "I don't have any pets.", "I have a black laborador retriever named Sparky."
        favorite_media = "" # "I have rewatched the TV show Friends a dozen times.", "I watch football games every night."
        living_situation = "" # "I currently live by myself in a studio apartment.", "I live in a single-family home with my husband and two children."
        places_traveled = "" # "I have never left my home city.", "I have traveled to most of South America, and a few countries in Europe."

    """


In [54]:
# Build the request in a separate variable so that `summary_prompt` stays an
# unmodified template; with `+=`, every re-run of this cell appended another
# biography to it.
biography = personas_updated[0]
if isinstance(biography, dict):
    # Entries taken from `personas` or loaded from personas_updated.json wrap
    # the text in a dict under the "persona" key.
    biography = biography["persona"]

# The format hint now closes the second "<summary>" placeholder, which was left unbalanced.
summary_request = summary_prompt + "\nBiography:" + biography + "\nFormat the output as above, with the attribute from the class matching the key for the attribute:\n name: <summary>\nage: <summary>\n..."

# NOTE(review): the model name is passed a second time as the fourth argument --
# confirm what completion_create expects in that position.
summary_answer = completion_create(
        config_gpt4_mini["agent1_model"],
        config_gpt4_mini,
        summary_request,
        config_gpt4_mini["agent1_model"])


In [55]:
# Show the attribute summaries returned by the model.
print(summary_answer)

name = "My name is James."  
age = "I am in my late 30s."  
location_from = "I am from a small town in the mountains of southern Appalachia."  
location_mother_from = "My mother is originally from the deep South."  
location_father_from = "My father is also originally from the deep South."  
religion = "I was raised with strong values centered around faith."  
socioeconomic_status = "I grew up in a modest household."  
siblings = "I have a sister named Sarah, who is 34, and a brother named Mike, who is 28."  
languages_spoken = "I speak English."  
sexual_orientation = ""  # Not specified  
gender_identity = ""  # Not specified  
relationship_status = "I am married."  
significant_past_relationships = ""  # Not specified  
occupation_current = "I work in the tech industry."  
occupation_past = "I have had a diverse career path that included time in the army and the auto industry."  
education = ""  # Not specified  
cultural_influences = "My upbringing was rich in stories and imaginati

# Consistency Evaluation

The consistency evaluation code has been moved into each section above (see the code cells that call `run_metrics`).

To evaluate a file, either call `run_metrics(filename)` or call each eval function explicitly (see the Synthetic Persona Chat section).