In [48]:
%env CUDA_VISIBLE_DEVICES=4,6
%env TMPDIR=/raid/users/ryan_cheng2/tmp
import os
import glob
import re
import json
import random
import time
import pickle
from absl import app, flags
from tqdm import tqdm
from datetime import datetime
import openai
from openai import OpenAI
from transformers import AutoTokenizer
import pandas as pd
import numpy as np

np.random.seed(0)

from utils import *
import utils
from consistency_eval import *
from education_generation import *

try:
    from vllm import LLM, SamplingParams
    import ray
except ImportError:
    pass

env: CUDA_VISIBLE_DEVICES=4,6
env: TMPDIR=/raid/users/ryan_cheng2/tmp


In [49]:
# Read the OpenAI API key from ../ryan_openai.txt and install the resulting client
# on the imported `utils` module.
# NOTE(review): presumably `completion_create` picks the client up from there — confirm in utils.
with open(os.path.abspath('../ryan_openai.txt'), 'r') as f:
    # strip() instead of rstrip('\n'): a CRLF line ending or stray spaces in the key
    # file would otherwise become part of the key.
    utils.client = OpenAI(api_key=f.read().strip())

In [50]:
# Conversations to evaluate; swap in the commented-out path to score the education runs.
# old therapy convs
filename = '/nfs/kun2/users/ryan_cheng/consistency_LLMs/therapy/exp/05.08.25/Llama-3.1-8B-Instruct_0_971.json'
# education convs
# filename = '/nfs/kun2/users/ryan_cheng/consistency_LLMs/data/education/exp/04.28.25/Llama-3.1-8B-Instruct_0_395.json'

with open("/nfs/kun2/users/ryan_cheng/consistency_LLMs/therapy/config_therapy.json", 'r') as therapy_file:
    config_therapy = json.load(therapy_file)

with open(filename, 'r') as conversations_file:
    data = json.load(conversations_file)

with open("./config/education/gpt-4o-mini.json", 'r') as model_file:
    config_gpt4_mini = json.load(model_file)

# `config` and `prompts` are not defined in any visible cell; presumably they are the
# shared dicts exported by the star imports — confirm. Both are updated in place:
# model settings are merged into `config`, task settings into `prompts`.
config.update(config_gpt4_mini)
prompts.update(config_therapy)

In [51]:
"/nfs/kun2/users/ryan_cheng/consistency_LLMs/data/education/exp/04.28.25/Llama-3.1-8B-Instruct_0_395.json"

'/nfs/kun2/users/ryan_cheng/consistency_LLMs/data/education/exp/04.28.25/Llama-3.1-8B-Instruct_0_395.json'

In [52]:
# Judge-prompt templates, keyed by metric name (e.g. 'background_consistency').
eval_prompts_path = "/nfs/kun2/users/ryan_cheng/consistency_LLMs/config/eval_prompts.json"
with open(eval_prompts_path, 'r') as prompts_file:
    eval_prompts = json.load(prompts_file)

In [53]:
# Evaluator option 1 of 3. The next two cells assign config['eval_model'] again, so
# whichever of these cells was executed last decides which judge model is used.
config['eval_model'] = 'Llama-3.1-8B-Instruct'
config['gpus'] = 1  # NOTE(review): presumably the GPU count used to serve the model — confirm in utils

In [61]:
# Evaluator option 2 of 3 (70B judge). The saved execution counter of this cell is
# higher than those of the neighbouring eval_model cells, i.e. it was run after them.
config['eval_model'] = 'Llama-3.1-70B-Instruct'
config['gpus'] = 2  # NOTE(review): presumably the GPU count used to serve the model — confirm in utils

In [55]:
# Evaluator option 3 of 3. Unlike the two Llama cells before it, this one leaves
# config['gpus'] at whatever value was set previously.
config['eval_model'] = 'gpt-4o-mini'

In [56]:
# Task label for this run. No cell in this notebook reads it back, so it is
# presumably consumed inside the imported helpers — confirm.
config['task_name'] = 'Therapy'

In [57]:
eval_prompts

{'strategy_consistency': "%SCENARIO_DESC% Evaluate whether the following line spoken by %SPEAKER_ROLE% contradicts their background. First, describe the interpreted intention of the statement, and whether or not it aligns with the given background of %SPEAKER_ROLE%. Then, provide 1 sentence explaining your reasoning based on the strategy, followed by YES if the line contradicts the given background of %SPEAKER_ROLE%, and NO if it is consistent with the provided background of %SPEAKER_ROLE%. %SPEAKER_ROLE%'s strategy is described as follows:\n%SPEAKER_STRATEGY%\n %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide 1 sentence of reasoning followed by YES or NO.\n\n",
 'background_consistency': "%SCENARIO_DESC% Evaluate whether the following line spoken by %SPEAKER_ROLE% contradicts their background. Provide 1 sentence explaining your reasoning based on the background, followed by YES if the line contradicts the given background of %SPEAKER_ROLE%, and NO if it is consist

In [62]:
def extract_list(text):
    """Return the first bracketed list literal found in ``text``.

    The judge model is asked to end its answer with a Python-style list of
    indices (``[1, 4]`` or ``[]``). The first ``[...]`` span on a single line is
    parsed with ``ast.literal_eval``, which only accepts literals. The previous
    ``eval`` call would have executed arbitrary expressions coming from the
    model output.

    Args:
        text: Raw model output.

    Returns:
        The parsed list, or ``[]`` when there is no bracketed span or the span
        is not a valid literal (for example ``[index1, index2]``).
    """
    import ast  # local import so this cell does not depend on the import cell

    match = re.search(r'\[.*?\]', text)
    if match is None:
        return []
    try:
        parsed = ast.literal_eval(match.group())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Anything that is not a plain literal counts as "no list given".
        return []
    return parsed if isinstance(parsed, list) else []

In [63]:
# The index-consistency evaluators look their templates up through
# prompts["eval_prompts"][...], so the loaded templates are attached to the
# shared `prompts` mapping here.
prompts['eval_prompts'] = eval_prompts

In [64]:
# Scratch list; not referenced by any other cell visible in this notebook.
test_list = ["a", "b", "c", "d", "e", "f", "g"]

In [65]:
prompts["eval_prompts"]["index_consistency_background"]

"%SCENARIO_DESC% For the following line spoken by %SPEAKER_ROLE%, first determine if there is a CLEAR conflict or inconsistency between the line and any line within the conversation history spoken by %SPEAKER_ROLE%. IF there is a conflict, provide a sentence of reasoning followed by a list of indices of lines in the conversation history that have a clear conflict with the current line. Otherwise, provide a sentence of reasoning followed by an empty list. ONLY INCLUDE INDICES OF LINES THAT CORRESPOND TO %SPEAKER_ROLE%. The conversation up to this point is as follows: %CONVERSATION%. %SPEAKER_ROLE%'s background is described as follows:\n%SPEAKER_BACKSTORY%\n %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide your reasoning as 1 sentence, followed by a list of indices of conflicting lines from the conversation history formatted like a Python list in the following format: [index1, index2, index3, ...].\n\n"

In [66]:
def format_conversation(conversation):
    """Render ``(index, text)`` pairs as one string of ``"<index>: <text>"`` pieces.

    The pieces are concatenated without a separator, so each line only starts
    on a new row if the utterance text itself ends with a newline.
    """
    rendered = []
    for idx, utterance in conversation:
        rendered.append(str(idx) + ": " + utterance)
    return "".join(rendered)

In [67]:
print(format_conversation(data[0]["conversation"][:3]))

0: Therapist: As we begin today's session, I want to acknowledge that you've expressed feeling a sense of relief in reaching out for support, can you tell me what's been bringing you to this place of openness?
1: Patient: "I guess I just feel like I'm constantly feeling like I'm on borrowed time, like one wrong move and everything will come crashing down. My mind gets stuck in a cycle of 'what if' scenarios, which makes it hard for me to focus on anything else."
2: Therapist: You mention feeling like you're on borrowed time, which suggests a sense of uncertainty and fear about the future. Can you tell me more about when this feeling of being "on borrowed time" first started for you?



In [68]:
def eval_index_consistency(conv_dict, both_agents=False):
    """Ask the judge model which earlier lines each utterance conflicts with.

    Every line whose index is at least 2 and that belongs to agent 1 (and, when
    ``both_agents`` is true, agent 2) is sent to ``config['eval_model']``
    through ``completion_create`` using the ``index_consistency`` template. The
    indices the judge lists (parsed by ``extract_list``) are counted as
    conflicts.

    Args:
        conv_dict: Conversation record. Reads ``"pturn"`` (which agent, 1 or 2,
            owns the first line) and ``"conversation"`` (an iterable of
            ``(index, text)`` pairs). Mutated in place.
        both_agents: Also score agent 2; by default only agent 1 is scored.

    Returns:
        The same ``conv_dict`` with ``'eval_index_consistency'`` (a list of
        ``(index, raw judge output)``) and ``'P1_index_consistency_score'``
        (plus the ``P2`` equivalent when requested). A score is
        ``1 - conflicts / sum(i // 2 over scored lines)`` and stays ``0`` when
        no line was scored.
    """
    conv_dict['eval_index_consistency'] = []
    conv_dict['P1_index_consistency_score'] = 0
    if both_agents:
        conv_dict['P2_index_consistency_score'] = 0

    scored_agents = (1, 2) if both_agents else (1,)
    # Per agent: running sum of i // 2 over the scored lines (the normaliser).
    comparisons = {1: 0, 2: 0}

    pturn = conv_dict["pturn"]
    for i, line in conv_dict["conversation"]:
        if i < 2:  # skip first 2 lines of dialogue (an even count, so the turn parity is unaffected)
            continue
        if pturn not in (1, 2):
            continue
        if pturn in scored_agents:
            prompt = (
                prompts["eval_prompts"]["index_consistency"]
                .replace("%SCENARIO_DESC%", prompts["scenario"])
                .replace("%SPEAKER_ROLE%", prompts[f"agent{pturn}_role"])
                .replace("%CONVERSATION%", format_conversation(conv_dict["conversation"][:i]))
                .replace("%SPEAKER_LINE%", line)
            )
            # .get() so a config without a 'verbose' entry does not raise, matching
            # the prompt-consistency evaluators in this notebook.
            if config.get('verbose', False):
                print(prompt)
            output = completion_create(config['eval_model'], config, prompt)
            conv_dict['eval_index_consistency'].append((i, output))
            conv_dict[f'P{pturn}_index_consistency_score'] += len(extract_list(output))
            comparisons[pturn] += i // 2
        pturn = 2 if pturn == 1 else 1

    for agent in scored_agents:
        if comparisons[agent] > 0:
            score_key = f'P{agent}_index_consistency_score'
            conv_dict[score_key] = 1 - conv_dict[score_key] / comparisons[agent]

    return conv_dict

In [69]:
def eval_index_background_consistency(conv_dict, both_agents=False):
    """Index-consistency scoring with the speaker's backstory added to the prompt.

    Works like the plain index-consistency evaluator but uses the
    ``index_consistency_background`` template and additionally substitutes
    ``%SPEAKER_BACKSTORY%`` with ``conv_dict["P1"]`` (or ``conv_dict["P2"]``
    for agent 2).

    NOTE: results are written to the same keys as ``eval_index_consistency``
    (``'eval_index_consistency'`` and ``'P{1,2}_index_consistency_score'``), so
    running both on one ``conv_dict`` keeps only the most recent result.

    Args:
        conv_dict: Conversation record. Reads ``"pturn"``, ``"conversation"``
            (``(index, text)`` pairs), ``"P1"`` and, when ``both_agents`` is
            true, ``"P2"``. Mutated in place.
        both_agents: Also score agent 2; by default only agent 1 is scored.

    Returns:
        The same ``conv_dict``. A score is
        ``1 - conflicts / sum(i // 2 over scored lines)`` and stays ``0`` when
        no line was scored.
    """
    conv_dict['eval_index_consistency'] = []
    conv_dict['P1_index_consistency_score'] = 0
    if both_agents:
        conv_dict['P2_index_consistency_score'] = 0

    scored_agents = (1, 2) if both_agents else (1,)
    # Per agent: running sum of i // 2 over the scored lines (the normaliser).
    comparisons = {1: 0, 2: 0}

    pturn = conv_dict["pturn"]
    for i, line in conv_dict["conversation"]:
        if i < 2:  # skip first 2 lines of dialogue (an even count, so the turn parity is unaffected)
            continue
        if pturn not in (1, 2):
            continue
        if pturn in scored_agents:
            prompt = (
                prompts["eval_prompts"]["index_consistency_background"]
                .replace("%SCENARIO_DESC%", prompts["scenario"])
                .replace("%SPEAKER_BACKSTORY%", conv_dict[f"P{pturn}"])
                .replace("%SPEAKER_ROLE%", prompts[f"agent{pturn}_role"])
                .replace("%CONVERSATION%", format_conversation(conv_dict["conversation"][:i]))
                .replace("%SPEAKER_LINE%", line)
            )
            # .get() so a config without a 'verbose' entry does not raise, matching
            # the prompt-consistency evaluators in this notebook.
            if config.get('verbose', False):
                print(prompt)
            output = completion_create(config['eval_model'], config, prompt)
            conv_dict['eval_index_consistency'].append((i, output))
            conv_dict[f'P{pturn}_index_consistency_score'] += len(extract_list(output))
            comparisons[pturn] += i // 2
        pturn = 2 if pturn == 1 else 1

    for agent in scored_agents:
        if comparisons[agent] > 0:
            score_key = f'P{agent}_index_consistency_score'
            conv_dict[score_key] = 1 - conv_dict[score_key] / comparisons[agent]

    return conv_dict

In [70]:
def eval_prompt_consistency(conv_dict):
    """Score agent 1's lines against their backstory with three judge prompts.

    For every line spoken on agent 1's turn, each of the ``strategy_consistency``,
    ``background_consistency`` and ``combined_prompt_consistency`` templates is
    filled in and sent to ``config['eval_model']`` through ``completion_create``.
    A line counts as consistent when the judge output does not contain the
    substring ``"YES"``.

    NOTE(review): the ``strategy_consistency`` template displayed in this
    notebook contains a ``%SPEAKER_STRATEGY%`` placeholder that is not filled
    in here, so it reaches the judge verbatim.

    Args:
        conv_dict: Conversation record. Reads ``"pturn"``, ``"conversation"``
            (entries whose first two items are the line number and the text)
            and ``"P1"`` (agent 1's backstory). Mutated in place.

    Returns:
        The same ``conv_dict`` with ``'eval_prompt_consistency'`` (per template,
        a list of ``(line_number, judge output)``) and
        ``'P1_prompt_consistency_scores'`` (per template, the fraction of
        evaluated lines judged consistent; ``0`` if none were evaluated).
    """
    keys = ["strategy_consistency", "background_consistency", "combined_prompt_consistency"]
    conv_dict['eval_prompt_consistency'] = {key: [] for key in keys}
    conv_dict['P1_prompt_consistency_scores'] = {key: 0 for key in keys}
    p1_utterances = {key: 0 for key in keys}

    pturn = conv_dict["pturn"]
    for line in conv_dict["conversation"]:
        line_number = line[0]
        convo_line = line[1]
        if pturn == 1:
            for key in keys:
                # The placeholder needs its closing '%'. Without it a stray '%'
                # was left behind right after the scenario description.
                prompt = (
                    eval_prompts[key]
                    .replace("%SCENARIO_DESC%", 'There is a Patient in conversation with a Therapist.')
                    .replace("%SPEAKER_ROLE%", config_therapy["agent1_role"])
                    .replace("%SPEAKER_BACKSTORY%", conv_dict["P1"])
                    .replace("%SPEAKER_LINE%", convo_line)
                )
                if config.get('verbose', False):
                    print(prompt)
                output = completion_create(config['eval_model'], config, prompt)
                conv_dict['eval_prompt_consistency'][key].append((line_number, output))
                if "YES" not in output:  # no contradiction
                    conv_dict['P1_prompt_consistency_scores'][key] += 1
                p1_utterances[key] += 1
            pturn = 2
        elif pturn == 2:
            pturn = 1

    for key in keys:
        if p1_utterances[key] > 0:
            conv_dict['P1_prompt_consistency_scores'][key] /= p1_utterances[key]

    print(conv_dict)
    return conv_dict

In [71]:
def eval_prompt_consistency_ablations(conv_dict):
    """Record one more judge run of the combined-prompt templates for agent 1.

    The function is meant to be called repeatedly on the same ``conv_dict``.
    The first call creates the result containers; every later call appends one
    more judge output per evaluated line and stores that run's score under the
    next run index.

    NOTE(review): ``'P1_prompt_consistency_scores'`` is also written, with a
    different shape, by ``eval_prompt_consistency``. A ``conv_dict`` already
    scored by that function would take the "later call" branch here and fail.
    NOTE(review): the run indices are ``int`` dictionary keys, which become
    strings once the record is written with ``json.dump``.

    Args:
        conv_dict: Conversation record. Reads ``"pturn"``, ``"conversation"``
            (entries whose first two items are the line number and the text)
            and ``"P1"``. Mutated in place.

    Returns:
        The same ``conv_dict``. ``'eval_prompt_consistency'[key]`` holds
        ``[line_number, run0_output, run1_output, ...]`` per evaluated line.
        ``'P1_prompt_consistency_scores'[key][run]`` holds the fraction of
        evaluated lines whose output lacked the substring ``"YES"``.
    """
    keys = ["combined_prompt_consistency", "forwards_combined_prompt_consistency"]

    if "P1_prompt_consistency_scores" not in conv_dict:
        # First run: create the containers.
        index = 0
        conv_dict['eval_prompt_consistency'] = {key: [] for key in keys}
        conv_dict['P1_prompt_consistency_scores'] = {key: {} for key in keys}
    else:
        # Each stored entry is [line_number, output, output, ...], so its
        # length minus one is the number of runs recorded so far.
        index = len(conv_dict['eval_prompt_consistency'][keys[0]][0]) - 1
        print(index)

    p1_utterances = {}
    for key in keys:
        conv_dict['P1_prompt_consistency_scores'][key][index] = 0
        p1_utterances[key] = 0

    pturn = conv_dict["pturn"]
    for line in conv_dict["conversation"]:
        line_number = line[0]
        convo_line = line[1]
        if pturn == 1:
            for key in keys:
                # The placeholder needs its closing '%'. Without it a stray '%'
                # was left behind right after the scenario description.
                prompt = (
                    eval_prompts[key]
                    .replace("%SCENARIO_DESC%", 'There is a Patient in conversation with a Therapist.')
                    .replace("%SPEAKER_ROLE%", config_therapy["agent1_role"])
                    .replace("%SPEAKER_BACKSTORY%", conv_dict["P1"])
                    .replace("%SPEAKER_LINE%", convo_line)
                )
                if config.get('verbose', False):
                    print(prompt)
                output = completion_create(config['eval_model'], config, prompt)
                if index == 0:
                    conv_dict['eval_prompt_consistency'][key].append([line_number, output])
                else:
                    # Evaluated lines sit on every other line number, so
                    # line_number // 2 is the entry's position in the list.
                    conv_dict['eval_prompt_consistency'][key][line_number // 2].append(output)

                if "YES" not in output:  # no contradiction
                    conv_dict['P1_prompt_consistency_scores'][key][index] += 1
                p1_utterances[key] += 1
            pturn = 2
        elif pturn == 2:
            pturn = 1

    for key in keys:
        if p1_utterances[key] > 0:
            conv_dict['P1_prompt_consistency_scores'][key][index] /= p1_utterances[key]

    print(conv_dict)
    return conv_dict

In [72]:
eval_prompts

{'strategy_consistency': "%SCENARIO_DESC% Evaluate whether the following line spoken by %SPEAKER_ROLE% contradicts their background. First, describe the interpreted intention of the statement, and whether or not it aligns with the given background of %SPEAKER_ROLE%. Then, provide 1 sentence explaining your reasoning based on the strategy, followed by YES if the line contradicts the given background of %SPEAKER_ROLE%, and NO if it is consistent with the provided background of %SPEAKER_ROLE%. %SPEAKER_ROLE%'s strategy is described as follows:\n%SPEAKER_STRATEGY%\n %SPEAKER_ROLE% spoke the following line: \n%SPEAKER_LINE%\n\n Provide 1 sentence of reasoning followed by YES or NO.\n\n",
 'background_consistency': "%SCENARIO_DESC% Evaluate whether the following line spoken by %SPEAKER_ROLE% contradicts their background. Provide 1 sentence explaining your reasoning based on the background, followed by YES if the line contradicts the given background of %SPEAKER_ROLE%, and NO if it is consist

In [73]:
def eval_prompt_consistency_ablations_education(conv_dict):
    """Record one more judge run of the combined-prompt templates for the Student.

    Education counterpart of the therapy ablation evaluator: lines spoken on
    agent 2's turn are judged against ``conv_dict["P2"]`` with the speaker role
    fixed to ``"Student"``. The function is meant to be called repeatedly on
    the same ``conv_dict``; each call appends one more judge output per
    evaluated line and stores that run's score under the next run index.

    NOTE(review): the run indices are ``int`` dictionary keys, which become
    strings once the record is written with ``json.dump``.

    Args:
        conv_dict: Conversation record. Reads ``"pturn"``, ``"conversation"``
            (entries whose first two items are the line number and the text)
            and ``"P2"``. Mutated in place.

    Returns:
        The same ``conv_dict``. ``'eval_prompt_consistency'[key]`` holds
        ``[line_number, run0_output, run1_output, ...]`` per evaluated line.
        ``'P2_prompt_consistency_scores'[key][run]`` holds the fraction of
        evaluated lines whose output lacked the substring ``"YES"``.
    """
    keys = ["combined_prompt_consistency", "forwards_combined_prompt_consistency"]

    if "P2_prompt_consistency_scores" not in conv_dict:
        # First run: create the containers.
        index = 0
        conv_dict['eval_prompt_consistency'] = {key: [] for key in keys}
        conv_dict['P2_prompt_consistency_scores'] = {key: {} for key in keys}
    else:
        # Each stored entry is [line_number, output, output, ...], so its
        # length minus one is the number of runs recorded so far.
        index = len(conv_dict['eval_prompt_consistency'][keys[0]][0]) - 1
        print(index)

    p2_utterances = {}
    for key in keys:
        conv_dict['P2_prompt_consistency_scores'][key][index] = 0
        p2_utterances[key] = 0

    pturn = conv_dict["pturn"]
    for line in conv_dict["conversation"]:
        line_number = line[0]
        convo_line = line[1]
        if pturn == 2:
            for key in keys:
                # The placeholder needs its closing '%'. Without it a stray '%'
                # was left behind right after the scenario description.
                prompt = (
                    eval_prompts[key]
                    .replace("%SCENARIO_DESC%", 'A Teacher is trying to teach a Student about a topic. ')
                    .replace("%SPEAKER_ROLE%", "Student")
                    .replace("%SPEAKER_BACKSTORY%", conv_dict["P2"])
                    .replace("%SPEAKER_LINE%", convo_line)
                )
                if config.get('verbose', False):
                    print(prompt)
                output = completion_create(config['eval_model'], config, prompt)
                if index == 0:
                    conv_dict['eval_prompt_consistency'][key].append([line_number, output])
                else:
                    # Evaluated lines sit on every other line number, so
                    # line_number // 2 is the entry's position in the list.
                    conv_dict['eval_prompt_consistency'][key][line_number // 2].append(output)

                if "YES" not in output:  # no contradiction
                    conv_dict['P2_prompt_consistency_scores'][key][index] += 1
                p2_utterances[key] += 1
            pturn = 1
        elif pturn == 1:
            pturn = 2

    for key in keys:
        if p2_utterances[key] > 0:
            conv_dict['P2_prompt_consistency_scores'][key][index] /= p2_utterances[key]

    print(conv_dict)
    return conv_dict

In [74]:
prompts['eval_prompts']= eval_prompts
prompts

{'agent1_role': 'Therapist',
 'agent2_role': 'Patient',
 'agent1_prompt': 'You are a licensed psychotherapist conducting a one-on-one online session. Your primary goal is to help the patient understand their emotional patterns and relational struggles over time. Be attentive to how their tone, openness, and narrative evolve across the conversation. If the patient opens up unusually quickly, contradicts earlier statements, or shifts in how they describe relationships or emotions, notice and explore those shifts gently. Use a clinically grounded mix of reflective listening, open-ended questions, and thoughtful observations to help the patient gain insight. Invite them to connect past and present patterns without judgment. If something doesn’t quite add up—emotionally or narratively—curiously and compassionately invite the patient to reflect on it. Your aim is to support authentic self-awareness, while realistically responding to how people often protect themselves emotionally.',
 'agent2

In [75]:
# Machine-specific absolute paths; adjust for your environment.
# NOTE(review): neither key is read by this notebook's own cells — presumably the
# model-loading helpers in utils use them; confirm. tmp_dir (.../tmp2/) also differs
# from the TMPDIR value exported in the first cell (.../tmp).
config['model_dir'] = "/raid/users/ryan_cheng2/models/"
config['tmp_dir'] = "/raid/users/ryan_cheng2/tmp2/"

In [76]:
%%capture 

# Score each conversation and write the accumulated results after every one.
# NOTE(review): the saved output of this cell is a TypeError —
# eval_survey_consistency() does not accept an `agents` keyword. The helper is
# defined outside this notebook, so the call below has to be reconciled with its
# actual signature before this cell can run.
test_convs = []
for conversation in data:

    eval_survey_consistency(conversation, agents=(2,))
    test_convs.append(conversation)
    # The whole list is re-serialised on each iteration, so the results gathered
    # so far are on disk if a later conversation fails.
    with open("/nfs/kun2/users/ryan_cheng/consistency_LLMs/therapy/exp/05.08.25/ablation-Llama-3.1-8B-Instruct_0_971.json", 'w') as f:
        json.dump(test_convs, f, indent=4)  

TypeError: eval_survey_consistency() got an unexpected keyword argument 'agents'

In [None]:
%%capture 

# Five judge runs per conversation, with the accumulated results written after each one.
education_ablation_path = "/nfs/kun2/users/ryan_cheng/consistency_LLMs/data/education/exp/04.28.25/ablation_llama70b_Llama-3.1-8B-Instruct_0_395.json"
test_convs = []
for conversation in data:
    # Every call mutates `conversation` in place, appending one more run, and
    # returns it; only the object returned by the last call is collected.
    for _ in range(5):
        scored = eval_prompt_consistency_ablations_education(conversation)
    test_convs.append(scored)
    with open(education_ablation_path, 'w') as out_file:
        json.dump(test_convs, out_file, indent=4)

In [None]:
config['eval_model']

In [None]:
eval_prompts

In [None]:
%%capture 

# Five judge runs per conversation, with the accumulated results written after each one.
therapy_ablation_path = "/nfs/kun2/users/ryan_cheng/consistency_LLMs/therapy/exp/04.22.25/ablation_llama70b_Llama-3.1-8B-Instruct_0_500.json"
test_convs = []
for conversation in data:
    # Every call mutates `conversation` in place, appending one more run, and
    # returns it; only the object returned by the last call is collected.
    for _ in range(5):
        scored = eval_prompt_consistency_ablations(conversation)
    test_convs.append(scored)
    with open(therapy_ablation_path, 'w') as out_file:
        json.dump(test_convs, out_file, indent=4)

Runtime: 57 min 21 sec (Llama-3.1-70B-Instruct)

In [None]:
# Index-consistency scores for every conversation, with the accumulated results
# written after each one.
index_results_path = "/nfs/kun2/users/ryan_cheng/consistency_LLMs/therapy/exp/04.22.25/index_llama70b_Llama-3.1-8B-Instruct_0_500.json"
test_convs = []
for conversation in data:
    scored = eval_index_consistency(conversation)
    test_convs.append(scored)
    with open(index_results_path, 'w') as out_file:
        json.dump(test_convs, out_file, indent=4)

In [None]:
# Prompt-consistency scores for every conversation, with the accumulated results
# written after each one.
# This notebook defines eval_prompt_consistency in several cells, so the version
# used here is whichever of those cells was executed most recently.
prompt_results_path = "/nfs/kun2/users/ryan_cheng/consistency_LLMs/therapy/exp/04.22.25/llama8beval_Llama-3.1-8B-Instruct_0_500.json"
test_convs = []
for conversation in data:
    scored = eval_prompt_consistency(conversation)
    test_convs.append(scored)
    with open(prompt_results_path, 'w') as out_file:
        json.dump(test_convs, out_file, indent=4)



In [None]:
example_conv

In [None]:
config_therapy

In [None]:
def eval_prompt_consistency(conv_dict):
    """Single-template prompt-consistency score for agent 1 (older variant).

    Lines spoken on agent 1's turn are judged with
    ``config_therapy["eval_prompts"]["prompt_consistency"]``. A line counts as
    consistent when the judge output does not contain ``"YES"``. The model name
    and verbosity come from ``config_llm``, while ``config`` is what gets passed
    to ``completion_create``.

    NOTE(review): ``config_llm`` is not defined in any visible cell, and this
    name is defined again in other cells of the notebook, so which version is
    active depends on execution order.

    Returns ``conv_dict`` (mutated in place) with ``'eval_prompt_consistency'``
    as a list of ``(line_number, judge output)`` and
    ``'P1_prompt_consistency_score'`` as the consistent fraction.
    """
    # assert 'eval_prompt_consistency' not in conv_dict # warn if we are replacing metrics we don't mean to overwrite
    judgements = []
    conv_dict['eval_prompt_consistency'] = judgements
    conv_dict['P1_prompt_consistency_score'] = 0
    evaluated = 0
    speaker = conv_dict["pturn"]
    for entry in conv_dict["conversation"]:
        line_number, convo_line = entry[0], entry[1]
        if speaker == 2:
            # Agent 2's line: not scored, just hand the turn back.
            speaker = 1
            continue
        if speaker != 1:
            continue
        template = config_therapy["eval_prompts"]["prompt_consistency"]
        prompt = template.replace("%SPEAKER_ROLE%", config_therapy["agent1_role"])
        prompt = prompt.replace("%SPEAKER_BACKSTORY%", conv_dict["P1"])
        prompt = prompt.replace("%SPEAKER_LINE%", convo_line)
        if config_llm['verbose']:
            print(prompt)
        output = completion_create(config_llm['eval_model'], config, prompt)
        judgements.append((line_number, output))
        if "YES" not in output:  # no contradiction
            conv_dict['P1_prompt_consistency_score'] += 1
        evaluated += 1
        speaker = 2
    if evaluated > 0:
        conv_dict['P1_prompt_consistency_score'] /= evaluated
    print(conv_dict)

    return conv_dict

In [None]:
def eval_prompt_consistency(conv_dict):
    """Single-template prompt-consistency score for agent 1 (older variant).

    Lines spoken on agent 1's turn are judged with
    ``config_therapy["eval_prompts"]["prompt_consistency"]``. A line counts as
    consistent when the judge output does not contain ``"YES"``. The model name
    and verbosity come from ``config_llm``, while ``config`` is what gets passed
    to ``completion_create``.

    NOTE(review): ``config_llm`` is not defined in any visible cell, and this
    name is defined again in other cells of the notebook, so which version is
    active depends on execution order.

    Returns ``conv_dict`` (mutated in place) with ``'eval_prompt_consistency'``
    as a list of ``(line_number, judge output)`` and
    ``'P1_prompt_consistency_score'`` as the consistent fraction.
    """
    # assert 'eval_prompt_consistency' not in conv_dict # warn if we are replacing metrics we don't mean to overwrite
    judgements = []
    conv_dict['eval_prompt_consistency'] = judgements
    conv_dict['P1_prompt_consistency_score'] = 0
    evaluated = 0
    speaker = conv_dict["pturn"]
    for entry in conv_dict["conversation"]:
        line_number, convo_line = entry[0], entry[1]
        if speaker == 2:
            # Agent 2's line: not scored, just hand the turn back.
            speaker = 1
            continue
        if speaker != 1:
            continue
        template = config_therapy["eval_prompts"]["prompt_consistency"]
        prompt = template.replace("%SPEAKER_ROLE%", config_therapy["agent1_role"])
        prompt = prompt.replace("%SPEAKER_BACKSTORY%", conv_dict["P1"])
        prompt = prompt.replace("%SPEAKER_LINE%", convo_line)
        if config_llm['verbose']:
            print(prompt)
        output = completion_create(config_llm['eval_model'], config, prompt)
        judgements.append((line_number, output))
        if "YES" not in output:  # no contradiction
            conv_dict['P1_prompt_consistency_score'] += 1
        evaluated += 1
        speaker = 2
    if evaluated > 0:
        conv_dict['P1_prompt_consistency_score'] /= evaluated
    print(conv_dict)

    return conv_dict

In [None]:
def eval_prompt_consistency(conv_dict):
    """Score both agents' lines against their own backstories.

    Each line is judged with ``prompts["eval_prompts"]["prompt_consistency"]``
    for the agent whose turn it is, alternating from ``conv_dict["pturn"]``
    (any value other than 1 is treated as agent 2's turn). A line counts as
    consistent when the judge output does not contain the substring ``"YES"``.

    Args:
        conv_dict: Conversation record. Reads ``"pturn"``, ``"conversation"``,
            ``"P1"`` and ``"P2"``. Conversation entries may be plain strings or
            ``(index, text)`` pairs as used elsewhere in this notebook. Mutated
            in place.

    Returns:
        The same ``conv_dict`` with ``'eval_prompt_consistency'`` (the raw judge
        outputs in line order) and ``'P1_prompt_consistency_score'`` /
        ``'P2_prompt_consistency_score'`` (the consistent fraction per agent;
        ``0`` for an agent with no lines).
    """
    #assert 'eval_prompt_consistency' not in conv_dict # warn if we are replacing metrics we don't mean to overwrite
    conv_dict['eval_prompt_consistency'] = []
    conv_dict['P1_prompt_consistency_score'] = 0
    conv_dict['P2_prompt_consistency_score'] = 0
    utterances = {1: 0, 2: 0}
    pturn = conv_dict["pturn"]
    for line in conv_dict["conversation"]:
        # Passing a whole (index, text) entry to str.replace raises TypeError,
        # so pull the text out when the entry is not already a string.
        convo_line = line if isinstance(line, str) else line[1]
        agent = 1 if pturn == 1 else 2
        prompt = (
            prompts["eval_prompts"]["prompt_consistency"]
            .replace("%SPEAKER_ROLE%", prompts[f"agent{agent}_role"])
            .replace("%SPEAKER_BACKSTORY%", conv_dict[f"P{agent}"])
            .replace("%SPEAKER_LINE%", convo_line)
        )
        if config.get('verbose', False):
            print(prompt)
        output = completion_create(config['eval_model'], config, prompt)
        conv_dict['eval_prompt_consistency'].append(output)
        if "YES" not in output:  # no contradiction
            conv_dict[f'P{agent}_prompt_consistency_score'] += 1
        utterances[agent] += 1
        pturn = 2 if agent == 1 else 1

    for agent in (1, 2):
        if utterances[agent] > 0:
            conv_dict[f'P{agent}_prompt_consistency_score'] /= utterances[agent]

    # Returned so callers can collect the result directly, as the driver cells
    # in this notebook do with ``test_convs.append(...)``.
    return conv_dict


In [None]:
# Scratch: load the Gemma tokenizer to inspect its chat template below.
# No evaluation cell in this notebook uses `tokenizer`.
tokenizer = AutoTokenizer.from_pretrained('google/gemma-2b-it')

In [None]:
# Minimal single-turn chat, used as input for tokenizer.apply_chat_template in the next cell.
messages = [
    {"role": "user", "content": 'hello world'}
]

In [None]:
tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)