## Init

In [1]:
# ---------------- Imports ----------------
import os

from datetime import datetime
from transformers import AutoTokenizer

import torch
import yaml

from sentence_transformers import SentenceTransformer
from sentence_transformers import CrossEncoder

from elicitation.metrics.progression import progression
from elicitation.metrics.turn_length_ratio import turn_length_ratio
from elicitation.metrics.utils import load_dialogues


In [2]:
# ---------------- Args ----------------
# Model identifiers. The config cell joins each of these onto the
# configured models folder to obtain a local directory.
tokenizer_model_choice_name = "meta-llama/Llama-3.2-3B-Instruct"
embedding_model_choice_name = "sentence-transformers/all-MiniLM-L12-v2"
# Cross-encoder alternative (currently disabled):
# cross_encoder_model_choice_name = "cross-encoder/stsb-roberta-large"

# Dataset under evaluation, expressed relative to the project store root.
dataset_choice_name = "evaluation/generated-utterances-dialogue/20251227T2034-20251227t0851-deepseek-r1-distill-llama-8b-seq-std-m3trained/generated"

# Hyper-parameters passed to progression() as `gamma` and `k`.
PROGRESSION_GAMMA = 0.5
PROGRESSION_K = 5

# Settings for the conversational-control metric (currently disabled):
# CONVERSATIONAL_CONTROL_K = 2
# CONVERSATIONAL_CONTROL_GAMMA = 0.9



In [3]:
# ---------------- Config ----------------
# Wall-clock stamp for this run, formatted as YYYY_MM_DD_HH_MM_SS.
# NOTE(review): no visible cell reads `timestamp`; kept in case it is used elsewhere.
timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

# Project configuration. The relative path is resolved against the kernel's
# current working directory. The encoding is given explicitly so that decoding
# does not depend on the platform's locale default.
with open("../../config/config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

proj_store = config["paths"]["proj_store"]
models_folderpath = config["paths"]["models"]

# Input locations under the project store.
data_path = os.path.join(proj_store, "data")
dataset_path = os.path.join(proj_store, dataset_choice_name)

# Local model directories: <models folder>/<model identifier>.
embedding_model_choice = os.path.join(models_folderpath, embedding_model_choice_name)
#cross_encoder_model_choice = os.path.join(models_folderpath, cross_encoder_model_choice_name)
tokenizer_model_choice = os.path.join(models_folderpath, tokenizer_model_choice_name)

# Metric CSVs are written under evaluation/interaction-metrics/, followed by
# the dataset's own relative path, so each dataset gets its own folder.
save_folder_path = os.path.join(proj_store, "evaluation", "interaction-metrics", dataset_choice_name)
os.makedirs(save_folder_path, exist_ok=True)


# Load llama tokenizer
# NOTE(review): trust_remote_code=True allows Python code shipped alongside the
# model files to be executed on load -- confirm this tokenizer actually needs it.
tokenizer_model = AutoTokenizer.from_pretrained(tokenizer_model_choice, trust_remote_code=True)

# Load sentence embedding model on the GPU when one is available, else on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
embedding_model = SentenceTransformer(embedding_model_choice, device=device)
#cross_encoder = CrossEncoder(cross_encoder_model_choice, device=device)



#    # Use the chat template to generate properly formatted model input
#    context = tokenizer.apply_chat_template(
#        messages,
#        tokenize=False,
#        add_generation_prompt=True  # adds the final assistant stub
#    )
#    return context





cuda


In [4]:
# Materialise everything the loader yields into a list so it can be measured
# with len() and passed to more than one metric cell.
all_dialogues = list(load_dialogues(dataset_path))
dialogue_count = len(all_dialogues)

print("Loaded", dialogue_count, "dialogues")



Loaded 10285 dialogues


## Progression

In [5]:
#progression_df = progression(
#    dialogues=all_dialogues, 
#    cross_encoder=cross_encoder, 
#    k=PROGRESSION_K, 
#    gamma=PROGRESSION_GAMMA, 
#    group_by="domain", 
#    sort_by="domain"
#)
#
#display(progression_df)


In [6]:
# Progression metric computed with the sentence-embedding model.
# The result is grouped and sorted by the "domain" field via group_by / sort_by.
progression_settings = {
    "k": PROGRESSION_K,
    "gamma": PROGRESSION_GAMMA,
    "group_by": "domain",
    "sort_by": "domain",
}
progression_df = progression(
    dialogues=all_dialogues,
    embedding_model=embedding_model,
    device=device,
    **progression_settings,
)

display(progression_df)



Progression: 100%|██████████| 10285/10285 [01:37<00:00, 105.33it/s]


Unnamed: 0,domain,dialogues,progression
0,academic_interviews,1125,0.697155
1,journalistic_investigations,183,0.742762
2,judicial_proceedings,4949,0.752136
3,oral_history,4028,0.732349


In [7]:
# Save the progression table to <save_folder_path>/progression.csv.
# index=False leaves the DataFrame index out of the file.
progression_df.to_csv(os.path.join(save_folder_path, "progression.csv"), index=False)



## Conversational Control

In [8]:
#control_df = conversational_control(
#    dialogues=all_dialogues, 
#    embedding_model=embedding_model, 
#    device=device, 
#    k=CONVERSATIONAL_CONTROL_K, 
#    gamma=CONVERSATIONAL_CONTROL_GAMMA, 
#    group_by="domain", 
#    sort_by="domain"
#)
#
#display(control_df)



In [9]:
# Save to CSV
#control_df.to_csv(os.path.join(save_folder_path, f"conversational_control.csv"), index=False)



## Turn-Length Ratio

In [10]:
# Turn-length ratio, with token counts taken from the loaded tokenizer.
# The result is grouped and sorted by the "domain" field via group_by / sort_by.
turn_length_settings = {"group_by": "domain", "sort_by": "domain"}
turn_length_df = turn_length_ratio(
    dialogues=all_dialogues,
    tokenizer_model=tokenizer_model,
    **turn_length_settings,
)

display(turn_length_df)



Turn Length: 100%|██████████| 10285/10285 [00:03<00:00, 2972.98it/s]


Unnamed: 0,domain,elicitor_avg_tokens,respondent_avg_tokens,turn_length_ratio
0,academic_interviews,15.26,28.64,1.877
1,journalistic_investigations,18.27,62.5,3.421
2,judicial_proceedings,30.59,45.14,1.476
3,oral_history,16.16,58.7,3.632


In [11]:
# Save the turn-length table to <save_folder_path>/turn_length_ratio.csv.
# index=False leaves the DataFrame index out of the file.
turn_length_df.to_csv(os.path.join(save_folder_path, "turn_length_ratio.csv"), index=False)



## Combine

In [128]:
# Remove extra columns
#progression = progression_df.drop(columns=["dialogues"])
#conversational_control = control_df.drop(columns=["prompt_response_pairs"])
#turn_length_ratio = turn_length_df.drop(columns=["elicitor_avg_tokens", "respondent_avg_tokens"])



In [129]:
# Merge all metric tables on the 'domain' column
#combined_df = progression.merge(conversational_control, on="domain").merge(turn_length_ratio, on="domain")
#combined_df = progression.merge(turn_length_ratio, on="domain")


#display(combined_df)



In [130]:
#combined_df.to_csv(os.path.join(save_folder_path, f"combined_metrics.csv"), index=False)

