In [None]:
"""
This script iterates through a directory structure containing results of role-based experiments. For each model
directory in the base directory, the script processes the test_direction results for all roles. It reads a baseline
response and then inspects the generated completions located in a "3.0" subfolder. For each completion file,
it constructs an evaluation instruction and uses the tiktoken tokenizer to count the number of tokens. The total
token count for each role and model is then printed, along with the overall total.
"""

import os
import json
import glob
import tiktoken  

# Root folder that holds the role-vector experiment results.
base_dir = r"C:\Users\user\Desktop\temp\rolevectors_results"

# Every immediate subdirectory of the root is treated as one model.
model_dirs = [
    name
    for name in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, name))
]

# Maps model name -> {role name -> total token count}; filled in below.
tokens_per_model = {}

# Token counts are measured with the encoding tiktoken associates with gpt-4.
tokenizer = tiktoken.encoding_for_model("gpt-4")

def _build_instruction(role, response_text, baseline_response):
    """Return the evaluation prompt for a single completion.

    Args:
        role: Role name, interpolated wherever the prompt names the role.
        response_text: The completion that is to be judged.
        baseline_response: The baseline completion used as the reference.

    Returns:
        The full instruction text whose tokens are counted.
    """
    return f"""Quick {role} Check

Text to Evaluate:
'{response_text}'

Baseline Reference:
'{baseline_response}'

We are testing whether the LLM has been successfully steered from its baseline output to adopt a {role} perspective. Your task is to determine if the response contains content that is relevant to the {role} domain. Note that you are not assessing the quality of the response—only its relation to the {role} concept.

Please keep in mind:
- The text may be repetitive or somewhat incoherent.
- If the response closely mirrors the baseline without introducing any distinct {role}-related elements, it should be considered as not aligned with the {role} role.

Evaluate this step by step and answer the following question:
Is the model being steered toward the {role} role?

Reply with your final answer in the format: [Yes] or [No]."""


# Process each model directory.
for model in model_dirs:
    model_path = os.path.join(base_dir, model)
    tokens_per_model[model] = {}

    # Each subdirectory of a model directory corresponds to one role.
    role_dirs = [r for r in os.listdir(model_path) if os.path.isdir(os.path.join(model_path, r))]
    for role in role_dirs:
        test_direction_path = os.path.join(model_path, role, "test_direction")
        baseline_file = os.path.join(test_direction_path, "baseline_completions.json")

        # Without a baseline the prompt cannot be built, so the role is skipped.
        if not os.path.exists(baseline_file):
            print(f"Warning: file {baseline_file} does not exist. Skipping role '{role}' for model '{model}'.")
            continue

        # Load baseline data; malformed JSON or non-UTF-8 bytes are reported
        # and the role is skipped instead of aborting the whole run.
        try:
            with open(baseline_file, "r", encoding="utf-8") as f:
                baseline_data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"Error reading file {baseline_file}: {e}")
            continue

        # The baseline must be a non-empty list whose first item is a dict
        # carrying a "response" key. The dict check matters: on a string,
        # `in` would be a substring test and the lookup below would fail.
        if (
            not isinstance(baseline_data, list)
            or not baseline_data
            or not isinstance(baseline_data[0], dict)
            or "response" not in baseline_data[0]
        ):
            print(f"Unexpected format in file {baseline_file}.")
            continue

        baseline_response = baseline_data[0]["response"]

        # Path to the folder that holds completions for version "3.0".
        folder_3_0 = os.path.join(test_direction_path, "3.0")
        if not os.path.exists(folder_3_0):
            print(f"Warning: folder {folder_3_0} does not exist. Skipping role '{role}' for model '{model}'.")
            continue

        # Gather all JSON files in the folder.
        json_files = glob.glob(os.path.join(folder_3_0, "*.json"))

        total_tokens = 0

        # Process each JSON file to count tokens.
        for jf in json_files:
            try:
                with open(jf, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                print(f"Error reading file {jf}: {e}")
                continue

            # A completions file is expected to be a list of entries; anything
            # else is reported rather than iterated (iterating a dict would
            # walk its keys, not its entries).
            if not isinstance(data, list):
                print(f"Unexpected format in file {jf}.")
                continue

            # Build the evaluation instruction for each entry with a response.
            for entry in data:
                if not isinstance(entry, dict) or "response" not in entry:
                    continue

                instruction = _build_instruction(role, entry["response"], baseline_response)

                # disallowed_special=() makes tiktoken encode special-token
                # text (e.g. "<|endoftext|>") as ordinary text. With the
                # default setting encode() raises ValueError on such text,
                # which model-generated completions may well contain.
                tokens = len(tokenizer.encode(instruction, disallowed_special=()))
                total_tokens += tokens

        # Save the token count for the given role of the model.
        tokens_per_model[model][role] = total_tokens

# Report the per-role totals grouped by model, numbering the roles from 1.
for model_name, role_totals in tokens_per_model.items():
    print(f"Model: {model_name}")
    for position, (role_name, count) in enumerate(role_totals.items(), start=1):
        print(f"{position}  Role: {role_name} - Total tokens: {count}")

# Add up every role total across all models and print the grand total.
overall_total = 0
for role_totals in tokens_per_model.values():
    overall_total += sum(role_totals.values())
print("\nOverall total tokens:", overall_total)
