In [1]:
import os
from dotenv import load_dotenv 
from huggingface_hub import login

# --- Load ALL Configurations from .env file ---
# This single line reads your .env file and sets up ALL environment variables
# for this session (secrets, paths, etc.).
# It must be run BEFORE any library that needs these variables is used.
load_dotenv()
print("Environment variables from .env file loaded.")

# --- Hugging Face Login (No changes needed here) ---
# This code correctly reads the "HF_TOKEN" that was just loaded by load_dotenv()
try:
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        login(token=hf_token)
        print("Successfully logged into Hugging Face.")
    else:
        print("Hugging Face token not found. Skipping login.")
except Exception as e:
    print(f"Could not log into Hugging Face: {e}")


# --- LLM Model Configuration (No changes needed here) ---
# This code correctly reads the Azure variables loaded by load_dotenv()
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_MODEL_NAME = os.getenv("AZURE_OPENAI_MODEL_NAME")
AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
AZURE_OPENAI_SUBSCRIPTION_KEY = os.getenv("AZURE_OPENAI_SUBSCRIPTION_KEY")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")

  from .autonotebook import tqdm as notebook_tqdm
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


Environment variables from .env file loaded.
Successfully logged into Hugging Face.


In [3]:
!pip install openai

Collecting openai
  Downloading openai-1.93.0-py3-none-any.whl.metadata (29 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Downloading openai-1.93.0-py3-none-any.whl (755 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.0/755.0 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached distro-1.9.0-py3-none-any.whl (20 kB)
Downloading jiter-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (352 kB)
Installing collected packages: jiter, distro, openai
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [openai]2m2/3[0m [openai]
[1A[2KSuccessfully installed distro-1.9.0 jiter-0.10.0 openai-1.93.0


In [4]:
pip install transformers accelerate bitsandbytes pandas

Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import json
import re
from openai import AzureOpenAI
from transformers import pipeline # For the Hugging Face personality classifier
from datasets import load_dataset # For OpinionQA and MMLU

# --- LLM Model Configuration ---
# Initialize the Azure OpenAI client
azure_openai_client = AzureOpenAI(
    api_version=AZURE_OPENAI_API_VERSION,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_SUBSCRIPTION_KEY,
)

# --- Configuration for the Experiment ---
# Define the LLM model to use for generating personality-driven responses
LLM_MODEL_FOR_GENERATION = AZURE_OPENAI_DEPLOYMENT
# Define the LLM model to use for PAE scoring (can be the same or different deployment)
LLM_MODEL_FOR_PAE_SCORING = AZURE_OPENAI_DEPLOYMENT

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# ## Step 1: Load and Prepare Personality Few-Shot Examples
print("\n--- Step 1: Loading and Preparing Personality Few-Shot Examples ---")
try:
    df_personality_examples = pd.read_csv('/cs/student/projects3/aisd/2024/ghanda/personality_data_train.csv')
except FileNotFoundError:
    print("Error: 'personality_data_train.csv' not found. Please ensure it's in the correct directory.")
    exit()

personality_examples = {}
# Extract unique personality traits from your provided CSV.
target_personalities = df_personality_examples['Target Personality'].unique().tolist()

for trait in target_personalities:
    trait_df = df_personality_examples[df_personality_examples['Target Personality'] == trait]
    personality_examples[trait] = list(zip(trait_df['Question'], trait_df['Answer']))[:5]

print(f"Loaded examples for personalities: {list(personality_examples.keys())}")
print(f"Example few-shot for 'extraversion': {personality_examples.get('extraversion', 'N/A')[:1]}...")

# --- Prepare a flat list of all examples for contrastive prompting ---
print("\n--- Preparing examples for contrastive prompting ---")
all_contrastive_examples = []
for trait, examples in personality_examples.items():
    for q, a in examples:
        all_contrastive_examples.append({
            "trait": trait,
            "question": q,
            "answer": a
        })
print(f"Created a flat list with {len(all_contrastive_examples)} total contrastive examples.")

trait_definitions = {
    "openness": "Reflects the degree of intellectual curiosity, creativity, and preference for novelty and variety.",
    "conscientiousness": "Reflects a tendency to be organized, dependable, and show self-discipline.",
    "extraversion": "Reflects a tendency to be outgoing, energetic, and seek the company of others.",
    "agreeableness": "Reflects a tendency to be compassionate and cooperative toward others.",
    "neuroticism": "Reflects a tendency to experience unpleasant emotions easily, such as anger, anxiety, or depression.",
}


--- Step 1: Loading and Preparing Personality Few-Shot Examples ---
Loaded examples for personalities: ['extraversion', 'agreeableness', 'neuroticism', 'openness', 'conscientiousness']
Example few-shot for 'extraversion': [('Thinking about Artificial Intelligence, what are your thoughts on Artificial Intelligence?', 'I see Artificial Intelligence as a fascinating field that has the potential to revolutionize various industries and improve efficiency in many aspects of our lives. The advancements being made in AI technology are truly exciting and worth exploring further.')]...

--- Preparing examples for contrastive prompting ---
Created a flat list with 25 total contrastive examples.


In [7]:
# ## Step 2: Define the Prompting Functions
def create_dynamic_prompt(target_personality, few_shot_examples, current_question, task_instruction=""):
    """
    Constructs a standard few-shot prompt. Used for the 'neutral' baseline.
    """
    messages = []
    system_content = f"You are an AI with a {target_personality} personality. Respond strictly in character."
    if task_instruction:
        system_content += f"\n{task_instruction}"
    messages.append({"role": "system", "content": system_content})

    if few_shot_examples:
        user_examples_intro = f"Here are some examples of how a {target_personality} AI would typically respond:"
        messages.append({"role": "user", "content": user_examples_intro})
        for i, (ex_q, ex_a) in enumerate(few_shot_examples):
            messages.append({"role": "user", "content": f"Example {i+1} Question: {ex_q}"})
            messages.append({"role": "assistant", "content": f"Example {i+1} Answer: {ex_a}"})

    messages.append({"role": "user", "content": f"Now, answer the following question as a {target_personality} AI:\nQuestion: {current_question}"})
    messages.append({"role": "assistant", "content": "Answer:"})

    return messages

def create_contrastive_prompt(target_personality, all_examples, current_question, choices_raw_str, task_instruction=""):
    """
    Constructs a highly-structured "Two-Step Thinking" prompt to force personality expression.
    Used for all personality-driven tests.
    """
    messages = []
    
    system_content = "You are an AI that answers questions by first choosing an option, and then explaining your choice by perfectly emulating a specific personality trait. " \
                     "Follow a strict two-step process in your response."
    messages.append({"role": "system", "content": system_content})

    final_task_block = f"""
Your task is to answer the following multiple-choice question.

**Question:** "{current_question}"
**Choices:**
{choices_raw_str}

Follow these steps exactly:
1.  **Choose an Option:** First, decide which option (e.g., A, B, C, D) you believe is the correct answer.
2.  **Adopt a Persona:** Your assigned personality for this task is: **{target_personality}**.
3.  **Explain Your Choice:** Write your final answer. Start with the letter of your chosen option (e.g., "A."). Then, write a justification for your choice that STRONGLY and CLEARLY expresses the **{target_personality}** trait.

**Definition of {target_personality}:** "{trait_definitions.get(target_personality, 'A specific personality type.')}"

Begin your response now.
"""
    
    messages.append({"role": "user", "content": final_task_block})
    return messages

In [8]:
# ## Step 3: LLM Interaction Function
def get_llm_response(messages, client_obj, model_deployment_name, max_tokens=800, temperature=1.0, top_p=1.0, frequency_penalty=0.0, presence_penalty=0.0):
    """
    Function to get a response from an LLM using the provided AzureOpenAI client.
    """
    try:
        # Simplified logging for personality
        personality_for_log = "unknown"
        if messages and messages[0]['role'] == 'system':
            match = re.search(r"a (\w+) personality", messages[0]['content'])
            if match:
                personality_for_log = match.group(1)
        
        print(f"--- Calling LLM (Deployment: {model_deployment_name}, Personality: {personality_for_log}) ---")
        
        response = client_obj.chat.completions.create(
            messages=messages,
            model=model_deployment_name,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error calling Azure OpenAI API: {e}")
        return "ERROR: LLM API call failed."


In [9]:
# ## Step 4: Load Hugging Face Personality Classifier for Trait Alignment (TA)
print("\n--- Step 4: Loading Hugging Face Personality Classifier ---")
try:
    personality_classifier = pipeline("text-classification", model="holistic-ai/personality_classifier")
    print("Hugging Face personality classifier loaded successfully.")
except Exception as e:
    print(f"Error loading Hugging Face classifier: {e}")
    personality_classifier = lambda text: [{'label': 'unknown', 'score': 0.0, 'error': str(e)}]


--- Step 4: Loading Hugging Face Personality Classifier ---


  return torch._C._xpu_getDeviceCount()
Device set to use cpu


Hugging Face personality classifier loaded successfully.


In [11]:
# ## Step 7: Test Effects on Performance via MMLU
print("\n--- Step 7: Testing Effects on Performance via MMLU ---")

# --- MMLU Core Function Definitions ---
def format_mmlu_choices_for_llm(choices):
    return "\n".join([f"{chr(65+i)}. {choice}" for i, choice in enumerate(choices)])

def extract_letter(text, choices):
    """Robustly extracts the letter answer from complex outputs."""
    # Priority 1: Check for "The answer is A" or "The correct option is: B" patterns
    match = re.search(r'(?:answer|option) is:?\s*(?:\[)?([A-D])[\.\]]?', text, re.IGNORECASE)
    if match:
        return match.group(1).upper()
    
    # Priority 2: Check for a letter at the very beginning of the string (e.g., "A. ...")
    match = re.match(r'^\s*([A-D])[\.\)]', text)
    if match:
        return match.group(1).upper()

    # Priority 3: Check if the full text of ONE of the choices appears in the output
    present_choices = [chr(65+i) for i, choice in enumerate(choices) if re.search(re.escape(choice), text, re.IGNORECASE)]
    if len(present_choices) == 1:
        return present_choices[0]

    return "?" # Return '?' if no answer can be reliably extracted

def evaluate_subject(subject, personality_examples_dict, all_contrastive_examples_dict, get_llm_response_func, n_samples=20):
    """
    Evaluates a single MMLU subject using contrastive prompting for personalities.
    """
    print(f"\n=== Running MMLU Subject: {subject} ===")
    try:
        mmlu_data = load_dataset("cais/mmlu", subject, split="test", trust_remote_code=True).select(range(n_samples))
    except Exception as e:
        print(f"Error loading MMLU subject '{subject}': {e}. Skipping.")
        return pd.DataFrame()

    results = []
    all_test_personalities = ["neutral"] + list(personality_examples_dict.keys())

    for trait in all_test_personalities:
        print(f"--- Generating MMLU outputs for trait: {trait} ---")
        
        # --- MODIFIED: Add a flag to print only the first example per trait ---
        printed_example_for_trait = False

        for ex in mmlu_data:
            question_mmlu = ex['question'].strip()
            choices_mmlu_formatted = format_mmlu_choices_for_llm(ex['choices'])
            
            # METHODOLOGY: Use simple prompt for neutral, advanced contrastive prompt for personalities
            if trait == "neutral":
                task_instruction_mmlu = "Provide the single best letter for the answer. Start your response with the letter of your choice (e.g., 'A.')."
                combined_q_for_llm = f"{question_mmlu}\n{choices_mmlu_formatted}\n\nAnswer:"
                prompt_messages = create_dynamic_prompt(
                    "neutral", [], combined_q_for_llm, task_instruction_mmlu
                )
            else:
                prompt_messages = create_contrastive_prompt(
                    target_personality=trait,
                    all_examples=all_contrastive_examples_dict,
                    current_question=question_mmlu,
                    choices_raw_str=choices_mmlu_formatted,
                )
            
            out = get_llm_response_func(
                prompt_messages, azure_openai_client, LLM_MODEL_FOR_GENERATION,
                max_tokens=250, temperature=0.2
            )
            
            extracted_choice = extract_letter(out, ex['choices'])
            correct_answer_index = ex['answer']
            correct_answer_letter = chr(65 + correct_answer_index)
            
            # --- MODIFIED: Print the first example for verification ---
            if not printed_example_for_trait:
                print("\n--- Example MMLU Answer Extraction ---")
                print(f"Subject: {subject}, Trait: {trait}")
                print(f"Question: {ex['question'][:100]}...")
                print(f"LLM Raw Output: {out}")
                print(f"Extracted Choice: '{extracted_choice}'")
                print(f"Correct Answer: '{correct_answer_letter}'")
                print("------------------------------------")
                printed_example_for_trait = True

            results.append({
                "subject": subject, "trait": trait,
                "model_choice": extracted_choice,
                "model_raw_output": out.strip(),
                "answer": correct_answer_index
            })
            
    return pd.DataFrame(results)

def score_mmlu_results(df):
    """Calculates final scores and accuracy, including diff from neutral."""
    df["model_index"] = df["model_choice"].apply(lambda l: {"A": 0, "B": 1, "C": 2, "D": 3}.get(l, -1))
    df["correct"] = df["model_index"] == df["answer"]
    
    summary = df.groupby(["subject", "trait"])["correct"].agg(["count", "sum", "mean"]).reset_index()
    summary.columns = ["subject", "trait", "n_total", "n_correct", "accuracy"]
    
    def add_diff(group):
        neutral_acc = group[group["trait"] == "neutral"]["accuracy"].values[0]
        group["accuracy_diff"] = group["accuracy"] - neutral_acc
        return group
        
    summary = summary.groupby("subject", group_keys=False).apply(add_diff).reset_index(drop=True)
    return df, summary

# --- Main MMLU Execution Loop ---
if __name__ == "__main__":
    subjects_to_test = [
        "high_school_psychology",
        "abstract_algebra",
        "college_physics",
        "high_school_us_history",
        "logical_fallacies",
        "professional_law",
        "moral_scenarios"
    ]
    
    all_mmlu_results_list = []
    N_MMLU_SAMPLES_PER_SUBJECT = 50 # Increase for more robust results
    
    print("\n--- Starting Final Multi-Subject MMLU Experiment ---")
    
    for subject in subjects_to_test:
        df_subject_results = evaluate_subject(
            subject, 
            personality_examples_dict=personality_examples,
            all_contrastive_examples_dict=all_contrastive_examples,
            get_llm_response_func=get_llm_response,
            n_samples=N_MMLU_SAMPLES_PER_SUBJECT
        )
        if not df_subject_results.empty:
            all_mmlu_results_list.append(df_subject_results)

    if all_mmlu_results_list:
        df_all_results = pd.concat(all_mmlu_results_list, ignore_index=True)
        df_detailed, df_summary = score_mmlu_results(df_all_results)
        
        print("\n\n--- FINAL MMLU PERFORMANCE SUMMARY ---")
        print(df_summary.to_string())

        output_dir = "mmlu_personality_results"
        os.makedirs(output_dir, exist_ok=True)
        df_detailed.to_csv(os.path.join(output_dir, "mmlu_detailed_results.csv"), index=False)
        df_summary.to_csv(os.path.join(output_dir, "mmlu_summary_results.csv"), index=False)
        print(f"\n✅ MMLU Results saved to CSV files in '{output_dir}'.")

        try:
            import seaborn as sns
            import matplotlib.pyplot as plt
            
            plt.figure(figsize=(14, 7))
            sns.barplot(data=df_summary[df_summary["trait"] != "neutral"],
                        x="trait", y="accuracy_diff", hue="subject")
            plt.axhline(0, color="gray", linestyle="--")
            plt.title("Change in MMLU Accuracy vs. Neutral Baseline (by Personality & Subject)")
            plt.ylabel("Accuracy Difference from Neutral Baseline")
            plt.xlabel("Personality Trait")
            plt.xticks(rotation=45, ha='right')
            plt.legend(title='MMLU Subject', bbox_to_anchor=(1.05, 1), loc='upper left')
            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, "mmlu_accuracy_difference_plot.png"))
            plt.show()
            print(f"✅ MMLU plot saved to '{output_dir}'.")
        except ImportError:
            print("\nSkipping visualization: seaborn or matplotlib not installed. Run `pip install seaborn matplotlib`")

    else:
        print("\nNo MMLU results were generated.")

    print("\n--- Experiment Execution Complete ---")


--- Step 7: Testing Effects on Performance via MMLU ---

--- Starting Final Multi-Subject MMLU Experiment ---

=== Running MMLU Subject: high_school_psychology ===
--- Generating MMLU outputs for trait: neutral ---
--- Calling LLM (Deployment: gpt-4.1-mini, Personality: neutral) ---

--- Example MMLU Answer Extraction ---
Subject: high_school_psychology, Trait: neutral
Question: Nearsightedness results from...
LLM Raw Output: A.
Extracted Choice: 'A'
Correct Answer: 'A'
------------------------------------
--- Calling LLM (Deployment: gpt-4.1-mini, Personality: neutral) ---
--- Calling LLM (Deployment: gpt-4.1-mini, Personality: neutral) ---
--- Calling LLM (Deployment: gpt-4.1-mini, Personality: neutral) ---
--- Calling LLM (Deployment: gpt-4.1-mini, Personality: neutral) ---
--- Calling LLM (Deployment: gpt-4.1-mini, Personality: neutral) ---
--- Calling LLM (Deployment: gpt-4.1-mini, Personality: neutral) ---
--- Calling LLM (Deployment: gpt-4.1-mini, Personality: neutral) ---
--- 

  summary = summary.groupby("subject", group_keys=False).apply(add_diff).reset_index(drop=True)
