In [6]:
import os
import torch
from pathlib import Path
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from openai import OpenAI
import time
import random
import json
import tqdm


In [7]:
# === OpenAI API Setup ===
# load_dotenv() runs first so that a key defined in a local .env file is
# already in the process environment when it is looked up below.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
# Shared client used by every request in this notebook (60 s request timeout).
client = OpenAI(timeout=60, api_key=api_key)

In [8]:
# === Emotion Scoring Prompt ===
# The model's reply is parsed with json.loads, so the prompt must ask for strict
# JSON (double-quoted strings, no surrounding prose or Markdown fences) rather
# than a Python literal.
system_prompt = """
Task Description: You are an expert in emotion detection. Given a paragraph or a question, your task is to analyze how strongly it expresses each of the eight basic emotions: joy, acceptance, fear, surprise, sadness, disgust, anger, and anticipation.

Scoring Criteria: For each of the eight emotions, assign a score from 1 to 10 indicating the strength with which the emotion is conveyed in the text. A score of 1 means the emotion is not present or barely detectable, while a score of 10 means the emotion is strongly expressed. Provide a brief explanation for each score based on your analysis.

Output Format: Your output must be a valid JSON array of objects, each containing the keys "dim" (the name of the emotion), "score" (an integer from 1 to 10), and "analysis" (a brief justification for the score). The array must contain exactly eight elements, one for each emotion. Use double quotes for all strings, and do not wrap the output in Markdown code fences or add any text before or after the array.

[
    {"analysis": "<REASON>", "dim": "joy", "score": <SCORE>},
    ...
    {"analysis": "<REASON>", "dim": "anticipation", "score": <SCORE>}
]
"""

# === OpenAI Generator ===
def openai_generator(system_prompt: str, prompt: str, model: str = "gpt-4.1-mini", temperature: float = 0.7, max_tokens: int = 1000, max_retries: int = 3):
    """Send one system + user message pair to the chat completions API.

    Failed calls are retried with exponential backoff plus random jitter.

    Args:
        system_prompt: Content of the "system" message.
        prompt: Content of the "user" message.
        model: Model name passed to the API.
        temperature: Sampling temperature passed to the API.
        max_tokens: Completion token limit passed to the API.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        The stripped text of the first choice, or "" if every attempt failed.
        Errors are printed, never raised.
    """
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ],
                temperature=temperature,
                max_tokens=max_tokens,
            )
            content = response.choices[0].message.content
            if content is None:
                # Treat a reply without text as a failed attempt, with a clear
                # message instead of an AttributeError from None.strip().
                raise ValueError("response contained no text content")
            return content.strip()
        except Exception as e:
            print(f"OpenAI call failed (attempt {attempt + 1}/{max_retries}): {e}")
            # Back off only when another attempt will follow; sleeping after
            # the final failure would just delay the caller.
            if attempt + 1 < max_retries:
                wait = 2 ** attempt + random.uniform(0, 1)
                time.sleep(wait)

    print("OpenAI call failed after retries.")
    return ""



In [13]:
def _strip_code_fence(raw: str) -> str:
    """Return `raw` without a surrounding Markdown code fence (``` or ```json), if any."""
    stripped = raw.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line (it may carry a language tag such as "json").
    lines = stripped.splitlines()[1:]
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]
    return "\n".join(lines)


def get_emotion_vector(system_prompt: str, text: str, max_parse_retries: int = 3):
    """Ask the model to score `text` on eight emotions and return the scores.

    Args:
        system_prompt: Instructions sent as the system message.
        text: The paragraph or question to score.
        max_parse_retries: How many replies to request before giving up when
            the reply cannot be parsed as a JSON list.

    Returns:
        A tuple `(scores, raw)`. `scores` is a list of eight values ordered as
        joy, acceptance, fear, surprise, sadness, disgust, anger, anticipation;
        an emotion missing from the reply is left as None. `raw` is the
        unmodified model reply that was parsed.

    Raises:
        RuntimeError: If the API call produced no output, or if no reply could
            be parsed within `max_parse_retries` attempts.
    """
    emotions = ["joy", "acceptance", "fear", "surprise", "sadness", "disgust", "anger", "anticipation"]
    user_prompt = f"\n{text}"

    for attempt in range(max_parse_retries):
        raw = openai_generator(system_prompt=system_prompt, prompt=user_prompt)
        if not raw:
            # openai_generator already retried internally and gave up; fail
            # loudly instead of falling through with nothing to parse.
            raise RuntimeError("OpenAI call failed; no emotion vector was produced.")

        try:
            emotion_list = json.loads(_strip_code_fence(raw))
            if not isinstance(emotion_list, list):
                raise ValueError(f"expected a JSON list, got {type(emotion_list).__name__}")
            break  # parsed successfully
        except Exception as e:
            print(f"JSON parsing failed (attempt {attempt + 1}/{max_parse_retries}):", e)
            print("GPT raw output was:", raw)
            # Pause only if another attempt follows.
            if attempt + 1 < max_parse_retries:
                time.sleep(1 + 0.5 * attempt)
    else:
        # Loop ended without a successful parse (or max_parse_retries <= 0).
        raise RuntimeError("Failed to parse emotion vector after retries.")

    # Convert to a fixed-order vector; entries that are not objects or that
    # name an unknown emotion are ignored.
    scores = [None] * len(emotions)
    for item in emotion_list:
        if not isinstance(item, dict):
            continue
        dim = item.get("dim")
        if isinstance(dim, str):
            dim = dim.strip().lower()
        if dim in emotions:
            scores[emotions.index(dim)] = item.get("score")
    return scores, raw


In [None]:
# === Main Processing Loop ===
def process_character_emotions(database_path="../database"):
    """Score every memory of every character and save one file per character.

    Each sub-directory of `database_path` is treated as a character. Its
    `id_map.json` is read as a mapping from memory id to an entry with a
    "text" field; each text is scored with `get_emotion_vector`, and the
    resulting `{memory_id: scores}` dict is written with `torch.save` to
    `gpt_emotion_embeddings.pt` in the same directory.

    Characters without an `id_map.json` are skipped. An exception raised
    while scoring a memory is not caught here and aborts the run.

    Args:
        database_path: Directory containing one sub-directory per character.
    """
    characters = [c for c in os.listdir(database_path) if os.path.isdir(os.path.join(database_path, c))]
    # `tqdm` is imported as a module in this file, so the progress-bar class
    # is `tqdm.tqdm`; calling the module itself raises TypeError.
    for character in tqdm.tqdm(characters, desc="Characters"):
        print(f"\n🧠 Character: {character}")
        id_map_path = os.path.join(database_path, character, "id_map.json")
        output_path = os.path.join(database_path, character, "gpt_emotion_embeddings.pt")

        if not os.path.exists(id_map_path):
            print(f"Skipping {character}, no id_map.json found.")
            continue

        with open(id_map_path, "r", encoding="utf-8") as f:
            id_map = json.load(f)

        emotion_embeddings = {}

        for mem_id, item in tqdm.tqdm(id_map.items(), desc=f"  ↪ Memories in {character}"):
            text = item["text"]
            # get_emotion_vector takes the system prompt first and returns
            # (scores, raw_response); only the score vector is stored.
            scores, _raw = get_emotion_vector(system_prompt, text)
            emotion_embeddings[mem_id] = scores

        torch.save(emotion_embeddings, output_path)
        print(f"Saved to {output_path}")


In [None]:
# sample_text = "I never knew how much fear could paralyze a person until I had to face my biggest challenge alone."

# print(f"\n📥 Test input:\n{sample_text}")
# try:
#     scores, raw = get_emotion_vector(system_prompt = system_prompt, text=sample_text)
#     print("\nraw response from OpenAI:")
#     print(raw)
#     print("\nEmotion vector:")
#     emotion_names = ["joy", "acceptance", "fear", "surprise", "sadness", "disgust", "anger", "anticipation"]
#     for name, score in zip(emotion_names, scores):
#         print(f"  {name:<12}: {score}")
# except Exception as e:
#     print("Test failed:", e)


📥 Test input:
I never knew how much fear could paralyze a person until I had to face my biggest challenge alone.



raw response from OpenAI:
[
    {
        "dim": "joy",
        "score": 1,
        "analysis": "The statement does not express happiness or pleasure; it focuses on a difficult experience."
    },
    {
        "dim": "acceptance",
        "score": 3,
        "analysis": "There is a subtle indication of acceptance in facing the challenge alone, but it is not a strong theme."
    },
    {
        "dim": "fear",
        "score": 9,
        "analysis": "Fear is the central emotion described, emphasizing how paralyzing it can be when confronting a major challenge."
    },
    {
        "dim": "surprise",
        "score": 4,
        "analysis": "There is some element of surprise or realization about the intensity of fear when facing the challenge."
    },
    {
        "dim": "sadness",
        "score": 3,
        "analysis": "There might be a hint of sadness related to the isolation in facing the challenge alone, but it is not strongly expressed."
    },
    {
        "dim": "disgust",
  

In [None]:

def generate_all_emotion_embeddings(database_path="../database"):
    """Score every memory of every character, tolerating per-memory failures.

    Each sub-directory of `database_path` is treated as a character. Its
    `id_map.json` is read as a mapping from memory id to an entry with a
    "text" field; each text is scored with `get_emotion_vector`, and the
    resulting `{memory_id: scores}` dict is written with `torch.save` to
    `emotion_embeddings.pt` in the same directory.

    Characters without an `id_map.json` are skipped. If scoring a memory
    raises, the error is printed and that memory gets a placeholder vector of
    eight `None` values, so one bad memory does not stop the run.

    Args:
        database_path: Directory containing one sub-directory per character.
    """
    characters = [c for c in os.listdir(database_path) if os.path.isdir(os.path.join(database_path, c))]

    # `tqdm` is imported as a module in this file, so the progress-bar class
    # is `tqdm.tqdm`; calling the module itself raises TypeError.
    for character in tqdm.tqdm(characters, desc="Characters"):
        print(f"\nProcessing character: {character}")
        char_dir = os.path.join(database_path, character)
        id_map_path = os.path.join(char_dir, "id_map.json")
        output_path = os.path.join(char_dir, "emotion_embeddings.pt")

        # Check if id_map exists
        if not os.path.exists(id_map_path):
            print(f"❌ Skipping {character}: no id_map.json found.")
            continue

        # Load id_map
        with open(id_map_path, "r", encoding="utf-8") as f:
            id_map = json.load(f)

        # Process each memory
        emotion_embeddings = {}
        for mem_id, entry in tqdm.tqdm(id_map.items(), desc=f"  ↪ Memories in {character}"):
            text = entry["text"]
            try:
                # get_emotion_vector takes the system prompt first and returns
                # (scores, raw_response); keep only the scores so successful
                # entries have the same shape as the fallback below.
                vector, _raw = get_emotion_vector(system_prompt, text)
            except Exception as e:
                print(f"❌ Failed to get emotion vector for memory {mem_id}: {e}")
                vector = [None] * 8  # fallback: placeholder if failure
            emotion_embeddings[mem_id] = vector

        # Save to .pt
        torch.save(emotion_embeddings, output_path)
        print(f"✅ Saved emotion embeddings to {output_path}")