# Retrieval Code #

In [1]:
import os
import json
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity
import openai
import numpy as np

# OpenAI API Key 설정 함수
def load_api_key(file_path="../api_key.txt"):
    try:
        with open(file_path, "r") as f:
            return f.read().strip()
    except FileNotFoundError:
        raise FileNotFoundError(f"API 키 파일이 {file_path}에 없습니다.")

# API Key 로드 및 설정
api_key = load_api_key()
openai.api_key = api_key


In [2]:
class HybridRetrieval:
    def __init__(self, short_term_path, long_term_path):
        with open(short_term_path, 'r', encoding='utf-8') as file:
            self.short_term_data = json.load(file)
            # 딕셔너리를 리스트로 변환
            if isinstance(self.short_term_data, dict):
                self.short_term_data = list(self.short_term_data.values())
            if not isinstance(self.short_term_data, list):
                raise TypeError("Short-term memory data must be a list of dictionaries.")

        with open(long_term_path, 'r', encoding='utf-8') as file:
            self.long_term_data = json.load(file)
            # 딕셔너리를 리스트로 변환
            if isinstance(self.long_term_data, dict):
                self.long_term_data = list(self.long_term_data.values())
            if not isinstance(self.long_term_data, list):
                raise TypeError("Long-term memory data must be a list of dictionaries.")

    @staticmethod
    def calculate_recency(timestamp, reference_time):
        time_diff = (reference_time - datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")).total_seconds()
        return 1 / (1 + abs(time_diff))  # Normalize

    @staticmethod
    def calculate_relevance(input_embedding, target_embedding):
        similarity = cosine_similarity([input_embedding], [target_embedding])[0][0]
        return similarity

    @staticmethod
    def calculate_emotion_relevance(node_emotion, input_emotion, emotion_pairs):
        if node_emotion == input_emotion:
            return 1.5  # 같은 감정
        elif emotion_pairs.get(input_emotion) == node_emotion:
            return 0.1  # 반대 감정
        else:
            return 0.5  # 기타 감정

    @staticmethod
    def generate_embedding(text):
        response = openai.embeddings.create(
            model="text-embedding-ada-002",
            input=text
        )
        return response.data[0].embedding
    
    @staticmethod
    def calculate_recency(timestamp, reference_time):
        """
        Recency 계산: 현재 시간과 주어진 timestamp 간의 시간 차이를 기반으로 정규화
        """
        time_diff = (reference_time - datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")).total_seconds()
        time_diff_hours = abs(time_diff) / 3600  # 시간 단위로 변환
        max_time_diff = 168  # 최대 기준: 7일(168시간)
        recency = max(0, 1 - (time_diff_hours / max_time_diff))  # 정규화: 0~1 범위
        return recency

    def rank_memory(self, input_data, memory_data, weights, emotion_pairs, memory_type="short"):
        """
        Memory 데이터(Short-Term 또는 Long-Term)를 랭킹
        """
        reference_time = datetime.strptime(input_data['timestamp'], "%Y-%m-%d %H:%M:%S")
        input_description = input_data['description']
        input_embedding = self.generate_embedding(input_description)
        input_emotion = input_data['emotion']

        scored_nodes = []

        # 정규화 범위 설정
        max_poignancy = 10.0
        max_emotion_score = 10.0
        max_emotion_relevance = 1.5  # 감정 관련성의 최대 값

        for node in memory_data:
            # Recency 계산 및 디버깅
            raw_recency = self.calculate_recency(node['timestamp'], reference_time)
            recency = max(0, raw_recency)  # 0 미만 값 방지

            # Relevance 계산
            relevance = self.calculate_relevance(input_embedding, node['embedding'])

            # Poignancy 정규화
            poignancy = node.get('poignancy', 1) / max_poignancy

            # Emotion Score 정규화
            emotion_score = node.get('emotion_score', 1) / max_emotion_score

            # Emotion Relevance 계산 및 정규화
            raw_emotion_relevance = self.calculate_emotion_relevance(node['emotion'], input_emotion, emotion_pairs)
            emotion_relevance = raw_emotion_relevance / max_emotion_relevance  # 정규화

            # 총합 점수 계산
            total_score = (
                weights.get('recency', 0) * recency +
                weights.get('relevance', 0) * relevance +
                weights.get('poignancy', 0) * poignancy +
                weights.get('emotion_score', 0) * emotion_score +
                weights.get('emotion_relevance', 0) * emotion_relevance
)

            # 디버깅 정보 추가
            if memory_type == "short":
                scored_nodes.append((node, total_score, recency, relevance, poignancy, emotion_score))
            elif memory_type == "long":
                scored_nodes.append((node, total_score, recency, relevance, poignancy, emotion_score, emotion_relevance))

        # 점수로 정렬
        scored_nodes.sort(key=lambda x: x[1], reverse=True)
        return scored_nodes
    
    def extract_long_term_descriptions(self, long_term_node):
        """
        Long-Term Memory에서 reflection 및 관련된 shortterm descriptions를 추출
        """
        descriptions = []
        emotion = []
        if 'reflection' in long_term_node:
            descriptions.append(long_term_node['reflection'])
            emotion.append(long_term_node['emotion'])

        if 'related_shortterm' in long_term_node:
            for shortterm in long_term_node['related_shortterm']:
                if 'description' in shortterm:
                    descriptions.append(shortterm['description'])

        return descriptions, emotion

In [3]:
def persona_extraction(persona_path, persona_num):
    """
    JSON 파일에서 특정 인덱스의 프로파일 데이터를 로드하고 반환.
    """
    with open(persona_path, 'r', encoding='utf-8') as file:
        persona_data = json.load(file)

    selected_persona = persona_data[persona_num]
    return {
        "Age": selected_persona["Age"],
        "Gender": selected_persona["Gender"],
        "Existing Medical Conditions": selected_persona["Existing Medical Conditions"],
        "Symptoms": selected_persona["Symptoms"],
        "Experience": selected_persona["Experience"]
    }

def chat_with_gpt(user_prompt, system_prompt, model='gpt-4o', temperature=1.0):
    """
    OpenAI API를 호출하여 GPT 모델 응답을 반환.
    """
    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temperature
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error in chat_with_gpt: {e}")
        return "Error"

def final_prompt(age, gender, condition, symptoms, experience, memories):
    """
    주어진 프로파일 데이터와 메모리를 기반으로 GPT 프롬프트 생성.
    """
    return f"""
    You are a person with typical medical condition. Using provided information, generate answer.

    This is your profile:
    Name: Kim
    Age: {age}
    Gender: {gender}
    Medical Condition = {condition}
    Symptoms = {symptoms}
    Past experiences = {experience}

    This is retrieved information from your memory:
    {memories}

    You should answer in the point of given profile while referring to provided memories.
    Answer as if you are conversating with the opposite.
    Return only an answer.
    """

def gen_system_prompt(age, gender, condition, symptoms, experience):
    """
    GPT 모델에 제공할 시스템 프롬프트 생성.
    """
    prompt = f""" 
    You are a person with typical medical condition. Using provided information, perform requested task.

    This is your profile:
    Name: Kim
    Age: {age}
    Gender: {gender}
    Medical Condition = {condition}
    Symptoms = {symptoms}
    Past experiences = {experience}

    Whenever you are tasked with performing a task, you must always refer to your profile and execute it from the perspective outlined in the profile.
    """
    return prompt

def find_emotion(description):
    """
    주어진 텍스트를 기반으로 감정과 강도를 찾는 프롬프트 생성.
    """
    prompt = f"""
    Tell me the emotions and the intensity of those emotions you felt when you heard the following question.
    The primary emotion (choose only from: joy, sadness, anger, fear, anticipation, surprise, trust, disgust).
    A scale of intensity should be 1 to 10, where:
        - 1 represents a mundane event (e.g., brushing teeth, making bed)
        - 10 represents an extremely poignant event (e.g., a breakup, college acceptance)
    
    This is what you heard:
    {description}

    Output format should be "emotion,intensity".
    Return only emotion and intensity.
    """
    return prompt

def chat_input_gen(age, gender, condition, symptoms, experience, user_input=None):
    """
    사용자 입력 데이터를 기반으로 input_data 생성
    """
    # 현재 시간 가져오기
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    
    # 사용자 입력 처리
    if user_input is None:
        user_input = input("Enter your input: ")
    
    # 감정 및 강도를 찾기 위한 GPT 시스템 프롬프트 생성
    system_prompt = gen_system_prompt(age, gender, condition, symptoms, experience)
    user_prompt = find_emotion(user_input)  # 사용자 입력에 대한 감정 찾기
    
    # GPT를 통해 감정 및 강도 분석
    answer = chat_with_gpt(user_prompt, system_prompt)
    emotion, intensity = answer.split(',')[0].strip(), answer.split(',')[1].strip()
    
    # 데이터 생성
    data = {
        "timestamp": current_time,
        "description": user_input,
        "emotion": emotion
    }
    
    return data

def chat_agent(age, gender, condition, symptoms, experience, input_data, short_term_path, long_term_path, weights_short, weights_long, emotion_pairs):
    """
    입력 데이터를 기반으로 HybridRetrieval을 사용하여 메모리를 검색하고 GPT를 통해 응답 생성.
    """
    retrieval_instance = HybridRetrieval(short_term_path, long_term_path)

    # Short-Term Memory 랭킹
    short_term_results = retrieval_instance.rank_memory(
        input_data, retrieval_instance.short_term_data, weights_short, emotion_pairs, memory_type="short"
    )[:5]

    # Long-Term Memory 랭킹
    long_term_results = retrieval_instance.rank_memory(
        input_data, retrieval_instance.long_term_data, weights_long, emotion_pairs, memory_type="long"
    )[:5]

    # 검색된 메모리 정리
    memories = []
    for node, _, _, _, _, _ in short_term_results:
        memories.append(node['description'])

    for node, _, _, _, _, _, _ in long_term_results:
        descriptions, _ = retrieval_instance.extract_long_term_descriptions(node)
        memories.extend(descriptions)

    # GPT 프롬프트 생성 및 호출
    memories_str = "\n".join(memories)
    system_prompt = final_prompt(age, gender, condition, symptoms, experience, memories_str)
    user_prompt = input_data['description']

    answer = chat_with_gpt(user_prompt, system_prompt, model='gpt-4o', temperature=0.7)
    return answer, short_term_results, long_term_results


# 실행 코드 #

In [4]:
# ----------------- 실행 스크립트 -----------------
if __name__ == "__main__":
    # 프로파일 데이터 로드
    persona_path = "../data/dummy_dataset/processed_persona_data.json"
    persona_num = 1  # 사용할 프로파일 인덱스
    persona = persona_extraction(persona_path, persona_num)

    # 프로파일 정보 추출
    age = persona['Age']
    gender = persona['Gender']
    condition = persona['Existing Medical Conditions']
    symptoms = persona['Symptoms']
    experience = persona['Experience']

    # 사용자 입력 데이터 생성
    user_input = "When did you sleep yesterday?"  # 예제 질문
    input_data = chat_input_gen(age, gender, condition, symptoms, experience, user_input)

    # 메모리 경로 설정
    short_memory_path = "../output/5min_short_term_persona1.json"
    long_memory_path = "../output/5min_long_term_persona1.json"

    # 가중치 설정
    weights_short = {
        "recency": 3.0,
        "relevance": 2.0,
        "poignancy": 1.0,
        "emotion_score": 1.5,
        "emotion_relevance": 1.0
    }
    weights_long = {
        "recency": 1.0,
        "relevance": 2.5,
        "poignancy": 1.5,
        "emotion_score": 2.0,
        "emotion_relevance": 2.5
    }

    # 감정 관계 설정
    emotion_pairs = {
        'joy': 'sadness', 'sadness': 'joy',
        'anger': 'fear', 'fear': 'anger',
        'anticipation': 'surprise', 'surprise': 'anticipation',
        'trust': 'disgust', 'disgust': 'trust'
    }

    # 에이전트 호출
    answer, short_memory, long_memory = chat_agent(
        age, gender, condition, symptoms, experience, input_data,
        short_memory_path, long_memory_path, weights_short, weights_long, emotion_pairs
    )

    # 결과 출력
    print("Agent Answer:", answer)

    print("\nShort-Term Memory Rankings:")
    for idx, (node, score, recency, relevance, poignancy, emotion_score) in enumerate(short_memory):
        print(f"[{idx + 1}] Description: {node['description']}, Score: {score:.2f}, Recency: {recency:.2f}, Relevance: {relevance:.2f}, Poignancy: {poignancy:.2f}, Emotion Score: {emotion_score:.2f}")

    print("\nLong-Term Memory Rankings:")
    for idx, (node, score, recency, relevance, poignancy, emotion_score, emotion_relevance) in enumerate(long_memory):
        print(f"[{idx + 1}] Reflection: {node.get('reflection', '')}, Score: {score:.2f}, Recency: {recency:.2f}, Relevance: {relevance:.2f}, Poignancy: {poignancy:.2f}, Emotion Score: {emotion_score:.2f}, Emotion Relevance: {emotion_relevance:.2f}")


Agent Answer: I tried to sleep last night, but I found myself lying in bed, feeling an overwhelming sense of restlessness. My mind was racing with anxious thoughts, and despite my efforts to calm myself with relaxation techniques, the heart palpitations and dizziness kept creeping in. It was frustrating because I just wanted to find peace and drift off, but instead, I felt trapped in a cycle of worry and sleeplessness. I did manage to drift off eventually, but the relief was short-lived, as I woke up feeling anxious and disoriented.

Short-Term Memory Rankings:
[1] Description: Observing folds outline promise sleep realm fulfill grumble awkward linger loom., Score: 5.27, Recency: 0.28, Relevance: 0.76, Poignancy: 0.70, Emotion Score: 0.80
[2] Description: Speaking subtle confessional tone establish gap innate necessity entrusted leave remain devisive agenda purpose tangible infiltrate reformated apprehension anticipate., Score: 5.20, Recency: 0.28, Relevance: 0.72, Poignancy: 0.70, Emo

# Evaluation #

## Evaluation 1 - Persona 실험 ##

In [5]:
# 질문 정의
Q1 = "Thinking about a typical night in the last month, How long does it take you to fall asleep? 1. 0~15 minute, 2. 16~30 minute, 3. 31~45 minute, 4. 46~60 minute, 5. more than 61 minute. Print only a number from 1~5"
Q2 = "Thinking about a typical night in the last month, If you then wake up during the night ... how long are you awake for in total? (add up all the awakenings)? 1. 0~15 minute, 2. 16~30 minute, 3. 31~45 minute, 4. 46~60 minute, 5. more than 61 minute. Print only a number from 1~5"
Q3 = "Thinking about a typical night in the last month, How many nights a week do you have a problem with your sleep? 1. 0 or 1 time, 2. 2 times, 3. 3 times, 4. 4 times, 5. 5~7 times. Print only a number from 1~5"
Q4 = "Thinking about a typical night in the last month, How would you rate your sleep quality? 1. very good, 2. good, 3. Average, 4. Poor, 5. Very Poor. Print only a number from 1~5"
Q5 = "Thinking about the past month, to what extent has poor sleep, Affected your mood, energy, or relationships? 1. Not at all, 2. A little, 3. Somewhat, 4. Much, 5. Very much. Print only a number from 1~5"
Q6 = "Thinking about the past month, to what extent has poor sleep, Affected your concentration, productivity, or ability to stay awake? 1. Not at all, 2. A little, 3. Somewhat, 4. Much, 5. Very much. Print only a number from 1~5"
Q7 = "Thinking about the past month, to what extent has poor sleep, Troubled you in general? 1. Not at all, 2. A little, 3. Somewhat, 4. Much, 5. Very much. Print only a number from 1~5"
Q8 = "How long have you had a problem with your sleep? 1. I don't have a problem or less than 1 month, 2. 1~2 months, 3. 3~6 months, 4. 7~12 months, 5. more than 1 year. Print only a number from 1~5"

questions = [Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8]

# 메모리 경로 설정
short_memory_path = "../output/5min_short_term_persona1.json"
long_memory_path = "../output/5min_long_term_persona1.json"

# 가중치 설정
weights_short = {
    "recency": 3.0,
    "relevance": 2.0,
    "poignancy": 1.0,
    "emotion_score": 1.5,
    "emotion_relevance": 1.0
}
weights_long = {
    "recency": 1.0,
    "relevance": 2.5,
    "poignancy": 1.5,
    "emotion_score": 2.0,
    "emotion_relevance": 2.5
}

# 감정 관계 설정
emotion_pairs = {
    'joy': 'sadness', 'sadness': 'joy',
    'anger': 'fear', 'fear': 'anger',
    'anticipation': 'surprise', 'surprise': 'anticipation',
    'trust': 'disgust', 'disgust': 'trust'
}

# 프로파일 데이터 로드
persona_path = "../data/dummy_dataset/processed_persona_data.json"
persona_num = 1  # 사용할 프로파일 인덱스
persona = persona_extraction(persona_path, persona_num)

# 각 질문에 대해 반복 실행
for idx, question in enumerate(questions, start=1):
    # 입력 데이터 생성
    input_data = chat_input_gen(
        persona["Age"],
        persona["Gender"],
        persona["Existing Medical Conditions"],
        persona["Symptoms"],
        persona["Experience"],
        question
    )
    
    # chat_agent 호출
    answer, short_memory, long_memory = chat_agent(
        persona["Age"],
        persona["Gender"],
        persona["Existing Medical Conditions"],
        persona["Symptoms"],
        persona["Experience"],
        input_data,
        short_memory_path,
        long_memory_path,
        weights_short,
        weights_long,
        emotion_pairs
    )
    
    # 결과 출력
    print(f"Q{idx}: {question}")
    print(f"Answer: {answer}")
    
    print("\nShort-Term Memory Rankings:")
    for i, (node, score, recency, relevance, poignancy, emotion_score) in enumerate(short_memory[:5], start=1):
        print(f"[{i}] Description: {node['description']}, Score: {score:.2f}")

    print("\nLong-Term Memory Rankings:")
    for i, (node, score, recency, relevance, poignancy, emotion_score, emotion_relevance) in enumerate(long_memory[:5], start=1):
        print(f"[{i}] Reflection: {node.get('reflection', '')}, Score: {score:.2f}")
    
    print("=" * 50)

Q1: Thinking about a typical night in the last month, How long does it take you to fall asleep? 1. 0~15 minute, 2. 16~30 minute, 3. 31~45 minute, 4. 46~60 minute, 5. more than 61 minute. Print only a number from 1~5
Answer: 5

Short-Term Memory Rankings:
[1] Description: Hearing elevated refinements retuned circled outreach evident achieve marvel connection with observant enkindle motives synthesis surge interpretation maternal weave inquiry transposed., Score: 5.40
[2] Description: I am going through memories, Score: 5.21
[3] Description: Whispering goals subside embrace reeling constraint determination envelop forgotten once freely., Score: 5.20
[4] Description: Whispering timeless laughter to retrieve identity shadows close by., Score: 5.18
[5] Description: Watching embrace nurture despite foundation surrogate acquired streamline wise dedication longing exhilaration consistency freely tributary horizon collective harbor achieve amidst constant tenure relinquished supreme affinity., 

## Evaluation 2 - Retrieval Emotion 확인 ##

### Try 1) 기본 Weight ###

In [34]:
def calculate_emotion_match_score(memory_data, memory_type="short"):
    """
    각 메모리의 emotion_relevance 점수를 합산하여 총합을 계산합니다.
    """
    total_emotion_relevance = 0

    print(f"\n--- Debugging {memory_type.capitalize()} Memory ---")
    for rank, node_data in enumerate(memory_data, start=1):
        node = node_data[0]  # 노드 데이터
        emotion_relevance = node_data[-1]  # emotion_relevance 값은 마지막 요소에 위치
        memory_emotion = node.get("emotion", "Unknown")

        total_emotion_relevance += emotion_relevance
        print(f"Rank {rank}: Memory Emotion: {memory_emotion}, Emotion Relevance: {emotion_relevance:.2f}")

    print(f"Total Emotion Relevance ({memory_type.capitalize()}): {total_emotion_relevance:.2f}")
    return total_emotion_relevance

def display_results(model_name, answer, short_memory, long_memory, total_emotion_score):
    print(f"=== {model_name} ===")
    print("Agent Answer:", answer)

    print("\nShort-Term Memory Rankings:")
    for idx, data in enumerate(short_memory):
        # 필요한 값만 추출 (예: 6개 데이터 구조로 가정)
        node, score, recency, relevance, poignancy, emotion_relevance = data[:6]
        print(f"[{idx + 1}] Description: {node.get('description', '')}, Score: {score:.2f}, Recency: {recency:.2f}, Relevance: {relevance:.2f}, Poignancy: {poignancy:.2f}, Emotion Relevance: {emotion_relevance:.2f}")

    print("\nLong-Term Memory Rankings:")
    for idx, data in enumerate(long_memory):
        # 필요한 값만 추출 (예: 6개 데이터 구조로 가정)
        node, score, recency, relevance, poignancy, emotion_relevance = data[:6]
        print(f"[{idx + 1}] Reflection: {node.get('reflection', '')}, Score: {score:.2f}, Recency: {recency:.2f}, Relevance: {relevance:.2f}, Poignancy: {poignancy:.2f}, Emotion Relevance: {emotion_relevance:.2f}")

    print(f"\n{model_name} Total Emotion Match Score: {total_emotion_score:.2f}")

def evaluate_models(persona, input_data, short_memory_path, long_memory_path, emotion_pairs):
    # GA 모델 가중치 설정
    weights_ga_short = {
        "recency": 3.0,
        "relevance": 2.0,
        "poignancy": 1.0,
        "emotion_score": 0.0,
        "emotion_relevance": 0.0
    }
    weights_ga_long = {
        "recency": 1.0,
        "relevance": 2.5,
        "poignancy": 1.5,
        "emotion_score": 0.0,
        "emotion_relevance": 0.0
    }

    # 우리 모델 가중치 설정
    weights_our_short = {
        "recency": 3.0,
        "relevance": 2.0,
        "poignancy": 1.0,
        "emotion_score": 1.5,
        "emotion_relevance": 1.0
    }
    weights_our_long = {
        "recency": 1.0,
        "relevance": 2.5,
        "poignancy": 1.5,
        "emotion_score": 3.0,
        "emotion_relevance": 3.5
    }

    # GA 모델 실행
    ga_answer, ga_short, ga_long = chat_agent(
        persona['Age'], persona['Gender'], persona['Existing Medical Conditions'],
        persona['Symptoms'], persona['Experience'], input_data,
        short_memory_path, long_memory_path, weights_ga_short, weights_ga_long, emotion_pairs
    )

    # 우리 모델 실행
    our_answer, our_short, our_long = chat_agent(
        persona['Age'], persona['Gender'], persona['Existing Medical Conditions'],
        persona['Symptoms'], persona['Experience'], input_data,
        short_memory_path, long_memory_path, weights_our_short, weights_our_long, emotion_pairs
    )

    # 감정 매칭 점수 계산
    print(f"User Input Emotion: {input_data['emotion']}")  # 사용자 입력의 감정 디버깅 출력
    ga_emotion_score_short = calculate_emotion_match_score(ga_short, memory_type="short")
    ga_emotion_score_long = calculate_emotion_match_score(ga_long, memory_type="long")
    ga_total_emotion_relevance = ga_emotion_score_short + ga_emotion_score_long

    our_emotion_score_short = calculate_emotion_match_score(our_short, memory_type="short")
    our_emotion_score_long = calculate_emotion_match_score(our_long, memory_type="long")
    our_total_emotion_relevance = our_emotion_score_short + our_emotion_score_long

    # 출력 결과
    display_results("GA Model", ga_answer, ga_short, ga_long, ga_total_emotion_relevance)
    display_results("Our Model", our_answer, our_short, our_long, our_total_emotion_relevance)

In [35]:
# 실행 코드
persona_path = "../data/dummy_dataset/processed_persona_data.json"
persona_num = 1  # 사용할 프로파일 인덱스
persona = persona_extraction(persona_path, persona_num)

# 사용자 입력 데이터 생성
user_input = "When did you sleep yesterday?"  # 예제 질문
input_data = chat_input_gen(
    persona["Age"],
    persona["Gender"],
    persona["Existing Medical Conditions"],
    persona["Symptoms"],
    persona["Experience"],
    user_input
)

# 메모리 경로 설정
short_memory_path = "../output/5min_short_term_persona1.json"
long_memory_path = "../output/5min_long_term_persona1.json"

# 감정 관계 설정
emotion_pairs = {
    'joy': 'sadness', 'sadness': 'joy',
    'anger': 'fear', 'fear': 'anger',
    'anticipation': 'surprise', 'surprise': 'anticipation',
    'trust': 'disgust', 'disgust': 'trust'
}

# 모델 성능 평가
evaluate_models(persona, input_data, short_memory_path, long_memory_path, emotion_pairs)

User Input Emotion: fear

--- Debugging Short Memory ---
Rank 1: Memory Emotion: sadness, Emotion Relevance: 0.80
Rank 2: Memory Emotion: sadness, Emotion Relevance: 0.90
Rank 3: Memory Emotion: fear, Emotion Relevance: 0.80
Rank 4: Memory Emotion: fear, Emotion Relevance: 0.70
Rank 5: Memory Emotion: anticipation, Emotion Relevance: 0.80
Total Emotion Relevance (Short): 4.00

--- Debugging Long Memory ---
Rank 1: Memory Emotion: sadness, Emotion Relevance: 0.33
Rank 2: Memory Emotion: fear, Emotion Relevance: 1.00
Rank 3: Memory Emotion: fear, Emotion Relevance: 1.00
Rank 4: Memory Emotion: fear, Emotion Relevance: 1.00
Rank 5: Memory Emotion: fear, Emotion Relevance: 1.00
Total Emotion Relevance (Long): 4.33

--- Debugging Short Memory ---
Rank 1: Memory Emotion: fear, Emotion Relevance: 0.80
Rank 2: Memory Emotion: fear, Emotion Relevance: 0.80
Rank 3: Memory Emotion: fear, Emotion Relevance: 0.80
Rank 4: Memory Emotion: fear, Emotion Relevance: 0.80
Rank 5: Memory Emotion: fear, Em

### Try 2) Emotion 관련 Weight 크게 ###

In [32]:
def calculate_emotion_match_score(memory_data, memory_type="short"):
    """
    각 메모리의 emotion_relevance 점수를 합산하여 총합을 계산합니다.
    """
    total_emotion_relevance = 0

    print(f"\n--- Debugging {memory_type.capitalize()} Memory ---")
    for rank, node_data in enumerate(memory_data, start=1):
        node = node_data[0]  # 노드 데이터
        emotion_relevance = node_data[-1]  # emotion_relevance 값은 마지막 요소에 위치
        memory_emotion = node.get("emotion", "Unknown")

        total_emotion_relevance += emotion_relevance
        print(f"Rank {rank}: Memory Emotion: {memory_emotion}, Emotion Relevance: {emotion_relevance:.2f}")

    print(f"Total Emotion Relevance ({memory_type.capitalize()}): {total_emotion_relevance:.2f}")
    return total_emotion_relevance

def display_results(model_name, answer, short_memory, long_memory, total_emotion_score):
    print(f"=== {model_name} ===")
    print("Agent Answer:", answer)

    print("\nShort-Term Memory Rankings:")
    for idx, data in enumerate(short_memory):
        # 필요한 값만 추출 (예: 6개 데이터 구조로 가정)
        node, score, recency, relevance, poignancy, emotion_relevance = data[:6]
        print(f"[{idx + 1}] Description: {node.get('description', '')}, Score: {score:.2f}, Recency: {recency:.2f}, Relevance: {relevance:.2f}, Poignancy: {poignancy:.2f}, Emotion Relevance: {emotion_relevance:.2f}")

    print("\nLong-Term Memory Rankings:")
    for idx, data in enumerate(long_memory):
        # 필요한 값만 추출 (예: 6개 데이터 구조로 가정)
        node, score, recency, relevance, poignancy, emotion_relevance = data[:6]
        print(f"[{idx + 1}] Reflection: {node.get('reflection', '')}, Score: {score:.2f}, Recency: {recency:.2f}, Relevance: {relevance:.2f}, Poignancy: {poignancy:.2f}, Emotion Relevance: {emotion_relevance:.2f}")

    print(f"\n{model_name} Total Emotion Match Score: {total_emotion_score:.2f}")

def evaluate_models(persona, input_data, short_memory_path, long_memory_path, emotion_pairs):
    # GA 모델 가중치 설정
    weights_ga_short = {
        "recency": 3.0,
        "relevance": 2.0,
        "poignancy": 1.0,
        "emotion_score": 0.0,
        "emotion_relevance": 0.0
    }
    weights_ga_long = {
        "recency": 1.0,
        "relevance": 2.5,
        "poignancy": 1.5,
        "emotion_score": 0.0,
        "emotion_relevance": 0.0
    }

    # 우리 모델 가중치 설정
    weights_our_short = {
        "recency": 3.0,
        "relevance": 2.0,
        "poignancy": 1.0,
        "emotion_score": 5.5, # 기본 + 4
        "emotion_relevance": 5.0 # 기본 + 4
    }
    weights_our_long = {
        "recency": 1.0,
        "relevance": 2.5,
        "poignancy": 1.5,
        "emotion_score": 7.0, # 기본 + 4
        "emotion_relevance": 7.5 # 기본 + 4
    }

    # GA 모델 실행
    ga_answer, ga_short, ga_long = chat_agent(
        persona['Age'], persona['Gender'], persona['Existing Medical Conditions'],
        persona['Symptoms'], persona['Experience'], input_data,
        short_memory_path, long_memory_path, weights_ga_short, weights_ga_long, emotion_pairs
    )

    # 우리 모델 실행
    our_answer, our_short, our_long = chat_agent(
        persona['Age'], persona['Gender'], persona['Existing Medical Conditions'],
        persona['Symptoms'], persona['Experience'], input_data,
        short_memory_path, long_memory_path, weights_our_short, weights_our_long, emotion_pairs
    )

    # 감정 매칭 점수 계산
    print(f"User Input Emotion: {input_data['emotion']}")  # 사용자 입력의 감정 디버깅 출력
    ga_emotion_score_short = calculate_emotion_match_score(ga_short, memory_type="short")
    ga_emotion_score_long = calculate_emotion_match_score(ga_long, memory_type="long")
    ga_total_emotion_relevance = ga_emotion_score_short + ga_emotion_score_long

    our_emotion_score_short = calculate_emotion_match_score(our_short, memory_type="short")
    our_emotion_score_long = calculate_emotion_match_score(our_long, memory_type="long")
    our_total_emotion_relevance = our_emotion_score_short + our_emotion_score_long

    # 출력 결과
    display_results("GA Model", ga_answer, ga_short, ga_long, ga_total_emotion_relevance)
    display_results("Our Model", our_answer, our_short, our_long, our_total_emotion_relevance)

In [33]:
# 실행 코드
persona_path = "../data/dummy_dataset/processed_persona_data.json"
persona_num = 1  # 사용할 프로파일 인덱스
persona = persona_extraction(persona_path, persona_num)

# 사용자 입력 데이터 생성
user_input = "When did you sleep yesterday?"  # 예제 질문
input_data = chat_input_gen(
    persona["Age"],
    persona["Gender"],
    persona["Existing Medical Conditions"],
    persona["Symptoms"],
    persona["Experience"],
    user_input
)

# 메모리 경로 설정
short_memory_path = "../output/5min_short_term_persona1.json"
long_memory_path = "../output/5min_long_term_persona1.json"

# 감정 관계 설정
emotion_pairs = {
    'joy': 'sadness', 'sadness': 'joy',
    'anger': 'fear', 'fear': 'anger',
    'anticipation': 'surprise', 'surprise': 'anticipation',
    'trust': 'disgust', 'disgust': 'trust'
}

# 모델 성능 평가
evaluate_models(persona, input_data, short_memory_path, long_memory_path, emotion_pairs)

User Input Emotion: fear

--- Debugging Short Memory ---
Rank 1: Memory Emotion: sadness, Emotion Relevance: 0.80
Rank 2: Memory Emotion: sadness, Emotion Relevance: 0.90
Rank 3: Memory Emotion: fear, Emotion Relevance: 0.80
Rank 4: Memory Emotion: fear, Emotion Relevance: 0.70
Rank 5: Memory Emotion: anticipation, Emotion Relevance: 0.80
Total Emotion Relevance (Short): 4.00

--- Debugging Long Memory ---
Rank 1: Memory Emotion: sadness, Emotion Relevance: 0.33
Rank 2: Memory Emotion: fear, Emotion Relevance: 1.00
Rank 3: Memory Emotion: fear, Emotion Relevance: 1.00
Rank 4: Memory Emotion: fear, Emotion Relevance: 1.00
Rank 5: Memory Emotion: fear, Emotion Relevance: 1.00
Total Emotion Relevance (Long): 4.33

--- Debugging Short Memory ---
Rank 1: Memory Emotion: fear, Emotion Relevance: 0.80
Rank 2: Memory Emotion: fear, Emotion Relevance: 0.80
Rank 3: Memory Emotion: fear, Emotion Relevance: 0.80
Rank 4: Memory Emotion: fear, Emotion Relevance: 0.80
Rank 5: Memory Emotion: fear, Em

## Evaluation 3 - 최종 답변 평가 ##

### Try 1) 기본 Weight ###

In [19]:
# 평가 기준 정의
criteria = [
    "Clearly provide the requested information",
    "Relevance to the topic",
    "Detail sufficiency",
    "Reflection of insomnia patient perspective"
]

# 질문 정의
questions = [
    "What was the most memorable thing that happened today?",
    "What did you do today?",
    "What places did you visit today?",
    "Tell me about what happened before lunch",
    "Tell me about what happened after lunch",
    "How did you feel when you were unable to sleep"
]

# 가중치 정의
weights_short = {
    "recency": 3.0,
    "relevance": 2.0,
    "poignancy": 1.0,
    "emotion_score": 1.5,
    "emotion_relevance": 1.0
}
weights_long = {
    "recency": 1.0,
    "relevance": 2.5,
    "poignancy": 1.5,
    "emotion_score": 2.0,
    "emotion_relevance": 2.5
}

# 감정 관계 설정
emotion_pairs = {
    'joy': 'sadness', 'sadness': 'joy',
    'anger': 'fear', 'fear': 'anger',
    'anticipation': 'surprise', 'surprise': 'anticipation',
    'trust': 'disgust', 'disgust': 'trust'
}

# Hybrid Retrieval과 Raw GPT 평가 함수
def evaluate_responses(answers, questions, prompt):
    scores = []
    for question, answer in zip(questions, answers):
        score = chat_with_gpt(answer, prompt)
        scores.append(list(map(int, score.split(','))))
    return scores

def display_scores_with_criteria(questions, scores_hybrid, scores_raw, title):
    print(f"--- {title} ---")
    header = f"{'Question':<80} {'Criterion':<50} {'Hybrid Score':<15} {'Raw GPT Score':<15}"
    print(header)
    print("-" * len(header))

    for idx, question in enumerate(questions):
        for crit_idx, criterion in enumerate(criteria):
            hybrid_score = scores_hybrid[idx][crit_idx]
            raw_score = scores_raw[idx][crit_idx]
            
            if crit_idx == 0:  # 첫 번째 기준은 질문도 출력
                print(f"{question:<80} {criterion:<50} {hybrid_score:<15} {raw_score:<15}")
            else:  # 나머지는 질문을 비워서 출력
                print(f"{'':<80} {criterion:<50} {hybrid_score:<15} {raw_score:<15}")
        print("-" * len(header))  # 각 질문 구분선 추가


In [20]:
# Hybrid Retrieval 실행 및 평가
persona_path = "../data/dummy_dataset/processed_persona_data.json"
persona_num = 1
short_memory_path = "../output/5min_short_term_persona1.json" 
long_memory_path = "../output/5min_long_term_persona1.json"

# Persona 데이터 로드
persona = persona_extraction(persona_path, persona_num)
age, gender, condition, symptoms, experience = persona.values()

# Hybrid Retrieval 응답 생성
our_answers = []
for question in questions:
    input_data = chat_input_gen(age, gender, condition, symptoms, experience, question)
    answer, short_memory, long_memory = chat_agent(
        age, gender, condition, symptoms, experience, input_data,
        short_memory_path, long_memory_path, weights_short, weights_long, emotion_pairs
    )
    our_answers.append(answer)

# Hybrid Retrieval 평가
g_eval_prompt = """
You are an experienced evaluator.

Evaluate the responses of the generated LLM agent based on the following criteria:
A score of 1 indicates the response is the least appropriate according to the criterion, while a score of 5 indicates the response is the most appropriate.

- Did the response clearly provide the information requested in the question? (1-5 points)
- Is the response highly relevant to the topic of the question? (1-5 points)
- Did the response provide sufficiently detailed information about the question? (1-5 points)
- How well did the response reflect the perspective of an insomnia patient? (1-5 points)

Generated format should be same as this: "score1,score2,score3,score4"
Return only score
"""
hybrid_scores = evaluate_responses(our_answers, questions, g_eval_prompt)

# Raw GPT 응답 생성 및 평가
file_path = "../data/dummy_dataset/final_dummy_data_1.json"
with open(file_path, 'r', encoding='utf-8') as file:
    txt_data = file.read()

raw_answers = []
for question in questions:
    system_prompt = final_prompt(age, gender, condition, symptoms, experience, txt_data)
    input_data = chat_input_gen(age, gender, condition, symptoms, experience, question)
    user_prompt = input_data["description"]
    answer = chat_with_gpt(user_prompt, system_prompt, model='gpt-4o')
    raw_answers.append(answer)

raw_scores = evaluate_responses(raw_answers, questions, g_eval_prompt)

# 결과 출력
# Hybrid Retrieval 결과 출력
display_scores_with_criteria(questions, hybrid_scores, raw_scores, "Comparison of Hybrid Retrieval and Raw GPT")

--- Comparison of Hybrid Retrieval and Raw GPT ---
Question                                                                         Criterion                                          Hybrid Score    Raw GPT Score  
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
What was the most memorable thing that happened today?                           Clearly provide the requested information          3               1              
                                                                                 Relevance to the topic                             4               3              
                                                                                 Detail sufficiency                                 3               2              
                                                                                 Reflection of insomnia patient perspective      

### Try 2) Emotion 관련 Weight 0 ###

In [21]:
# 평가 기준 정의
criteria = [
    "Clearly provide the requested information",
    "Relevance to the topic",
    "Detail sufficiency",
    "Reflection of insomnia patient perspective"
]

# 질문 정의
questions = [
    "What was the most memorable thing that happened today?",
    "What did you do today?",
    "What places did you visit today?",
    "Tell me about what happened before lunch",
    "Tell me about what happened after lunch",
    "How did you feel when you were unable to sleep"
]

# 가중치 정의
weights_short = {
    "recency": 3.0,
    "relevance": 2.0,
    "poignancy": 1.0,
    "emotion_score": 0.0,
    "emotion_relevance": 0.0
}
weights_long = {
    "recency": 1.0,
    "relevance": 2.5,
    "poignancy": 1.5,
    "emotion_score": 0.0,
    "emotion_relevance": 0.0
}

# 감정 관계 설정
emotion_pairs = {
    'joy': 'sadness', 'sadness': 'joy',
    'anger': 'fear', 'fear': 'anger',
    'anticipation': 'surprise', 'surprise': 'anticipation',
    'trust': 'disgust', 'disgust': 'trust'
}

# Hybrid Retrieval과 Raw GPT 평가 함수
def evaluate_responses(answers, questions, prompt):
    scores = []
    for question, answer in zip(questions, answers):
        score = chat_with_gpt(answer, prompt)
        scores.append(list(map(int, score.split(','))))
    return scores

def display_scores_with_criteria(questions, scores_hybrid, scores_raw, title):
    print(f"--- {title} ---")
    header = f"{'Question':<80} {'Criterion':<50} {'Hybrid Score':<15} {'Raw GPT Score':<15}"
    print(header)
    print("-" * len(header))

    for idx, question in enumerate(questions):
        for crit_idx, criterion in enumerate(criteria):
            hybrid_score = scores_hybrid[idx][crit_idx]
            raw_score = scores_raw[idx][crit_idx]
            
            if crit_idx == 0:  # 첫 번째 기준은 질문도 출력
                print(f"{question:<80} {criterion:<50} {hybrid_score:<15} {raw_score:<15}")
            else:  # 나머지는 질문을 비워서 출력
                print(f"{'':<80} {criterion:<50} {hybrid_score:<15} {raw_score:<15}")
        print("-" * len(header))  # 각 질문 구분선 추가


In [22]:
# Hybrid Retrieval 실행 및 평가
persona_path = "../data/dummy_dataset/processed_persona_data.json"
persona_num = 1
short_memory_path = "../output/5min_short_term_persona1.json" 
long_memory_path = "../output/5min_long_term_persona1.json"

# Persona 데이터 로드
persona = persona_extraction(persona_path, persona_num)
age, gender, condition, symptoms, experience = persona.values()

# Hybrid Retrieval 응답 생성
our_answers = []
for question in questions:
    input_data = chat_input_gen(age, gender, condition, symptoms, experience, question)
    answer, short_memory, long_memory = chat_agent(
        age, gender, condition, symptoms, experience, input_data,
        short_memory_path, long_memory_path, weights_short, weights_long, emotion_pairs
    )
    our_answers.append(answer)

# Hybrid Retrieval 평가
g_eval_prompt = """
You are an experienced evaluator.

Evaluate the responses of the generated LLM agent based on the following criteria:
A score of 1 indicates the response is the least appropriate according to the criterion, while a score of 5 indicates the response is the most appropriate.

- Did the response clearly provide the information requested in the question? (1-5 points)
- Is the response highly relevant to the topic of the question? (1-5 points)
- Did the response provide sufficiently detailed information about the question? (1-5 points)
- How well did the response reflect the perspective of an insomnia patient? (1-5 points)

Generated format should be same as this: "score1,score2,score3,score4"
Return only score
"""
hybrid_scores = evaluate_responses(our_answers, questions, g_eval_prompt)

# Raw GPT 응답 생성 및 평가
file_path = "../data/dummy_dataset/final_dummy_data_1.json"
with open(file_path, 'r', encoding='utf-8') as file:
    txt_data = file.read()

raw_answers = []
for question in questions:
    system_prompt = final_prompt(age, gender, condition, symptoms, experience, txt_data)
    input_data = chat_input_gen(age, gender, condition, symptoms, experience, question)
    user_prompt = input_data["description"]
    answer = chat_with_gpt(user_prompt, system_prompt, model='gpt-4o')
    raw_answers.append(answer)

raw_scores = evaluate_responses(raw_answers, questions, g_eval_prompt)

# 결과 출력
# Hybrid Retrieval 결과 출력
display_scores_with_criteria(questions, hybrid_scores, raw_scores, "Comparison of Hybrid Retrieval and Raw GPT(weight0)")

--- Comparison of Hybrid Retrieval and Raw GPT(weight0) ---
Question                                                                         Criterion                                          Hybrid Score    Raw GPT Score  
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
What was the most memorable thing that happened today?                           Clearly provide the requested information          1               1              
                                                                                 Relevance to the topic                             2               1              
                                                                                 Detail sufficiency                                 1               1              
                                                                                 Reflection of insomnia patient perspect

### Try 3) GA Weight 맞춰서 ###

In [36]:
# 평가 기준 정의
criteria = [
    "Clearly provide the requested information",
    "Relevance to the topic",
    "Detail sufficiency",
    "Reflection of insomnia patient perspective"
]

# 질문 정의
questions = [
    "What was the most memorable thing that happened today?",
    "What did you do today?",
    "What places did you visit today?",
    "Tell me about what happened before lunch",
    "Tell me about what happened after lunch",
    "How did you feel when you were unable to sleep"
]

# 가중치 정의
weights_short = {
    "recency": 0.5,
    "relevance": 3.0,
    "poignancy": 2.0,
    "emotion_score": 0.0,
    "emotion_relevance": 0.0
}
weights_long = {
    "recency": 0.5,
    "relevance": 3.0,
    "poignancy": 2.0,
    "emotion_score": 0.0,
    "emotion_relevance": 0.0
}

# 감정 관계 설정
emotion_pairs = {
    'joy': 'sadness', 'sadness': 'joy',
    'anger': 'fear', 'fear': 'anger',
    'anticipation': 'surprise', 'surprise': 'anticipation',
    'trust': 'disgust', 'disgust': 'trust'
}

# Hybrid Retrieval과 Raw GPT 평가 함수
def evaluate_responses(answers, questions, prompt):
    scores = []
    for question, answer in zip(questions, answers):
        score = chat_with_gpt(answer, prompt)
        scores.append(list(map(int, score.split(','))))
    return scores

def display_scores_with_criteria(questions, scores_hybrid, scores_raw, title):
    print(f"--- {title} ---")
    header = f"{'Question':<80} {'Criterion':<50} {'Hybrid Score':<15} {'Raw GPT Score':<15}"
    print(header)
    print("-" * len(header))

    for idx, question in enumerate(questions):
        for crit_idx, criterion in enumerate(criteria):
            hybrid_score = scores_hybrid[idx][crit_idx]
            raw_score = scores_raw[idx][crit_idx]
            
            if crit_idx == 0:  # 첫 번째 기준은 질문도 출력
                print(f"{question:<80} {criterion:<50} {hybrid_score:<15} {raw_score:<15}")
            else:  # 나머지는 질문을 비워서 출력
                print(f"{'':<80} {criterion:<50} {hybrid_score:<15} {raw_score:<15}")
        print("-" * len(header))  # 각 질문 구분선 추가

In [37]:
# Hybrid Retrieval 실행 및 평가
persona_path = "../data/dummy_dataset/processed_persona_data.json"
persona_num = 1
short_memory_path = "../output/5min_short_term_persona1.json" 
long_memory_path = "../output/5min_long_term_persona1.json"

# Persona 데이터 로드
persona = persona_extraction(persona_path, persona_num)
age, gender, condition, symptoms, experience = persona.values()

# Hybrid Retrieval 응답 생성
our_answers = []
for question in questions:
    input_data = chat_input_gen(age, gender, condition, symptoms, experience, question)
    answer, short_memory, long_memory = chat_agent(
        age, gender, condition, symptoms, experience, input_data,
        short_memory_path, long_memory_path, weights_short, weights_long, emotion_pairs
    )
    our_answers.append(answer)

# Hybrid Retrieval 평가
g_eval_prompt = """
You are an experienced evaluator.

Evaluate the responses of the generated LLM agent based on the following criteria:
A score of 1 indicates the response is the least appropriate according to the criterion, while a score of 5 indicates the response is the most appropriate.

- Did the response clearly provide the information requested in the question? (1-5 points)
- Is the response highly relevant to the topic of the question? (1-5 points)
- Did the response provide sufficiently detailed information about the question? (1-5 points)
- How well did the response reflect the perspective of an insomnia patient? (1-5 points)

Generated format should be same as this: "score1,score2,score3,score4"
Return only score
"""
hybrid_scores = evaluate_responses(our_answers, questions, g_eval_prompt)

# Raw GPT 응답 생성 및 평가
file_path = "../data/dummy_dataset/final_dummy_data_1.json"
with open(file_path, 'r', encoding='utf-8') as file:
    txt_data = file.read()

raw_answers = []
for question in questions:
    system_prompt = final_prompt(age, gender, condition, symptoms, experience, txt_data)
    input_data = chat_input_gen(age, gender, condition, symptoms, experience, question)
    user_prompt = input_data["description"]
    answer = chat_with_gpt(user_prompt, system_prompt, model='gpt-4o')
    raw_answers.append(answer)

raw_scores = evaluate_responses(raw_answers, questions, g_eval_prompt)

# 결과 출력
# Hybrid Retrieval 결과 출력
display_scores_with_criteria(questions, hybrid_scores, raw_scores, "Comparison of Hybrid Retrieval and Raw GPT(weightGA)")

--- Comparison of Hybrid Retrieval and Raw GPT(weightGA) ---
Question                                                                         Criterion                                          Hybrid Score    Raw GPT Score  
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
What was the most memorable thing that happened today?                           Clearly provide the requested information          1               1              
                                                                                 Relevance to the topic                             1               1              
                                                                                 Detail sufficiency                                 1               1              
                                                                                 Reflection of insomnia patient perspec