# In [6]:
import os
import json
import pandas as pd

# MELD test split, ordered by dialogue and then by utterance within each
# dialogue so downstream iteration sees rows in conversational order.
test_df = pd.read_csv("data/test_sent_emo.csv").sort_values(
    by=["Dialogue_ID", "Utterance_ID"]
)

# Generated responses to pair with the test utterances, ordered the same way.
# CHANGE THIS PATH to the results file of the run being processed.
gen_df = pd.read_csv("results/mistral_baseline.csv").sort_values(
    by=["Dialogue_ID", "Utterance_ID"]
)

# Build one record per test utterance: the generated response for that
# utterance plus the dialogue history (formatted utterances, emotions and
# sentiments) up to and including the utterance itself.
output = []

# Index the generated responses once by (Dialogue_ID, Utterance_ID) so each
# lookup below is a dict access rather than a full scan of gen_df per test row.
# setdefault keeps the first row seen for a duplicated key, which is the row
# `.iloc[0]` would pick from a filtered frame.
responses_by_key = {}
for gen_dialogue_id, gen_utterance_id, response in zip(
    gen_df["Dialogue_ID"].tolist(),
    gen_df["Utterance_ID"].tolist(),
    gen_df["Response"].tolist(),
):
    responses_by_key.setdefault((gen_dialogue_id, gen_utterance_id), response)

# Group MELD test by Dialogue_ID
grouped = test_df.groupby("Dialogue_ID")

history_columns = ["Utterance_ID", "Speaker", "Utterance", "Emotion", "Sentiment"]

# Iterate over each dialogue
for dialogue_id, group in grouped:
    # Group keys may be NumPy scalars depending on the pandas version; json
    # cannot serialise those, so convert to the equivalent built-in type.
    if hasattr(dialogue_id, "item"):
        dialogue_id = dialogue_id.item()

    dialogue = group.sort_values("Utterance_ID")

    # Running history for this dialogue; grows by one entry per utterance.
    history_utterances = []
    history_emotions = []
    history_sentiments = []

    for raw_utterance_id, speaker, utterance, emotion, sentiment in dialogue[
        history_columns
    ].itertuples(index=False, name=None):
        utterance_id = int(raw_utterance_id)

        # Format speaker utterance
        history_utterances.append(f"{speaker}: {utterance}")
        history_emotions.append(emotion)
        history_sentiments.append(sentiment)

        # Match the generated response; fall back to "" when there is none.
        generated_response = responses_by_key.get((dialogue_id, utterance_id), "")
        # An empty Response cell is read from CSV as NaN, which json.dumps
        # would emit as the non-standard token `NaN`; treat it as "no response".
        if pd.isna(generated_response):
            generated_response = ""

        output.append({
            "dialogue_id": dialogue_id,
            "utterance_id": utterance_id,
            "generated_response": generated_response,
            # Snapshot the histories: the lists keep growing on later rows.
            "history_utterances": history_utterances.copy(),
            "history_emotions": history_emotions.copy(),
            "history_sentiments": history_sentiments.copy(),
        })

# Write the records as JSON Lines: one JSON object per line, UTF-8 encoded,
# with non-ASCII characters kept as-is (ensure_ascii=False).
output_path = "empathy/mistral_baseline/dialogue_generations.jsonl"

# Create the full parent directory of the output file (not just the top-level
# folder) so that open() does not fail with FileNotFoundError on a fresh run.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Save to JSONL
with open(output_path, "w", encoding="utf-8") as f:
    for item in output:
        f.write(json.dumps(item, ensure_ascii=False) + "\n")
