# emotion related data analysis
* Modeling Naive Psychology of Characters in Simple Commonsense Stories
    * https://uwnlp.github.io/storycommonsense/

In [1]:
from enum import Enum
import json
import os

from openai import AsyncOpenAI
import pandas as pd
from pydantic import BaseModel, Extra, Field

from config import settings

In [2]:
print(settings.llm_model)
client = AsyncOpenAI(
    base_url=settings.llm_base_url,
    api_key=settings.llm_api_key
)

gpt-4.1-nano


# 1. COMET-2020
COMET (commonsense knowledge datasets) contains triples related to emotional reactions
* https://arxiv.org/abs/2010.05953
* `xReact`, `oReact`: Relations xReact and oReact define the emotional reactions on the part of X or other participants in an event.

In [3]:
comet_data_dir = "/Users/id4thomas/datasets/commonsense/atomic2020_data-feb2021"
os.listdir(comet_data_dir)

['.DS_Store',
 'LICENSE',
 'train.tsv',
 'dev.tsv',
 'README.md',
 'test.tsv',
 'hwang2021comet.pdf']

In [4]:
split = "train" # train, dev, test
comet_df = pd.read_csv(os.path.join(comet_data_dir, f"{split}.tsv"), sep="\t", header=None, names=["source", "relation", "target"])
print(comet_df.shape, comet_df.columns)

(1076880, 3) Index(['source', 'relation', 'target'], dtype='object')


In [5]:
print(
    "{} xReact {} oReact {}".format(
        split,
        comet_df[comet_df.relation=='xReact'].shape[0],
        comet_df[comet_df.relation=='oReact'].shape[0]
    )
)

train xReact 65984 oReact 54632


In [6]:
print(comet_df.relation.unique())

['oEffect' 'oReact' 'oWant' 'xAttr' 'xEffect' 'xIntent' 'xNeed' 'xReact'
 'xWant' 'AtLocation' 'ObjectUse' 'Desires' 'HasProperty' 'NotDesires'
 'Causes' 'HasSubEvent' 'xReason' 'CapableOf' 'MadeUpOf' 'isAfter'
 'isBefore' 'isFilledBy' 'HinderedBy']


In [7]:
comet_df[comet_df.relation=='xReact'].head()

Unnamed: 0,source,relation,target
23,PersonX abandons ___ altogether,xReact,authoritative
55,PersonX abandons the ___ altogether,xReact,pressurized
88,PersonX abolishes ___ altogether,xReact,he was sad
115,PersonX abolishes ___ in the states,xReact,sad
145,PersonX abolishes the ___ altogether,xReact,happy


In [8]:
react_df = comet_df[comet_df.relation.isin(['xReact', 'oReact'])]
print("React: {}".format(react_df.shape[0]))

React: 120616


## 1-1. Data collection test

## Single Inference

In [9]:
# Single Sample
system_prompt = '''You are provided with entries from a commonsense reasoning dataset (COMET-2020) that consist of three parts:
1.	source: A short event description containing the placeholder “PersonX” or “PersonY.”
2.	relation: Either xReact (indicating the subject’s emotional reaction) or oReact (indicating the other person’s emotional reaction).
3.	target: A brief description of the emotion, state, or reaction.

# Task
For each entry in the dataset, you need to:
## Expand the Source
Replace “PersonX” or “PersonY” with a random human name (e.g., Alex, Jordan, María, Fatima, Ethan, etc.) and expand the event into a realistic scenario by adding enough context.
For example, if the source is “PersonX abandons ___ altogether,” you might expand it to:
“Veronica decides to stop relying on social media altogether, feeling that she has complete control over her personal life.”

## Map the Emotion: Map the target emotion onto the 8 primary emotions from Plutchik’s Wheel
* Joy
* Trust
* Fear
* Surprise
* Sadness
* Disgust
* Anger
* Anticipation

For each emotion, assign one of the following intensities:
* "na" (not applicable)
* "low"
* "medium"
* "high"

Interpret the target emotion (e.g., “authoritative”) in terms of Plutchik’s emotions. For example, you might decide:
* trust: high
* joy: low
* anticipation: medium

## Write a Reason:
Provide a one-sentence rationale ("reason") explaining why the subject (if xReact) or the other person (if oReact) feels the given emotion(s).
ex. “She feels empowered and confident after cutting out social media.”

# Note
* For xReact, the emotion data should pertain to the subject’s (formerly “PersonX”) reaction.
* For oReact, the emotion data should pertain to the other person (formerly “PersonY”)

Return in the following JSON format
{
  "source": "Expanded realistic scenario with a real name in place of PersonX/PersonY",
  "emotion": {
    "joy": "na" | "low" | "medium" | "high",
    "trust": "na" | "low" | "medium" | "high",
    "fear": "na" | "low" | "medium" | "high",
    "surprise": "na" | "low" | "medium" | "high",
    "sadness": "na" | "low" | "medium" | "high",
    "disgust": "na" | "low" | "medium" | "high",
    "anger": "na" | "low" | "medium" | "high",
    "anticipation": "na" | "low" | "medium" | "high"
  },
  "reason": "One sentence explaining why these emotions occur"
}
Only return the JSON, don't return in markdown format. (ex. "```json...") start like "{..."
'''
user_template = '''[Source]
{source}
[Relation]
{relation}
[Target]
{target}'''

In [10]:
class RelationshipStatus(str, Enum):
    na = "na"
    low = "low"
    medium = "medium"
    high = "high"
    
class EmotionLabel(BaseModel):
    joy: RelationshipStatus
    trust: RelationshipStatus
    fear: RelationshipStatus
    surprise: RelationshipStatus
    sadness: RelationshipStatus
    disgust: RelationshipStatus
    anger: RelationshipStatus
    anticipation: RelationshipStatus
    
    class Config:
        extra = Extra.forbid
        use_enum_values = True
        
class Result(BaseModel):
    source: str
    emotion: EmotionLabel
    reason: str
    
    class Config:
        extra = Extra.forbid
        use_enum_values = True

/var/folders/wj/0c7skj2154q4844jqxlw3yxr0000gn/T/ipykernel_25240/824004795.py:18: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  extra = Extra.forbid
/var/folders/wj/0c7skj2154q4844jqxlw3yxr0000gn/T/ipykernel_25240/824004795.py:27: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  extra = Extra.forbid


In [11]:
row = comet_df[comet_df.relation=='xReact'].iloc[0]

user_message = user_template.format(
    source=row['source'],
    relation=row['relation'],
    target=row['target'],
)

# OpenAI
result = await client.beta.chat.completions.parse(
    model=settings.llm_model,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ],
    response_format=Result,
)

# vLLM
# result = await client.chat.completions.create(
#     model=settings.llm_model,
#     messages=[
#         {"role": "system", "content": system_prompt},
#         {"role": "user", "content": user_message},
#     ],
#     extra_body={"guided_json": Result.model_json_schema()}
# )

In [12]:
# print(result.choices[0].message.content)
# print(json.loads(result.choices[0].message.content))

In [13]:
prediction_result = result.choices[0].message.parsed
print(prediction_result.model_dump_json(indent=4))

{
    "source": "Veronica decides to stop relying on social media altogether, feeling that she has complete control over her personal life.",
    "emotion": {
        "joy": "na",
        "trust": "high",
        "fear": "low",
        "surprise": "low",
        "sadness": "na",
        "disgust": "low",
        "anger": "na",
        "anticipation": "medium"
    },
    "reason": "She feels empowered and confident after cutting out social media."
}


## Multiple Inference

In [14]:
# Multiple Samples
system_prompt = '''You are provided with entries from a commonsense reasoning dataset (COMET-2020) that consist of three parts:
1.	source: A short event description containing the placeholder “PersonX” or “PersonY.”
2.	relation: Either xReact (indicating the subject’s emotional reaction) or oReact (indicating the other person’s emotional reaction).
3.	target: A brief description of the emotion, state, or reaction.

# Task
For each entry in the dataset, you need to:
## Expand the Source
Replace “PersonX” or “PersonY” with a random human name (e.g., Alex, Jordan, María, Fatima, Ethan, etc.) and expand the event into a realistic scenario by adding enough context.
Write at least 3 sentences with detailed descriptions of the situation. (about 300 characters)
For example, if the source is “PersonX abandons ___ altogether,” you might expand it to:
“Veronica decides to stop relying on social media altogether, feeling that she has complete control over her personal life. ...”

## Map the Emotion: Map the target emotion onto the 8 primary emotions from Plutchik’s Wheel
* Joy
* Trust
* Fear
* Surprise
* Sadness
* Disgust
* Anger
* Anticipation

For each emotion, assign one of the following intensities:
* "na" (not applicable)
* "low"
* "medium"
* "high"

Interpret the target emotion (e.g., “authoritative”) in terms of Plutchik’s emotions. For example, you might decide:
* trust: high
* joy: low
* anticipation: medium

## Write a Reason:
Provide a one-sentence rationale ("reason") explaining why the subject (if xReact) or the other person (if oReact) feels the given emotion(s).
ex. “She feels empowered and confident after cutting out social media.”

# Note
* For xReact, the emotion data should pertain to the subject’s (formerly “PersonX”) reaction.
* For oReact, the emotion data should pertain to the other person (formerly “PersonY”)

Return in the following JSON format
* write prediction for each entry by its uid as key
{
    {uid_value}: {
      "source": "Expanded realistic scenario with a real name in place of PersonX/PersonY",
      "person": "name of the person that will feel this emotion",
      "emotion": {
        "joy": "na" | "low" | "medium" | "high",
        "trust": "na" | "low" | "medium" | "high",
        "fear": "na" | "low" | "medium" | "high",
        "surprise": "na" | "low" | "medium" | "high",
        "sadness": "na" | "low" | "medium" | "high",
        "disgust": "na" | "low" | "medium" | "high",
        "anger": "na" | "low" | "medium" | "high",
        "anticipation": "na" | "low" | "medium" | "high"
      },
      "reason": "One sentence explaining why these emotions occur"
    },
    ...
}

Only return the JSON, don't return in markdown format. (ex. "```json...") start like "{..."
'''
user_template = '''[Entries]
{entries}'''

'''
{
  "result": [
    {
      "uid": str,
      "source": "Expanded realistic scenario with a real name in place of PersonX/PersonY",
      "person": "name of the person that will feel this emotion",
      "emotion": {
        "joy": "na" | "low" | "medium" | "high",
        "trust": "na" | "low" | "medium" | "high",
        "fear": "na" | "low" | "medium" | "high",
        "surprise": "na" | "low" | "medium" | "high",
        "sadness": "na" | "low" | "medium" | "high",
        "disgust": "na" | "low" | "medium" | "high",
        "anger": "na" | "low" | "medium" | "high",
        "anticipation": "na" | "low" | "medium" | "high"
      },
      "reason": "One sentence explaining why these emotions occur"
    },
    ...
  ]
}
'''

'\n{\n  "result": [\n    {\n      "uid": str,\n      "source": "Expanded realistic scenario with a real name in place of PersonX/PersonY",\n      "person": "name of the person that will feel this emotion",\n      "emotion": {\n        "joy": "na" | "low" | "medium" | "high",\n        "trust": "na" | "low" | "medium" | "high",\n        "fear": "na" | "low" | "medium" | "high",\n        "surprise": "na" | "low" | "medium" | "high",\n        "sadness": "na" | "low" | "medium" | "high",\n        "disgust": "na" | "low" | "medium" | "high",\n        "anger": "na" | "low" | "medium" | "high",\n        "anticipation": "na" | "low" | "medium" | "high"\n      },\n      "reason": "One sentence explaining why these emotions occur"\n    },\n    ...\n  ]\n}\n'

In [15]:
import uuid
def prepare_entries(rows):
    uids = []
    entries = []
    for i in range(rows.shape[0]):
        row = rows.iloc[i]
        uid = str(uuid.uuid4())
        entry = {
            "uid": uid,
            "source": row['source'],
            "relation": row["relation"],
            "target": row["target"]
        }
        
        uids.append(uid)
        entries.append(entry)
    return uids, entries

batch_size = 32
idxs = list(range(batch_size))
uids, entries = prepare_entries(comet_df.iloc[idxs])

In [16]:
from typing import Any, List

def create_dynamic_enum(name: str, values: List[Any]) -> Enum:
    return Enum(name, {str(v): v for v in values})

In [17]:
user_message = user_template.format(
    entries = json.dumps(entries)
)

class EntryResult(BaseModel):
    # uid: create_dynamic_enum("uid", uids)
    source: str
    character: str
    emotion: EmotionLabel
    reason: str
    
    class Config:
        extra = Extra.forbid
        use_enum_values = True
        
class Results(BaseModel):
    result: List[EntryResult]
    
    class Config:
        extra = Extra.forbid
        use_enum_values = True

/var/folders/wj/0c7skj2154q4844jqxlw3yxr0000gn/T/ipykernel_25240/560613264.py:13: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  extra = Extra.forbid
/var/folders/wj/0c7skj2154q4844jqxlw3yxr0000gn/T/ipykernel_25240/560613264.py:20: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  extra = Extra.forbid


In [18]:
from pydantic import create_model

# spec_dict = dict()
# for k, v in specification.items():
#     dtype = v.get("dtype", str)
#     dim = v.get("dim", 0)
#     allowed_values = v.get("allowed_values", None)
#     default_val = ...

#     if isinstance(dtype, str):
#         dtype = get_dtype(dtype)

#     if allowed_values:
#         allowed_values = [dtype(v) for v in allowed_values]
#         enum = create_dynamic_enum(f"{k}-enum", allowed_values)
#         dtype = enum

#     ## Dim
#     if dim == 1:
#         dtype = List[dtype]
        
spec_dict = dict()
for uid in uids:
    spec_dict[uid] = (EntryResult, ...)

Results = create_model("Results", __config__={"extra": "forbid"}, **spec_dict)

In [19]:
# OpenAI
completion_result = await client.beta.chat.completions.parse(
    model=settings.llm_model,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ],
    response_format=Results,
)

# vLLM
# completion_result = await client.chat.completions.create(
#     model=settings.llm_model,
#     messages=[
#         {"role": "system", "content": system_prompt},
#         {"role": "user", "content": user_message},
#     ],
#     extra_body={"guided_json": Results.model_json_schema()}
# )

In [20]:
print(completion_result.usage.model_dump_json(indent=4))
prediction_result = completion_result.choices[0].message.parsed
print(prediction_result.model_dump_json(indent=4))


{
    "completion_tokens": 5069,
    "prompt_tokens": 3835,
    "total_tokens": 8904,
    "completion_tokens_details": {
        "accepted_prediction_tokens": 0,
        "audio_tokens": 0,
        "reasoning_tokens": 0,
        "rejected_prediction_tokens": 0
    },
    "prompt_tokens_details": {
        "audio_tokens": 0,
        "cached_tokens": 0
    }
}
{
    "4e7b1042-7531-47da-a70b-bd2a900d5539": {
        "source": "Liam decided to abandon his unhealthy habits altogether, feeling it was necessary for his well-being. He started exercising, eating healthier, and quitting smoking to improve his lifestyle. He was determined to make positive changes.",
        "character": "Liam",
        "emotion": {
            "joy": "na",
            "trust": "na",
            "fear": "na",
            "surprise": "na",
            "sadness": "na",
            "disgust": "na",
            "anger": "na",
            "anticipation": "na"
        },
        "reason": "He feels motivated and hopeful 

In [21]:
# prediction_result = result.choices[0].message.parsed
# print(prediction_result.model_dump_json(indent=4))

# batch_results = {}
# for result in prediction_result.result:
#     batch_results[result.uid]=result
    
# for uid, entry in zip(uids, entries):
#     print(f"UID: {uid}")
#     print("Entry: {}".format(json.dumps(entry)))
    
#     result = batch_results[uid]
#     print("Result: {}".format(result.model_dump_json()))
#     print(len(result.source))
#     print('-'*30)

In [22]:
prediction_result = json.loads(completion_result.choices[0].message.content)
print(len(prediction_result.keys()))
print(json.dumps(prediction_result, indent=4))

32
{
    "4e7b1042-7531-47da-a70b-bd2a900d5539": {
        "source": "Liam decided to abandon his unhealthy habits altogether, feeling it was necessary for his well-being. He started exercising, eating healthier, and quitting smoking to improve his lifestyle. He was determined to make positive changes.",
        "character": "Liam",
        "emotion": {
            "joy": "na",
            "trust": "na",
            "fear": "na",
            "surprise": "na",
            "sadness": "na",
            "disgust": "na",
            "anger": "na",
            "anticipation": "na"
        },
        "reason": "He feels motivated and hopeful about improving his health and lifestyle."
    },
    "32ffa926-3c8a-4b28-bc8d-749fb49ac71e": {
        "source": "Maria decided to abandon her old career path entirely, feeling unfulfilled in her previous job. She researched new opportunities and enrolled in courses to start fresh in a different field. Her decision marked a significant life change.",
   

In [24]:
# batch_results = {}
# print(len(prediction_result["result"]))
# for result in prediction_result["result"]:
#     batch_results[result["uid"]]=result
    
# for uid, entry in zip(uids, entries):
#     print(f"UID: {uid}")
#     print("Entry: {}".format(json.dumps(entry)))
    
#     result = batch_results[uid]
#     print("Result: {}".format(result.model_dump_json()))
#     print(len(result.source))
#     print('-'*30)