In [22]:
import json
# Load the records from disk. JSON text is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default.
with open("data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [23]:
from enum import Enum

class MNLILabel(Enum):
    """Premise/hypothesis relationship, keyed by the integer code stored in each record's ``label``."""

    ENTAILMENT = 0
    NEUTRAL = 1
    CONTRADICTION = 2


# Build both lookup tables from each member's own value rather than from its
# position in the class body, so they stay correct even if the codes are ever
# reordered or made non-contiguous.
int_to_label = {label.value: label for label in MNLILabel}
label_to_int = {label: label.value for label in MNLILabel}

In [24]:
# Sanity check: show the text fields and the decoded label of the first
# record only (the loop exits after one iteration; an empty dataset prints nothing).
for item in data:
    for text_field in ("premise", "hypothesis"):
        print(item[text_field])
    decoded_label = MNLILabel(item["label"])
    print(decoded_label)
    break

Conceptually cream skimming has two basic dimensions - product and geography.
Product and geography are what make cream skimming work.
MNLILabel.NEUTRAL


In [25]:
import anthropic
from anthropic import Anthropic
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the environment, then build the
# API client from the key stored there.
load_dotenv()

client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Ask the model to classify the first premise/hypothesis pair in free text.
first_pair = data[0]
nli_prompt = (
    "Here is a premise and hypothesis pair. Tell me if the relationship between them is entailment, neutral, or contradiction:\n\n"
    f"Premise: {first_pair['premise']}\n"
    f"Hypothesis: {first_pair['hypothesis']}"
)
user_turn = {"role": "user", "content": nli_prompt}

message = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=1024,
    messages=[user_turn],
)

# Printing the block itself shows its repr (type, citations, text).
first_block = message.content[0]
print(first_block)


TextBlock(citations=None, text='The relationship between this premise and hypothesis is NEUTRAL.\n\nHere\'s why:\n- The premise states that cream skimming has two basic dimensions: product and geography\n- The hypothesis makes a stronger claim that these two dimensions are what "make cream skimming work"\n- While the premise identifies these dimensions as components, it doesn\'t make any claims about their role in making cream skimming successful or functional\n- The hypothesis goes beyond the information provided in the premise by making an assertion about effectiveness/functionality\n\nThe hypothesis might be true, but we cannot determine this solely from the information given in the premise. Therefore, the relationship is neutral.', type='text')


In [26]:
# Show just the text of the first content block instead of the block's repr.
reply_block = message.content[0]
print(reply_block.text)

The relationship between this premise and hypothesis is NEUTRAL.

Here's why:
- The premise states that cream skimming has two basic dimensions: product and geography
- The hypothesis makes a stronger claim that these two dimensions are what "make cream skimming work"
- While the premise identifies these dimensions as components, it doesn't make any claims about their role in making cream skimming successful or functional
- The hypothesis goes beyond the information provided in the premise by making an assertion about effectiveness/functionality

The hypothesis might be true, but we cannot determine this solely from the information given in the premise. Therefore, the relationship is neutral.


In [27]:
from pydantic import BaseModel, Field

# Emotion scores requested from the model. Each field is a float constrained
# to the closed interval [0, 1].
# NOTE: documented with comments rather than a class docstring on purpose:
# pydantic copies a model docstring into the generated JSON schema, which
# would change the schema text embedded in the prompts.
class EmotionInfo(BaseModel):
    # Energy / activation of the emotion.
    arousal: float = Field(
        ge=0,
        le=1,
        description="Level of energy/activation in the emotion, from calm (0) to excited (1)",
    )
    # Pleasantness of the emotion.
    valence: float = Field(
        ge=0,
        le=1,
        description="Pleasantness of the emotion, from negative (0) to positive (1)",
    )
    # Overall strength of the emotional response.
    intensity: float = Field(
        ge=0,
        le=1,
        description="Overall strength of the emotional response, from weak (0) to strong (1)",
    )

In [32]:
# Serialise the model's JSON schema to a string once; it is interpolated into
# the prompt sent to the model.
_emotion_schema_dict = EmotionInfo.model_json_schema()
emotion_info_schema = json.dumps(_emotion_schema_dict)

In [36]:
import anthropic
from anthropic import Anthropic
import os
import json
from dotenv import load_dotenv

# Read a local .env file (if any) into the process environment; the API key
# is looked up with os.getenv("ANTHROPIC_API_KEY") when the client is built.
load_dotenv()

def get_emotion_info(
        input_text: str, 
        parse_error: str | None = None, 
        previous_output: str | None = None,
        try_count: int = 0,
        max_retries: int = 3
    ):
    client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
    
    prompt = f"""Analyze the emotional content of this text and output a JSON object with the following schema:
    {emotion_info_schema}
    
    Only output valid JSON, nothing else.
    
    Text to analyze: {input_text}"""

    if parse_error:
        prompt += f"You already outputted the following JSON, but it was invalid:\n{previous_output}\nValidation errors: {parse_error}\nPlease fix the errors and output a valid JSON."

    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=1024,
        messages=[{
            "role": "user", 
            "content": prompt
        }]
    )
    try:
        response_json = json.loads(message.content[0].text)
        return response_json
    except json.JSONDecodeError as e:
        return get_emotion_info(input_text, str(e), message.content[0].text, try_count + 1, max_retries)


In [37]:

# Attach emotion scores for both sentences to every record. Rows are appended
# one at a time, so rows completed before an error remain in the list.
augmented_data = []
for record in data:
    # Premise is scored first, then hypothesis (one call to get_emotion_info each).
    premise_emotion, hypothesis_emotion = (
        get_emotion_info(record[text_field])
        for text_field in ("premise", "hypothesis")
    )

    augmented_row = {
        "premise": record["premise"],
        "hypothesis": record["hypothesis"],
        "emotion_info_premise": premise_emotion,
        "emotion_info_hypothesis": hypothesis_emotion,
        "label": record["label"],
    }
    augmented_data.append(augmented_row)

In [38]:
# Write the augmented records to disk. The context manager guarantees the file
# is flushed and closed even if serialisation fails part-way.
with open("augmented_data.json", "w", encoding="utf-8") as out_file:
    json.dump(augmented_data, out_file)