In [None]:
# Import necessary libraries
import os
from typing import Any, List, Literal, Optional, TypeVar

import litellm
import pandas as pd
from dotenv import load_dotenv
from instructor import Mode, from_litellm
from litellm import completion
from pydantic import BaseModel, Field, create_model
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import MultiLabelBinarizer

# Load definitions from the Oxford dictionary

In [None]:
# Read the emotion/definition table and flatten it into one "emotion: definition" line per row.
df = pd.read_csv("data/definitions_ekman_emotions.csv")
definition_lines = [
    f"{emotion}: {definition}"
    for emotion, definition in zip(df["emotion"], df["definition"])
]
definitions = "\n".join(definition_lines)

# Create a system prompt for LLMs

In [None]:
# Instruction text for the classifier: lists the seven allowed labels, asks for a JSON array
# of labels, and embeds the "emotion: definition" lines held in `definitions`.
# NOTE(review): no later cell shown in this notebook passes `system_prompt` to the model —
# confirm whether it is meant to be sent with each request.
# NOTE(review): the labels here are lowercase and the requested reply is a bare array, while
# `EkmanEmotion` uses capitalised labels and the response models wrap the list in a field —
# confirm which convention is intended before wiring this prompt in.
system_prompt = f"""
You’re given a piece of text.  Your job is to pick **all** emotions (one or more) from this list:
  [joy, anger, sadness, fear, disgust, surprise, neutral]

Respond with a **JSON array** of exact emotion keywords.  
– If more than one emotion fits, list them all.  
– If none apply, return an empty array: []  
– Don’t include any explanation or extra text.

Emotion definitions:
{definitions}
"""

# Creating instructor client from litellm

In [42]:
# Global LiteLLM switch: per the LiteLLM docs, request parameters the target provider
# does not support are dropped instead of raising.
litellm.drop_params = True
# Module-level Instructor client wrapping litellm.completion, configured for Instructor's JSON mode.
client = from_litellm(completion, mode=Mode.JSON)

# Define EkmanEmotion as a Literal of valid emotions

In [43]:

# Closed set of label strings (capitalised). Used below as the element type of
# EmotionPrediction.emotion, so only these exact spellings are accepted there.
EkmanEmotion = Literal["Anger", "Disgust", "Fear", "Joy", "Sadness", "Surprise", "Neutral"]



# Define the EmotionPrediction model

In [44]:
class EmotionPrediction(BaseModel):
    """Structured LLM output: the Ekman emotion labels detected in one text."""

    # Builtin generic rather than typing.List, matching the list[str] / dict[str, Any]
    # annotations used elsewhere in this notebook; no mid-notebook import is needed.
    emotion: list[EkmanEmotion] = Field(
        ..., description="All Ekman emotion labels expressed in the text (one or more)."
    )

# Define the LLMCaller wrapper (LiteLLM + Instructor, JSON response mode)

In [45]:
# Base response model: the default for LLMCaller.invoke and the base class for
# models built by LLMCaller.create_response_model.
class BaseResponse(BaseModel):
    """Default response model holding a single free-text answer from the LLM."""
    answer: str

# Type variable bounded to Pydantic BaseModel; used in LLMCaller's signatures to
# annotate the response model and the value returned for it.
ResponseType = TypeVar("ResponseType", bound=BaseModel)

class LLMCaller:
    """
    Thin wrapper around an Instructor-patched LiteLLM client.

    Holds the credentials, model id and default request parameters given to the
    constructor, sends prompts to the chat-completions endpoint, and returns the
    reply parsed into a Pydantic model (e.g. predicted emotions).
    """
    def __init__(self, api_key: str, project_id: str, api_url: str, model_id: str, params: dict[str, Any]):
        """Initializes the LLMCaller with Watsonx credentials and configuration.

        Args:
            api_key (str): API key sent with every request.
            project_id (str): Project id sent with every request.
            api_url (str): Base URL of the API.
            model_id (str): LiteLLM model identifier, e.g. "watsonx/mistralai/mistral-large".
            params (dict[str, Any]): Default request parameters (e.g. {"max_tokens": 100})
                applied to every `invoke` call unless overridden there.
        """
        self.api_key = api_key
        self.project_id = project_id
        self.api_url = api_url
        self.model_id = model_id
        # Copy so that later edits to the caller's dict do not silently change our defaults.
        self.params = dict(params) if params else {}

        litellm.drop_params = True
        self.client = from_litellm(completion, mode=Mode.JSON)

    def create_response_model(self, title: str, fields: dict) -> type[BaseResponse]:
        """ Dynamically creates a Pydantic response model for the LLM's output.

        Args:
            title (str): The name of the response model.
            fields (dict): A dictionary defining the fields of the response model.
                           Keys are field names, and values are tuples of (type, Field).

        Returns:
            type[BaseResponse]: A new model class that extends BaseResponse (so it also
            carries BaseResponse's `answer` field) with the given fields.
        """
        return create_model(title, **fields, __base__=BaseResponse)

    def invoke(
        self,
        prompt: str,
        response_model: type[ResponseType] = BaseResponse,
        system_prompt: Optional[str] = None,
        **kwargs,
    ) -> ResponseType:
        """ Sends a prompt to the LLM and retrieves a structured response.

        Args:
            prompt (str): The input prompt to send to the LLM.
            response_model (type[ResponseType]): The Pydantic model class used to structure
                                                 the LLM's response. Defaults to BaseResponse.
            system_prompt (Optional[str]): Optional instructions sent as a system message
                                           ahead of the user prompt. Omitted when None or empty.
            **kwargs: Additional arguments to pass to the LLM client. These override
                      same-named entries of the `params` given to the constructor.

        Returns:
            ResponseType: The structured response from the LLM, parsed into the specified response model.
        """
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({
            "role": "user",
            # The model's field annotations are appended as a plain-text hint about the expected shape.
            "content": prompt + "\n\nRespond using this structure: " + str(response_model.__annotations__)
        })

        # Constructor defaults first, per-call overrides last. Previously `self.params`
        # was stored but never sent, so settings such as max_tokens had no effect.
        request_params = {**self.params, **kwargs}

        response = self.client.chat.completions.create(
            model=self.model_id,
            messages=messages,
            project_id=self.project_id,
            apikey=self.api_key,
            api_base=self.api_url,
            response_model=response_model,
            **request_params
        )
        return response



# Initialize the LLMCaller

In [46]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Connection settings are read from the environment; os.getenv yields None for any that are unset.
watsonx_credentials = {
    "api_key": os.getenv("WX_API_KEY"),
    "project_id": os.getenv("WX_PROJECT_ID_RAG"),
    "api_url": os.getenv("WX_URL"),
}

llm = LLMCaller(
    **watsonx_credentials,
    model_id="watsonx/mistralai/mistral-large",
    params={"max_tokens": 100},
)


## Define the EmotionResponse Model

This Pydantic model specifies the expected output format from the LLM when detecting emotions in a text.

In [47]:
# Response model for the demo calls below: one required field holding a list of
# free-form strings (not restricted to the EkmanEmotion values).
class EmotionResponse(BaseModel):
    emotions: list[str] = Field(..., description="The list of Ekman emotions expressed in the review.")


## Test the Emotion Detection

This example sends a sample input to the `LLMCaller` using the `EmotionResponse` model. 
The input text contains mixed emotional signals, and the model is expected to return 
a list of all applicable Ekman emotions (e.g., both "fear" and "joy").

In [48]:
# Smoke test on a sentence with mixed signals; the parsed model is the cell's displayed value.
llm.invoke(
    prompt="that scared me! that was a lot of fun",
    response_model=EmotionResponse,
)

EmotionResponse(emotions=['fear', 'joy'])

# Manual Prompt and Direct LLM Invocation

In [None]:

# define a prompt
prompt = """You are a highly precise emotion-classification agent that excels at detecting emotion from text. 
you will be given a text / review and similar examples with labels, along with the definitions of each the emotions.

Your task is to read a text / customer review, understand its content, and assign *one or more* of these emotions: 
anger, disgust, fear, joy, sadness, surprise, or neutral if no clear emotion is detected.
"""

# The instructions alone give the model nothing to label, so attach a text to classify
# and the definitions the prompt says will be provided.
sample_text = "that scared me! that was a lot of fun"
user_message = f"{prompt}\nEmotion definitions:\n{definitions}\n\nText: {sample_text}"

# make a request to the LLM
response = client.chat.completions.create(
    model="watsonx/mistralai/mistral-large",
    messages=[
        {
            "role": "user",
            "content": user_message,
        }
    ],
    project_id=os.getenv("WX_PROJECT_ID_RAG"),
    apikey=os.getenv("WX_API_KEY"),
    # Same variable the LLMCaller setup reads; "WX_API_URL" is not set or read anywhere
    # else in this notebook, so os.getenv would have returned None here.
    api_base=os.getenv("WX_URL"),
    response_model=EmotionResponse,
)

# Show the parsed result as the cell output.
response

# Make predictions and save them to data/ekman_test_with_predictions_sample.csv

In [52]:
# Load only the first N rows for faster testing.
N = 500
# nrows makes pandas stop parsing after N rows instead of reading the whole file and slicing it.
df_test = pd.read_csv("data/ekman_test.csv", nrows=N)

# Label recorded when a request fails. Capitalised like the EkmanEmotion values so the
# saved CSV does not mix "neutral" and "Neutral".
FALLBACK_EMOTIONS = ["Neutral"]

predicted_emotions_list = []
failed_rows = []  # positions of rows whose label is the fallback, not a model answer

# Use the LLMCaller instance to predict emotions.
# NOTE(review): only the raw text (plus the structure hint that LLMCaller.invoke appends)
# is sent here; the task instructions and definitions in `system_prompt` are not part of
# the request — confirm whether that is intended.
for row_pos, text in enumerate(df_test["text"]):
    try:
        response = llm.invoke(prompt=text, response_model=EmotionPrediction)
        predicted_emotions_list.append(response.emotion)
    except Exception as e:
        # str() because a missing text is a float NaN; slicing it here would raise inside
        # the handler and abort the whole run.
        print(f"Error with: {str(text)[:50]}... -> {e}")
        failed_rows.append(row_pos)
        predicted_emotions_list.append(list(FALLBACK_EMOTIONS))  # fallback

# Failed rows are scored as the fallback label, so report how many there were.
if failed_rows:
    print(f"{len(failed_rows)} of {len(df_test)} rows failed and were labelled {FALLBACK_EMOTIONS}")

# Print some examples for inspection
for i, item in enumerate(predicted_emotions_list[:5]):
    print(f"{i}: {item} ({type(item)})")

# Flatten each label list to "A, B" so it round-trips through a single CSV column.
cleaned_predictions = [
    ", ".join(str(label) for label in emotions)
    for emotions in predicted_emotions_list
]

df_test["predicted_emotions"] = cleaned_predictions

# Save to CSV
df_test.to_csv("data/ekman_test_with_predictions_sample.csv", index=False)



0: ['Joy'] (<class 'list'>)
1: ['Joy'] (<class 'list'>)
2: ['Disgust'] (<class 'list'>)
3: ['Neutral'] (<class 'list'>)
4: ['Anger', 'Disgust', 'Sadness'] (<class 'list'>)


# Classification report

In [53]:

# One ordered label list drives both the ground-truth columns and the binarizer, so the
# columns of y_true and y_pred cannot drift out of alignment.
EMOTION_LABELS = ["Anger", "Disgust", "Fear", "Joy", "Neutral", "Sadness", "Surprise"]

# True labels: one column per emotion (lowercase column names), taken as-is.
# Assumes these columns already hold 0/1 indicators — TODO confirm against the dataset.
y_true = df_test[[label.lower() for label in EMOTION_LABELS]].values
y_true = y_true.tolist()


def _split_labels(cell) -> list[str]:
    """Turn a "Joy, Anger" string (or an already-parsed list) into ["Joy", "Anger"]."""
    if isinstance(cell, (list, tuple)):
        parts = cell
    elif isinstance(cell, str):
        parts = cell.split(",")
    else:
        parts = []  # NaN / missing prediction
    return [str(part).strip().capitalize() for part in parts if str(part).strip()]


# Parse into a separate Series instead of overwriting df_test["predicted_emotions"]:
# the column keeps its string form, so this cell can be re-run without failing on .split().
predicted_labels = df_test["predicted_emotions"].apply(_split_labels)

# Binarize predicted labels
mlb = MultiLabelBinarizer(classes=EMOTION_LABELS)
y_pred = mlb.fit_transform(predicted_labels)

# classification_report; zero_division=0 reports undefined scores as 0 without emitting a warning.
print(classification_report(y_true, y_pred, target_names=mlb.classes_, zero_division=0))


              precision    recall  f1-score   support

       Anger       0.36      0.53      0.43        74
     Disgust       0.08      0.57      0.14        14
        Fear       0.13      0.67      0.22         9
         Joy       0.69      0.59      0.64       207
     Neutral       0.41      0.39      0.40       140
     Sadness       0.20      0.49      0.28        45
    Surprise       0.23      0.39      0.29        64

   micro avg       0.35      0.50      0.41       553
   macro avg       0.30      0.52      0.34       553
weighted avg       0.46      0.50      0.46       553
 samples avg       0.41      0.51      0.44       553



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Computed separately from the report above. For multilabel indicator input, scikit-learn's
# accuracy_score is the exact-match (subset) accuracy: a row counts only if every label matches.
accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.28
