In [141]:
# Import necessary libraries
from dotenv import load_dotenv
import os
import pandas as pd
from typing import TypeVar, Any
from pydantic import BaseModel, Field, create_model

import litellm
from litellm import completion
from instructor import from_litellm, Mode

from typing import Literal

import pandas as pd
from sklearn.metrics import classification_report

# Load definitions from the Oxford dictionary

In [142]:
# Read the emotion/definition table and flatten it into one
# "emotion: definition" line per row, joined with newlines.
df = pd.read_csv("data/oxford_ekman_emotions.csv")
definition_lines = [
    f"{emotion}: {definition}"
    for emotion, definition in zip(df["emotion"], df["definition"])
]
definitions = "\n".join(definition_lines)

# Create a system prompt for LLMs

In [143]:
# Instruction text for multi-label emotion detection: lists the seven allowed
# labels in lower case and appends the dictionary definitions from `definitions`.
# NOTE(review): none of the cells visible in this notebook pass `system_prompt`
# to the LLM, and it asks for a JSON array of several emotions whereas
# `EmotionPrediction` accepts exactly one capitalised label -- confirm which
# contract is intended before wiring it in.
system_prompt = f"""
You’re given a piece of text.  Your job is to pick **all** emotions (one or more) from this list:
  [joy, anger, sadness, fear, disgust, surprise, neutral]

Respond with a **JSON array** of exact emotion keywords.  
– If more than one emotion fits, list them all.  
– If none apply, return an empty array: []  
– Don’t include any explanation or extra text.

Emotion definitions:
{definitions}
"""

# Creating instructor client from litellm

In [144]:
# Module-wide LiteLLM switch; per the LiteLLM docs it drops request parameters
# the selected provider does not support instead of raising.
litellm.drop_params = True
# Instructor-wrapped `completion` in JSON mode; this standalone `client` is the
# one used by the manual-invocation cell further down.
client = from_litellm(completion, mode=Mode.JSON)  

# Define EkmanEmotion as a Literal of valid emotions

In [145]:

# Closed set of labels a single-label prediction may take.
# The values are capitalised, while the dataset columns and the prompts in this
# notebook use lower case; the classification-report cell reconciles the two
# with `.str.capitalize()`.
EkmanEmotion = Literal["Anger", "Disgust", "Fear", "Joy", "Sadness", "Surprise", "Neutral"]



# Define the EmotionPrediction model

In [146]:
# Structured-output schema for single-label classification: the reply must
# carry exactly one value from `EkmanEmotion`. Used as `response_model` in the
# batch-prediction cell.
# (Documented with comments rather than a docstring: pydantic presumably
# exposes a class docstring as the schema description -- TODO confirm.)
class EmotionPrediction(BaseModel):
    emotion: EkmanEmotion

In [None]:
#response_model = create_model(
#    "MyResponseModel", 
#    reasoning=(str, Field(description="The short reasoning behind the answer")),
#    answer=(str, Field(description="Your answer to the question")),
#    __base__=BaseModel
#) 

# Define the base response model and the LLMCaller wrapper class

In [147]:
# Default response model: used as the default `response_model` of
# `LLMCaller.invoke` and as the base class in `LLMCaller.create_response_model`.
# NOTE(review): the docstring below talks about a list of emotions, but the only
# declared field is a single string `answer`. The docstring is left untouched
# because pydantic presumably exposes it as the schema description, so
# rewording it could change what the LLM is shown -- TODO confirm and align.
class BaseResponse(BaseModel):
    """A default response model that stores a list of predicted Ekman emotions. We will use this to predict the emotions of a review."""
    answer: str

# Type variable restricted to pydantic models (BaseModel or any subclass).
# It appears in the annotations of `LLMCaller.create_response_model` and
# `LLMCaller.invoke` to describe the response model being used.
ResponseType = TypeVar("ResponseType", bound=BaseModel)

class LLMCaller:
    """
    Thin wrapper around an Instructor-patched LiteLLM ``completion`` function.

    Sends a single user prompt to the configured model and returns the reply
    parsed into a Pydantic model (e.g., predicted emotions).
    """

    def __init__(self, api_key: str, project_id: str, api_url: str, model_id: str, params: dict[str, Any]):
        """Initializes the LLMCaller with Watsonx credentials and configuration.

        Args:
            api_key (str): API key sent with every request.
            project_id (str): Project identifier sent with every request.
            api_url (str): Base URL of the API endpoint.
            model_id (str): Model identifier passed to the client as ``model``.
            params (dict[str, Any]): Default keyword arguments (for example
                ``{"max_tokens": 100}``) forwarded with every ``invoke`` call.
                ``None`` is treated as "no defaults".
        """
        self.api_key = api_key
        self.project_id = project_id
        self.api_url = api_url
        self.model_id = model_id
        # Copy so that later mutation of the caller's dict cannot silently
        # change this instance's defaults; also tolerates ``params=None``.
        self.params = dict(params or {})

        # Module-wide LiteLLM setting (affects every LiteLLM call in the process).
        litellm.drop_params = True
        self.client = from_litellm(completion, mode=Mode.JSON)

    def create_response_model(self, title: str, fields: dict) -> type[BaseResponse]:
        """ Dynamically creates a Pydantic response model for the LLM's output.

        Args:
            title (str): The name of the response model.
            fields (dict): A dictionary defining the fields of the response model.
                           Keys are field names, and values are tuples of (type, Field).

        Returns:
            type[BaseResponse]: A dynamically created Pydantic model class that
            inherits from BaseResponse (and therefore also has its ``answer`` field).
        """
        return create_model(title, **fields, __base__=BaseResponse)

    def invoke(self, prompt: str, response_model: type[ResponseType] = BaseResponse, **kwargs) -> ResponseType:
        """ Sends a prompt to the LLM and retrieves a structured response.

        Args:
            prompt (str): The input prompt to send to the LLM.
            response_model (type[ResponseType]): The Pydantic model class used to
                                                 structure the LLM's response.
                                                 Defaults to BaseResponse.
            **kwargs: Additional arguments to pass to the LLM client. They are
                      merged over the instance-level ``params`` and win on conflict.

        Returns:
            ResponseType: The structured response from the LLM, parsed into the specified response model.
        """
        # Instance defaults first, per-call overrides second. Previously
        # ``self.params`` was stored but never sent, so settings such as
        # ``max_tokens`` had no effect.
        call_options = {**self.params, **kwargs}

        response = self.client.chat.completions.create(
            model=self.model_id,
            messages=[{
                "role": "user",
                # The structure hint lists the annotations declared directly on
                # ``response_model``; inherited fields are not part of
                # ``__annotations__``.
                "content": prompt + "\n\nRespond using this structure: " + str(response_model.__annotations__)
            }],
            project_id=self.project_id,
            apikey=self.api_key,
            api_base=self.api_url,
            response_model=response_model,
            **call_options
        )
        return response



# Initialize the LLMCaller

In [148]:
# Load variables from a .env file (if present) into the process environment.
load_dotenv()

# Credentials and endpoint come from the environment; a variable that is not
# set is passed through as None.
watsonx_settings = dict(
    api_key=os.environ.get("WX_API_KEY"),
    project_id=os.environ.get("WX_PROJECT_ID_RAG"),
    api_url=os.environ.get("WX_URL"),
)

# Shared caller used by the test and batch-prediction cells below.
llm = LLMCaller(
    **watsonx_settings,
    model_id="watsonx/mistralai/mistral-large",
    params=dict(max_tokens=100),
)


## Define the EmotionResponse Model

This Pydantic model specifies the expected output format from the LLM when detecting emotions in a text.

In [149]:
# Multi-label schema: `emotions` is a required list of free-form strings, so the
# returned labels are NOT validated against `EkmanEmotion` (the sample output in
# this notebook shows lower-case labels such as 'fear' and 'joy').
class EmotionResponse(BaseModel):
    emotions: list[str] = Field(..., description="The list of Ekman emotions expressed in the review.")


## Test the Emotion Detection

This example sends a sample input to the `LLMCaller` using the `EmotionResponse` model. 
The input text contains mixed emotional signals, and the model is expected to return 
a list of all applicable Ekman emotions (e.g., both "fear" and "joy").

In [150]:
# Smoke test: one sentence with mixed signals, parsed into EmotionResponse.
# Being the last expression of the cell, the parsed model is shown as output.
llm.invoke("that scared me! that was a lot of fun", response_model=EmotionResponse) 

EmotionResponse(emotions=['fear', 'joy'])

# Manual Prompt and Direct LLM Invocation

In [151]:

# Instruction part of the request: restricts the answer to the seven allowed labels.
prompt = """You are an emotion detection expert. Identify one or more of the following 7 emotions sadness, anger, joy, surprise, fear, disgust, neutral. Do not make new emotions.
"""

# Text to classify. The original request carried only the instruction, so the
# model had nothing to analyse; the sentence from the LLMCaller test cell is
# reused here so both invocation styles can be compared.
sample_text = "that scared me! that was a lot of fun"

# Make the request directly through the Instructor client (no LLMCaller wrapper).
response = client.chat.completions.create(
    model="watsonx/mistralai/mistral-large",
    messages=[
        {
            "role": "user",
            "content": f"{prompt}\nText: {sample_text}",
        }
    ],
    project_id=os.getenv("WX_PROJECT_ID_RAG"),
    apikey=os.getenv("WX_API_KEY"),
    # The LLMCaller cell reads the endpoint from WX_URL while this cell used
    # WX_API_URL; accept either so both cells hit the same endpoint when only
    # one of the two variables is defined.
    api_base=os.getenv("WX_API_URL") or os.getenv("WX_URL"),
    response_model=EmotionResponse,
)

# Show the parsed EmotionResponse as the cell output.
response

# Predict emotions for the test sample and save them to ekman_test_with_predictions_sample.csv

In [None]:
# Load only the first N rows for faster testing (nrows stops parsing after N
# rows instead of reading the whole file and truncating it afterwards).
N = 50
df_test = pd.read_csv("data/ekman_test.csv", nrows=N)

# Label recorded when a prediction fails. Capitalised to match the values of
# EkmanEmotion, so the saved column uses one consistent casing.
FALLBACK_EMOTION = "Neutral"

predicted_emotions_list = []  # one single-element list of labels per row
failed_rows = []              # index labels of rows whose prediction failed

for row_index, text in df_test["text"].items():
    try:
        # Call the LLM to predict emotion using the EmotionPrediction response model
        response = llm.invoke(prompt=text, response_model=EmotionPrediction)
        predicted_emotions_list.append([response.emotion])
    except Exception as exc:
        # Best-effort: keep the run going and record the fallback label.
        # str(text) keeps the message itself from failing on non-string
        # values such as NaN from an empty CSV cell.
        print(f"Error with: {str(text)[:50]}... -> {exc}")
        predicted_emotions_list.append([FALLBACK_EMOTION])
        failed_rows.append(row_index)

# Failed rows are scored as FALLBACK_EMOTION in the report, which skews that
# class; make the amount visible instead of hiding it.
if failed_rows:
    print(
        f"{len(failed_rows)} of {len(df_test)} predictions failed and were "
        f"recorded as {FALLBACK_EMOTION!r}; row indices: {failed_rows}"
    )

df_test["predicted_emotions"] = [", ".join(labels) for labels in predicted_emotions_list]
df_test.to_csv("data/ekman_test_with_predictions_sample.csv", index=False)



# Classification report

In [None]:
# Load the predictions written by the previous cell
df = pd.read_csv('data/ekman_test_with_predictions_sample.csv')

# One-hot true-emotion columns (assumed to hold 0/1 flags -- TODO confirm)
emotion_cols = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']

# Ensure these columns actually exist
missing = [c for c in emotion_cols if c not in df.columns]
if missing:
    raise KeyError(f"The following emotion columns are missing: {missing}")

# idxmax returns the first column holding the row maximum. A row without any
# positive flag would therefore be scored as 'anger', and a row with several
# flags collapses to the first listed emotion. Surface both cases instead of
# letting them distort the report silently.
labels_per_row = df[emotion_cols].sum(axis=1)
has_label = labels_per_row > 0
n_unlabelled = int((~has_label).sum())
n_multi_label = int((labels_per_row > 1).sum())
if n_unlabelled:
    print(f"Excluding {n_unlabelled} row(s) without any true emotion from the report.")
if n_multi_label:
    print(f"{n_multi_label} row(s) have several true emotions; only the first listed one is scored.")

# Derive the true label from the one-hot columns
df['actual'] = df[emotion_cols].idxmax(axis=1).str.capitalize()

# Standardize predicted labels (trim stray whitespace, then match the casing of 'actual')
df['predicted'] = df['predicted_emotions'].str.strip().str.capitalize()

# Score only rows that have a ground-truth label
print(classification_report(
    df.loc[has_label, 'actual'],
    df.loc[has_label, 'predicted'],
    zero_division=0
))



              precision    recall  f1-score   support

       Anger       0.41      0.39      0.40        74
     Disgust       0.06      0.33      0.10         9
        Fear       0.38      0.56      0.45         9
         Joy       0.69      0.50      0.58       199
     Neutral       0.44      0.36      0.40       140
     Sadness       0.18      0.41      0.26        29
    Surprise       0.26      0.28      0.27        40

    accuracy                           0.42       500
   macro avg       0.35      0.40      0.35       500
weighted avg       0.50      0.42      0.45       500

