In [1]:
import pandas as pd

books = pd.read_csv("books_with_categories.csv")

In [2]:
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
import os
from huggingface_hub import InferenceClient

my_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not my_key:
    print("Error: HUGGINGFACEHUB_API_TOKEN not found in .env file.")
else:
    print("API Client Initialized.")

client = InferenceClient(token=my_key)
MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"


def classifier(text_input: str):
    if not text_input or not isinstance(text_input, str):
        return "Invalid input. Please provide a text string."

    try:
        result = client.text_classification(
            text=text_input,
            model=MODEL_NAME,
            top_k=None,
        )
        return result
    except Exception as e:
        # This will catch rate limits, 504 timeouts, etc.
        return f"API Error: {e}"

classifier("I love this!")

API Client Initialized.


[TextClassificationOutputElement(label='joy', score=0.9771687984466553),
 TextClassificationOutputElement(label='surprise', score=0.008528676815330982),
 TextClassificationOutputElement(label='neutral', score=0.005764589179307222),
 TextClassificationOutputElement(label='anger', score=0.004419777542352676),
 TextClassificationOutputElement(label='sadness', score=0.002092391485348344)]

In [4]:
books["description"][0]


'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gilead is a song of celebration and acceptance of the best and the worst the world ha

In [5]:
classifier(books["description"][0])

[TextClassificationOutputElement(label='fear', score=0.6548415422439575),
 TextClassificationOutputElement(label='neutral', score=0.1698518991470337),
 TextClassificationOutputElement(label='sadness', score=0.1164088100194931),
 TextClassificationOutputElement(label='surprise', score=0.02070065215229988),
 TextClassificationOutputElement(label='disgust', score=0.019100727513432503)]

In [6]:
sentences = books["description"][0].split(".")

predictions = [classifier(s.strip()) for s in sentences if s.strip()]

In [7]:
sentences[0]


'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives'

In [8]:
predictions[0]

[TextClassificationOutputElement(label='surprise', score=0.7296026349067688),
 TextClassificationOutputElement(label='neutral', score=0.14038579165935516),
 TextClassificationOutputElement(label='fear', score=0.06816219538450241),
 TextClassificationOutputElement(label='joy', score=0.04794241860508919),
 TextClassificationOutputElement(label='anger', score=0.009156355634331703)]

In [9]:
sentences[3]

' Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist'

In [10]:
predictions[3]

[TextClassificationOutputElement(label='fear', score=0.9839729070663452),
 TextClassificationOutputElement(label='neutral', score=0.004363971762359142),
 TextClassificationOutputElement(label='sadness', score=0.004239327739924192),
 TextClassificationOutputElement(label='anger', score=0.0027069835923612118),
 TextClassificationOutputElement(label='surprise', score=0.0023316992446780205)]

In [11]:
predictions


[[TextClassificationOutputElement(label='surprise', score=0.7296026349067688),
  TextClassificationOutputElement(label='neutral', score=0.14038579165935516),
  TextClassificationOutputElement(label='fear', score=0.06816219538450241),
  TextClassificationOutputElement(label='joy', score=0.04794241860508919),
  TextClassificationOutputElement(label='anger', score=0.009156355634331703)],
 [TextClassificationOutputElement(label='neutral', score=0.46625006198883057),
  TextClassificationOutputElement(label='disgust', score=0.338238924741745),
  TextClassificationOutputElement(label='joy', score=0.08201294392347336),
  TextClassificationOutputElement(label='sadness', score=0.06111681088805199),
  TextClassificationOutputElement(label='anger', score=0.029641350731253624)],
 [TextClassificationOutputElement(label='neutral', score=0.6978468298912048),
  TextClassificationOutputElement(label='sadness', score=0.20104210078716278),
  TextClassificationOutputElement(label='disgust', score=0.0365927

In [12]:
sorted(predictions[0], key=lambda x: x["label"])


[TextClassificationOutputElement(label='anger', score=0.009156355634331703),
 TextClassificationOutputElement(label='fear', score=0.06816219538450241),
 TextClassificationOutputElement(label='joy', score=0.04794241860508919),
 TextClassificationOutputElement(label='neutral', score=0.14038579165935516),
 TextClassificationOutputElement(label='surprise', score=0.7296026349067688)]

In [16]:
import numpy as np

emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}
#
# def calculate_max_emotion_scores(predictions):
#     per_emotion_scores = {label: [] for label in emotion_labels}
#     for prediction in predictions:
#         sorted_predictions = sorted(prediction, key=lambda x: x["label"])
#         for index, label in enumerate(emotion_labels):
#             per_emotion_scores[label].append(sorted_predictions[index]["score"])
#     return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

In [14]:
# for i in range(10):
#     isbn.append(books["isbn13"][i])
#     sentences = books["description"][i].split(".")
#     predictions = [classifier(s.strip()) for s in sentences if s.strip()]
#     max_scores = calculate_max_emotion_scores(predictions)
#     for label in emotion_labels:
#         emotion_scores[label].append(max_scores[label])

In [18]:
def calculate_max_emotion_scores(predictions_for_all_sentences):
    # This dictionary will hold all scores found for each emotion
    per_emotion_scores = {label: [] for label in emotion_labels}

    # 'prediction_list' is the list of emotions for a *single* sentence
    for prediction_list in predictions_for_all_sentences:

        # 'prediction' is the dictionary for a *single* emotion
        for prediction in prediction_list:
            label = prediction["label"]
            score = prediction["score"]

            # Add the score to the correct list
            if label in per_emotion_scores:
                per_emotion_scores[label].append(score)

    # Now, find the max for each list, handling empty lists
    max_scores = {}
    for label, scores in per_emotion_scores.items():
        if scores:  # If the list is not empty
            max_scores[label] = np.max(scores)
        else:
            max_scores[label] = 0.0  # Default score if no sentences were found

    return max_scores

In [19]:
# (Use the new, robust calculate_max_emotion_scores function from Solution 2)

for i in range(10):
    isbn.append(books["isbn13"][i])

    # 1. Get all sentences for the book
    sentences = [s.strip() for s in books["description"][i].split(".") if s.strip()]

    predictions = []
    if sentences:  # Only call the classifier if we have sentences
        try:
            # 2. Call the classifier ONCE for all sentences
            predictions = [classifier(s) for s in sentences]
        except Exception as e:
            print(f"Error during classification: {e}")
            predictions = []

    # 3. Calculate max scores (this returns one dict for the whole book)
    if predictions:
        max_scores = calculate_max_emotion_scores(predictions)
    else:
        max_scores = {label: 0.0 for label in emotion_labels}

    # 4. Append the max scores
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

In [20]:
emotion_scores

{'anger': [np.float64(0.029641350731253624),
  np.float64(0.5944693684577942),
  np.float64(0.041300926357507706),
  np.float64(0.3253040611743927),
  np.float64(0.0917322039604187),
  np.float64(0.20615458488464355),
  np.float64(0.5513831973075867),
  np.float64(0.023656265810132027),
  np.float64(0.30066949129104614),
  0.0],
 'disgust': [np.float64(0.338238924741745),
  np.float64(0.4619905352592468),
  np.float64(0.024568382650613785),
  np.float64(0.12576323747634888),
  np.float64(0.19743363559246063),
  np.float64(0.7573778033256531),
  np.float64(0.1822037547826767),
  np.float64(0.056707821786403656),
  np.float64(0.28747639060020447),
  np.float64(0.17792630195617676)],
 'fear': [np.float64(0.9839729070663452),
  np.float64(0.9352148771286011),
  np.float64(0.9732853174209595),
  np.float64(0.43633896112442017),
  np.float64(0.09504341334104538),
  np.float64(0.03679512068629265),
  np.float64(0.7008544206619263),
  np.float64(0.40449607372283936),
  np.float64(0.89700043201

In [None]:
from tqdm import tqdm

emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

for i in tqdm(range(len(books))):
    isbn.append(books["isbn13"][i])

    # 1. Get all sentences for the book
    sentences = [s.strip() for s in books["description"][i].split(".") if s.strip()]

    predictions = []
    if sentences:  # Only call the classifier if we have sentences
        try:
            # 2. Call the classifier ONCE for all sentences
            predictions = [classifier(s) for s in sentences]
        except Exception as e:
            print(f"Error during classification: {e}")
            predictions = []

    # 3. Calculate max scores (this returns one dict for the whole book)
    if predictions:
        max_scores = calculate_max_emotion_scores(predictions)
    else:
        max_scores = {label: 0.0 for label in emotion_labels}

    # 4. Append the max scores
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

In [None]:

for i in tqdm(range(len(books))):
    isbn.append(books["isbn13"][i])

    sentences = [s.strip() for s in books["description"][i].split(".") if s.strip()]
    predictions = []
    if sentences:
        try:
            raw_preds = [classifier(s) for s in sentences]
            # Only keep valid predictions (lists of dicts)
            predictions = [p for p in raw_preds if isinstance(p, list) and all(isinstance(x, dict) for x in p)]
        except Exception as e:
            print(f"Error during classification: {e}")
            predictions = []

    if predictions:
        max_scores = calculate_max_emotion_scores(predictions)
    else:
        max_scores = {label: 0.0 for label in emotion_labels}

    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

In [None]:
emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn

In [None]:
emotions_df

In [None]:
books = pd.merge(books, emotions_df, on = "isbn13")

In [None]:
books

In [None]:
books.to_csv("test_books_with_emotions.csv", index = False)