# Objective:: Classify texts in 7 different discrete emotions - (anger, disgust, fear, joy, surprise and neutral using LLM)

In [1]:
# import libraries
import pandas as pd
import numpy as np
from transformers import pipeline

In [2]:
# load the data
df = pd.read_csv('books_cleaned.csv')
df.head()

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_subtitle,tagged_desc
0,9780002005883,2005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0,Gilead,9780002005883 A NOVEL THAT READERS and critics...
1,9780002261982,2261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0,Spider's Web:A Novel,9780002261982 A new 'Christie for Christmas' -...
2,9780006178736,6178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0,Rage of angels,"9780006178736 A memorable, mesmerizing heroine..."
3,9780006280897,6280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,9780006280897 Lewis' work on the nature of lov...
4,9780006280934,6280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le..."


In [3]:
df.columns, df.shape

(Index(['isbn13', 'isbn10', 'title', 'authors', 'categories', 'thumbnail',
        'description', 'published_year', 'average_rating', 'num_pages',
        'ratings_count', 'title_subtitle', 'tagged_desc'],
       dtype='object'),
 (5197, 13))

In [4]:
# to reduce computing time, we wil select just a 1000 records
df = df[['isbn13','authors','categories','thumbnail','description','published_year', 'average_rating', 'title_subtitle', 'tagged_desc']]
df_new = df.sample(n=1000, random_state=42).reset_index(drop=True)
df_new.shape

(1000, 9)

In [5]:
# load a fine-tuned llm text classification model
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)

2025-04-14 14:48:31.836215: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-04-14 14:48:31.836247: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-04-14 14:48:31.836257: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
I0000 00:00:1744638511.836296 7762561 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1744638511.836339 7762561 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base.
If your task is similar to 

In [6]:
# examine a sample sequence - since the first description is quite long we can split it to enabl the model predict an accurate
# emotion
sequence = df_new['description'][0].split('.')

In [7]:
sequence

['Wherever vampires existed in the imaginations of different peoples, they adapted themselves to the customs of the local culture',
 ' In The Vampyre (1819), John Polidori introduced Lord Ruthven and established the vampire craze of the 19th century that resulted in a flood of German vampire poetry, French vampire drama, and British vampire fiction',
 " That tradition culminated in Bram Stoker's Dracula (1897), which fixed the character of the Transylvanian nobleman firmly in the public imagination",
 ' The contributors to this volume examine representations of the vampire in fiction, film, folklore, and popular culture',
 ' While some look at Stoker and the early literary vampire, others study the works of contemporary writers, such as Anne Rice and Chelsea Quinn Yarbro, vampirism as a metaphor for AIDS, and racial issues in such films as Blacula and Vampire in Brooklyn',
 '']

In [8]:
result = classifier(sequence)
result

[[{'label': 'anger', 'score': 0.007370923645794392},
  {'label': 'disgust', 'score': 0.022275930270552635},
  {'label': 'fear', 'score': 0.004900901112705469},
  {'label': 'joy', 'score': 0.0035773939453065395},
  {'label': 'neutral', 'score': 0.9419256448745728},
  {'label': 'sadness', 'score': 0.00413099117577076},
  {'label': 'surprise', 'score': 0.015818189829587936}],
 [{'label': 'anger', 'score': 0.028625933453440666},
  {'label': 'disgust', 'score': 0.08219161629676819},
  {'label': 'fear', 'score': 0.16321861743927002},
  {'label': 'joy', 'score': 0.11766994744539261},
  {'label': 'neutral', 'score': 0.5492902994155884},
  {'label': 'sadness', 'score': 0.01770937070250511},
  {'label': 'surprise', 'score': 0.04129417985677719}],
 [{'label': 'anger', 'score': 0.014531271532177925},
  {'label': 'disgust', 'score': 0.036096006631851196},
  {'label': 'fear', 'score': 0.16969963908195496},
  {'label': 'joy', 'score': 0.0522853322327137},
  {'label': 'neutral', 'score': 0.68335795402

In [18]:
for value in result:
    data = sorted(value, key= lambda x: x['label'])
print(data[0])

{'label': 'anger', 'score': 0.06413356214761734}


In [15]:
def compute_the_max_emotion_scores(predictions):

    emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
    per_emotion_score = {label: [] for label in emotion_labels}

    for results in predictions:
        sorted_predictions =  sorted(results, key= lambda x: x['label'])
        for index, label in enumerate(emotion_labels):
            per_emotion_score[label].append(sorted_predictions[index]['score'])

    return {label: np.max(scores) for label, scores in per_emotion_score.items()}

In [16]:
emotions_score_dict = compute_the_max_emotion_scores(result)
emotions_score_dict

{'anger': 0.06413356214761734,
 'disgust': 0.11760932952165604,
 'fear': 0.16969963908195496,
 'joy': 0.11766994744539261,
 'neutral': 0.9419256448745728,
 'sadness': 0.11169012635946274,
 'surprise': 0.07876547425985336}

In [19]:
from tqdm import tqdm

In [20]:
emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
emotion_scores = {label: [] for label in emotion_labels}

# selected a few records and return their emotional sentiments
num_of_records = 100
isbn = []
for _ in tqdm(range(num_of_records)):
    isbn.append(df_new['isbn13'][_])
    sentence = df_new['description'][_].split('.')
    pred = classifier(sentence)
    scores = compute_the_max_emotion_scores(pred)
    for label in emotion_labels:
        emotion_scores[label].append(scores[label])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
100%|██████████| 100/100 [00:37<00:00,  2.63it/s]


In [21]:
emotion_scores

{'anger': [0.06413356214761734,
  0.23667699098587036,
  0.06413356214761734,
  0.06413356214761734,
  0.06413356214761734,
  0.3980664014816284,
  0.9820885062217712,
  0.06413356214761734,
  0.17247454822063446,
  0.06413356214761734,
  0.8081616759300232,
  0.2671797275543213,
  0.06413356214761734,
  0.06413356214761734,
  0.11721374094486237,
  0.6783784031867981,
  0.06413356214761734,
  0.06413356214761734,
  0.06413356214761734,
  0.06413356214761734,
  0.8963773250579834,
  0.06413356214761734,
  0.08836549520492554,
  0.06413356214761734,
  0.06800904870033264,
  0.6763833165168762,
  0.2490445226430893,
  0.6655375361442566,
  0.13236217200756073,
  0.06413356214761734,
  0.06413356214761734,
  0.06413356214761734,
  0.0514126792550087,
  0.06413356214761734,
  0.0751296654343605,
  0.5582448244094849,
  0.06413356214761734,
  0.1815921664237976,
  0.06413356214761734,
  0.06413356214761734,
  0.731264054775238,
  0.7867348790168762,
  0.16825875639915466,
  0.00842228345572

In [22]:
# creating a dataframe
emotions_df = pd.DataFrame(emotion_scores)
emotions_df['isbn13'] =  isbn
emotions_df.head()

Unnamed: 0,anger,disgust,fear,joy,neutral,sadness,surprise,isbn13
0,0.064134,0.117609,0.1697,0.11767,0.941926,0.11169,0.078765,9780313309335
1,0.236677,0.536687,0.117072,0.063171,0.928044,0.11169,0.210309,9780020442608
2,0.064134,0.104007,0.072854,0.040564,0.549477,0.821686,0.078765,9780719071157
3,0.064134,0.104007,0.051363,0.040564,0.936846,0.11169,0.978196,9780373483372
4,0.064134,0.104007,0.051363,0.931885,0.885922,0.11169,0.078765,9781904633273


In [23]:
# merging the emotions_df to the original df
df_new_with_emotions = pd.merge(df_new, emotions_df, on='isbn13')
df_new_with_emotions.head()

Unnamed: 0,isbn13,authors,categories,thumbnail,description,published_year,average_rating,title_subtitle,tagged_desc,anger,disgust,fear,joy,neutral,sadness,surprise
0,9780313309335,James Craig Holte,Literary Criticism,http://books.google.com/books/content?id=mpmrW...,Wherever vampires existed in the imaginations ...,2002.0,0.0,The Fantastic Vampire:Studies in the Children ...,9780313309335 Wherever vampires existed in the...,0.064134,0.117609,0.1697,0.11767,0.941926,0.11169,0.078765
1,9780020442608,Clive Staples Lewis,Juvenile Fiction,http://books.google.com/books/content?id=fDD3C...,"The ""Dawn Treader"" is the first ship Narnia ha...",1970.0,4.09,The voyage of the Dawn Treader,"9780020442608 The ""Dawn Treader"" is the first ...",0.236677,0.536687,0.117072,0.063171,0.928044,0.11169,0.210309
2,9780719071157,Michael Cox;Adrian Guelke;Fiona Stephen,History,http://books.google.com/books/content?id=G5wuw...,Neither naively optimistic nor hopelessley pes...,2006.0,4.33,A Farewell to Arms?:Beyond the Good Friday Agr...,9780719071157 Neither naively optimistic nor h...,0.064134,0.104007,0.072854,0.040564,0.549477,0.821686,0.078765
3,9780373483372,Cait London,Fiction,http://books.google.com/books/content?id=Vn8lm...,Everyone in Amen Flats is shocked when prim El...,1997.0,4.22,Tallchief for Keeps,9780373483372 Everyone in Amen Flats is shocke...,0.064134,0.104007,0.051363,0.040564,0.936846,0.11169,0.978196
4,9781904633273,Louisa May Alcott,Family life,http://books.google.com/books/content?id=lH7BZ...,Timeless in its evocation of idealised family ...,2004.0,4.06,Little Women,9781904633273 Timeless in its evocation of ide...,0.064134,0.104007,0.051363,0.931885,0.885922,0.11169,0.078765


In [24]:
df_new_with_emotions.to_csv('books_with_emotions.csv', index=False)

In [25]:
df_new_with_emotions.shape

(100, 16)

In [26]:
df_new_with_emotions.head()

Unnamed: 0,isbn13,authors,categories,thumbnail,description,published_year,average_rating,title_subtitle,tagged_desc,anger,disgust,fear,joy,neutral,sadness,surprise
0,9780313309335,James Craig Holte,Literary Criticism,http://books.google.com/books/content?id=mpmrW...,Wherever vampires existed in the imaginations ...,2002.0,0.0,The Fantastic Vampire:Studies in the Children ...,9780313309335 Wherever vampires existed in the...,0.064134,0.117609,0.1697,0.11767,0.941926,0.11169,0.078765
1,9780020442608,Clive Staples Lewis,Juvenile Fiction,http://books.google.com/books/content?id=fDD3C...,"The ""Dawn Treader"" is the first ship Narnia ha...",1970.0,4.09,The voyage of the Dawn Treader,"9780020442608 The ""Dawn Treader"" is the first ...",0.236677,0.536687,0.117072,0.063171,0.928044,0.11169,0.210309
2,9780719071157,Michael Cox;Adrian Guelke;Fiona Stephen,History,http://books.google.com/books/content?id=G5wuw...,Neither naively optimistic nor hopelessley pes...,2006.0,4.33,A Farewell to Arms?:Beyond the Good Friday Agr...,9780719071157 Neither naively optimistic nor h...,0.064134,0.104007,0.072854,0.040564,0.549477,0.821686,0.078765
3,9780373483372,Cait London,Fiction,http://books.google.com/books/content?id=Vn8lm...,Everyone in Amen Flats is shocked when prim El...,1997.0,4.22,Tallchief for Keeps,9780373483372 Everyone in Amen Flats is shocke...,0.064134,0.104007,0.051363,0.040564,0.936846,0.11169,0.978196
4,9781904633273,Louisa May Alcott,Family life,http://books.google.com/books/content?id=lH7BZ...,Timeless in its evocation of idealised family ...,2004.0,4.06,Little Women,9781904633273 Timeless in its evocation of ide...,0.064134,0.104007,0.051363,0.931885,0.885922,0.11169,0.078765


In [None]:
df_new_with_emotions['thumbnail'][0]

'http://books.google.com/books/content?id=mpmrWCQ1e3MC&printsec=frontcover&img=1&zoom=1&source=gbs_api'

# New 

In [2]:
import pandas as pd
data =  pd.read_csv('books_with_emotions.csv')
data.head()

Unnamed: 0,isbn13,categories,thumbnail,description,title_subtitle,tagged_desc,anger,disgust,fear,joy,neutral,sadness,surprise
0,9780313309335,Literary Criticism,http://books.google.com/books/content?id=mpmrW...,Wherever vampires existed in the imaginations ...,The Fantastic Vampire:Studies in the Children ...,9780313309335 Wherever vampires existed in the...,0.064134,0.117609,0.1697,0.11767,0.941926,0.11169,0.078765
1,9780020442608,Juvenile Fiction,http://books.google.com/books/content?id=fDD3C...,"The ""Dawn Treader"" is the first ship Narnia ha...",The voyage of the Dawn Treader,"9780020442608 The ""Dawn Treader"" is the first ...",0.236677,0.536687,0.117072,0.063171,0.928044,0.11169,0.210309
2,9780719071157,History,http://books.google.com/books/content?id=G5wuw...,Neither naively optimistic nor hopelessley pes...,A Farewell to Arms?:Beyond the Good Friday Agr...,9780719071157 Neither naively optimistic nor h...,0.064134,0.104007,0.072854,0.040564,0.549477,0.821686,0.078765
3,9780373483372,Fiction,http://books.google.com/books/content?id=Vn8lm...,Everyone in Amen Flats is shocked when prim El...,Tallchief for Keeps,9780373483372 Everyone in Amen Flats is shocke...,0.064134,0.104007,0.051363,0.040564,0.936846,0.11169,0.978196
4,9781904633273,Family life,http://books.google.com/books/content?id=lH7BZ...,Timeless in its evocation of idealised family ...,Little Women,9781904633273 Timeless in its evocation of ide...,0.064134,0.104007,0.051363,0.931885,0.885922,0.11169,0.078765


In [5]:
data.sort_values('joy', ascending=False)

Unnamed: 0,isbn13,categories,thumbnail,description,title_subtitle,tagged_desc,anger,disgust,fear,joy,neutral,sadness,surprise
18,9781904919520,Boats and boating,http://books.google.com/books/content?id=Qk7bO...,.0000000000Martyrs to hypochondria and general...,Three Men in a Boat:To Say Nothing of the Dog,9781904919520 .0000000000Martyrs to hypochondr...,0.064134,0.104007,0.530805,0.976017,0.854453,0.111690,0.105776
70,9780312266059,Photography,http://books.google.com/books/content?id=TOikA...,"""...I was convinced that Story of O was going ...",The Illustrated Story Of O,"9780312266059 ""...I was convinced that Story o...",0.064134,0.914879,0.093180,0.969811,0.931029,0.664391,0.931075
58,9780140320435,Adventure stories,http://books.google.com/books/content?id=BmE7b...,Mr. Willy Wonka might be a genius with chocola...,Charlie and the Great Glass Elevator,9780140320435 Mr. Willy Wonka might be a geniu...,0.029802,0.031057,0.125918,0.959282,0.897749,0.030455,0.117694
87,9781401301941,Cooking,http://books.google.com/books/content?id=gMDlA...,Cooking sensation Jamie Oliver returns with a ...,Jamie's Dinners:The Essential Family Cookbook,9781401301941 Cooking sensation Jamie Oliver r...,0.064134,0.104007,0.051363,0.939986,0.624496,0.111690,0.078765
4,9781904633273,Family life,http://books.google.com/books/content?id=lH7BZ...,Timeless in its evocation of idealised family ...,Little Women,9781904633273 Timeless in its evocation of ide...,0.064134,0.104007,0.051363,0.931885,0.885922,0.111690,0.078765
...,...,...,...,...,...,...,...,...,...,...,...,...,...
35,9780060587031,Juvenile Fiction,http://books.google.com/books/content?id=6EqMP...,"""I'll swap you my dad,"" I said. ""Oh-oh,"" said ...",The Day I Swapped My Dad for Two Goldfish,"9780060587031 ""I'll swap you my dad,"" I said. ...",0.558245,0.059995,0.174693,0.016313,0.850982,0.036710,0.733545
32,9780452277243,Fiction,http://books.google.com/books/content?id=UqbTs...,Follows a young woman from her early life on t...,Man Crazy:A Novel,9780452277243 Follows a young woman from her e...,0.051413,0.326500,0.423316,0.007991,0.137254,0.047976,0.005551
10,9780373767793,Fiction,http://books.google.com/books/content?id=p6tKm...,Businessman Chase Ramsey's audacious terms sho...,Blackmailed Into Bed,9780373767793 Businessman Chase Ramsey's audac...,0.808162,0.139104,0.136079,0.006941,0.906829,0.656320,0.069970
22,9780140185331,Fiction,http://books.google.com/books/content?id=REy21...,"Just arrived in Vienna, Rollo Martins discover...",The Third Man and the Fallen Idol,"9780140185331 Just arrived in Vienna, Rollo Ma...",0.088365,0.071151,0.009670,0.002193,0.032898,0.782506,0.013217


In [11]:
for _, row in data.head(1).iterrows():
    print(row['description'].split()[:30])

['Wherever', 'vampires', 'existed', 'in', 'the', 'imaginations', 'of', 'different', 'peoples,', 'they', 'adapted', 'themselves', 'to', 'the', 'customs', 'of', 'the', 'local', 'culture.', 'In', 'The', 'Vampyre', '(1819),', 'John', 'Polidori', 'introduced', 'Lord', 'Ruthven', 'and', 'established']
