In [16]:
import pandas as pd
import numpy as np

books = pd.read_csv("books_with_categories.csv")

In [3]:
from transformers import pipeline
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=None,
                      device= "cuda")
classifier("I love this!")


Device set to use cuda


[[{'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'surprise', 'score': 0.008528676815330982},
  {'label': 'neutral', 'score': 0.005764597095549107},
  {'label': 'anger', 'score': 0.004419785924255848},
  {'label': 'sadness', 'score': 0.002092393347993493},
  {'label': 'disgust', 'score': 0.0016119939973577857},
  {'label': 'fear', 'score': 0.0004138521908316761}]]

In [8]:
# breaking the sentences and passing each sentence to the classifier
sentences = books["description"][0].split(".")
predictions =  classifier(sentences)

In [9]:
sentences[0]

'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives'

In [10]:
predictions[0]

[{'label': 'surprise', 'score': 0.7296027541160583},
 {'label': 'neutral', 'score': 0.140385702252388},
 {'label': 'fear', 'score': 0.06816215813159943},
 {'label': 'joy', 'score': 0.047942426055669785},
 {'label': 'anger', 'score': 0.009156357496976852},
 {'label': 'disgust', 'score': 0.002628474263474345},
 {'label': 'sadness', 'score': 0.0021221607457846403}]

In [13]:
print(f"{sentences[5]} \n {predictions[5]}")

 Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gilead is a song of celebration and acceptance of the best and the worst the world has to offer 
 [{'label': 'joy', 'score': 0.9327970147132874}, {'label': 'disgust', 'score': 0.037717871367931366}, {'label': 'neutral', 'score': 0.015891967341303825}, {'label': 'sadness', 'score': 0.006444553378969431}, {'label': 'anger', 'score': 0.0050250268541276455}, {'label': 'surprise', 'score': 0.0015812092460691929}, {'label': 'fear', 'score': 0.0005423107650130987}]


In [14]:
sorted(predictions[0], key=lambda x:x["label"])

[{'label': 'anger', 'score': 0.009156357496976852},
 {'label': 'disgust', 'score': 0.002628474263474345},
 {'label': 'fear', 'score': 0.06816215813159943},
 {'label': 'joy', 'score': 0.047942426055669785},
 {'label': 'neutral', 'score': 0.140385702252388},
 {'label': 'sadness', 'score': 0.0021221607457846403},
 {'label': 'surprise', 'score': 0.7296027541160583}]

In [17]:
import numpy as np

emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

In [None]:
def calculate_max_emotion_scores(predictions):
    """
    Calculate the maximum emotion scores for each emotion label from the predictions.

    Args:
    predictions (list): A list of predictions, where each prediction is a list of dictionaries
                        containing 'label' and 'score' keys.

    Returns:
    dict: A dictionary with emotion labels as keys and their maximum scores as values.
    """
    per_emotion_scores = {label: [] for label in emotion_labels}
    
    for prediction in predictions:
        # Sort the predictions by label to ensure consistent order
        sorted_predictions = sorted(prediction, key=lambda x: x["label"])
        
        # Append the score of each emotion to the corresponding list in per_emotion_scores
        for index, label in enumerate(emotion_labels):
            per_emotion_scores[label].append(sorted_predictions[index]["score"])
    
    # Calculate the maximum score for each emotion label
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

In [20]:
for i in range(10):
    # Append the ISBN-13 of the current book to the isbn list
    isbn.append(books["isbn13"][i])
    
    # Split the description of the current book into sentences
    sentences = books["description"][i].split(".")
    
    # Use the classifier to predict the emotions for each sentence
    predictions = classifier(sentences)
    
    # Calculate the maximum emotion scores for each emotion label from the predictions
    max_scores = calculate_max_emotion_scores(predictions)
    
    # Append the maximum scores for each emotion label to the corresponding list in emotion_scores
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

In [21]:
emotion_scores

{'anger': [0.06413368880748749,
  0.6126189827919006,
  0.06413368880748749,
  0.3514832854270935,
  0.08141238987445831,
  0.232224702835083,
  0.5381833910942078,
  0.06413368880748749,
  0.30067017674446106,
  0.06413368880748749,
  0.06413368880748749,
  0.6126189827919006,
  0.06413368880748749,
  0.3514832854270935,
  0.08141238987445831,
  0.232224702835083,
  0.5381833910942078,
  0.06413368880748749,
  0.30067017674446106,
  0.06413368880748749],
 'disgust': [0.27359139919281006,
  0.34828436374664307,
  0.10400675237178802,
  0.15072230994701385,
  0.18449531495571136,
  0.7271749377250671,
  0.15585504472255707,
  0.10400675237178802,
  0.27948129177093506,
  0.1779269576072693,
  0.27359139919281006,
  0.34828436374664307,
  0.10400675237178802,
  0.15072230994701385,
  0.18449531495571136,
  0.7271749377250671,
  0.15585504472255707,
  0.10400675237178802,
  0.27948129177093506,
  0.1779269576072693],
 'fear': [0.928167998790741,
  0.9425276517868042,
  0.9723208546638489,

In [22]:
from tqdm import tqdm

emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}


for i in tqdm(range(len(books))):
    # Append the ISBN-13 of the current book to the isbn list
    isbn.append(books["isbn13"][i])
    
    # Split the description of the current book into sentences
    sentences = books["description"][i].split(".")
    
    # Use the classifier to predict the emotions for each sentence
    predictions = classifier(sentences)
    
    # Calculate the maximum emotion scores for each emotion label from the predictions
    max_scores = calculate_max_emotion_scores(predictions)
    
    # Append the maximum scores for each emotion label to the corresponding list in emotion_scores
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

100%|██████████| 5197/5197 [10:42<00:00,  8.09it/s]


In [23]:
emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn

In [24]:
emotions_df.head()

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,isbn13
0,0.064134,0.273591,0.928168,0.932797,0.646217,0.967158,0.729603,9780002005883
1,0.612619,0.348284,0.942528,0.704421,0.88794,0.11169,0.252545,9780002261982
2,0.064134,0.104007,0.972321,0.767237,0.549477,0.11169,0.078766,9780006178736
3,0.351483,0.150722,0.360707,0.251881,0.732686,0.11169,0.078766,9780006280897
4,0.081412,0.184495,0.095043,0.040564,0.88439,0.475881,0.078766,9780006280934


In [25]:
books = pd.merge(books, emotions_df, on ="isbn13")

In [26]:
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,title_and_subtitle,tagged_description,simple_categories,anger,disgust,fear,joy,sadness,surprise,neutral
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,Gilead,9780002005883 A NOVEL THAT READERS and critics...,Fiction,0.064134,0.273591,0.928168,0.932797,0.646217,0.967158,0.729603
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...,Fiction,0.612619,0.348284,0.942528,0.704421,0.887940,0.111690,0.252545
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,...,Rage of angels,"9780006178736 A memorable, mesmerizing heroine...",Fiction,0.064134,0.104007,0.972321,0.767237,0.549477,0.111690,0.078766
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,...,The Four Loves,9780006280897 Lewis' work on the nature of lov...,Nonfiction,0.351483,0.150722,0.360707,0.251881,0.732686,0.111690,0.078766
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,...,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le...",Nonfiction,0.081412,0.184495,0.095043,0.040564,0.884390,0.475881,0.078766
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,...,Mistaken Identity,9788172235222 On A Train Journey Home To North...,Fiction,0.148209,0.030643,0.919165,0.255169,0.853722,0.980877,0.030656
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,...,Journey to the East,9788173031014 This book tells the tale of a ma...,Nonfiction,0.064134,0.114383,0.051363,0.400263,0.883199,0.111690,0.227765
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,...,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...,Fiction,0.009997,0.009929,0.339218,0.947779,0.375755,0.066685,0.057625
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,...,I Am that: Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...,Nonfiction,0.064134,0.104007,0.459269,0.759455,0.951104,0.368111,0.078766


In [27]:
books.to_csv("books_with_emotions.csv", index=False)