In [12]:
import pandas as pd 

books = pd.read_csv('books_with_categories.csv')

In [13]:
from transformers import pipeline

classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=None, device=0)

classifier("I love it")


Device set to use cpu


[[{'label': 'joy', 'score': 0.9803199768066406},
  {'label': 'surprise', 'score': 0.006173064466565847},
  {'label': 'sadness', 'score': 0.004845899064093828},
  {'label': 'neutral', 'score': 0.004664228297770023},
  {'label': 'anger', 'score': 0.0025215698406100273},
  {'label': 'disgust', 'score': 0.0010052360594272614},
  {'label': 'fear', 'score': 0.0004700532299466431}]]

In [14]:
sentences = books['description'][0].split('.')
predictions = classifier(sentences)
predictions

[[{'label': 'surprise', 'score': 0.7296027541160583},
  {'label': 'neutral', 'score': 0.14038576185703278},
  {'label': 'fear', 'score': 0.06816209107637405},
  {'label': 'joy', 'score': 0.04794240742921829},
  {'label': 'anger', 'score': 0.00915635284036398},
  {'label': 'disgust', 'score': 0.0026284719351679087},
  {'label': 'sadness', 'score': 0.0021221607457846403}],
 [{'label': 'neutral', 'score': 0.449370414018631},
  {'label': 'disgust', 'score': 0.27359241247177124},
  {'label': 'joy', 'score': 0.10908260941505432},
  {'label': 'sadness', 'score': 0.09362703561782837},
  {'label': 'anger', 'score': 0.040478307753801346},
  {'label': 'surprise', 'score': 0.02697017230093479},
  {'label': 'fear', 'score': 0.006879065651446581}],
 [{'label': 'neutral', 'score': 0.6462168097496033},
  {'label': 'sadness', 'score': 0.2427326887845993},
  {'label': 'disgust', 'score': 0.04342268034815788},
  {'label': 'surprise', 'score': 0.028300466015934944},
  {'label': 'joy', 'score': 0.014211482

In [15]:
sorted(predictions[0], key=lambda x: x['label'])

[{'label': 'anger', 'score': 0.00915635284036398},
 {'label': 'disgust', 'score': 0.0026284719351679087},
 {'label': 'fear', 'score': 0.06816209107637405},
 {'label': 'joy', 'score': 0.04794240742921829},
 {'label': 'neutral', 'score': 0.14038576185703278},
 {'label': 'sadness', 'score': 0.0021221607457846403},
 {'label': 'surprise', 'score': 0.7296027541160583}]

In [16]:
import numpy as np

emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']

isbn = []

emotion_scores = {
    label: [] for label in emotion_labels
}

def calculate_max_emotion_scores(predictions):
    per_emotion_scores = {label: [] for label in emotion_labels}
    for prediction in predictions:
        sorted_prediction = sorted(prediction, key=lambda x: x['label'])
        for index, label in enumerate(emotion_labels):
            per_emotion_scores[label].append(sorted_prediction[index]['score'])
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

In [17]:
from tqdm import tqdm

emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']

isbn = []

emotion_scores = {
    label: [] for label in emotion_labels
}

for i in tqdm(range(len(books))):
    isbn.append(books['isbn13'][i])
    sentences = books['description'][i].split('.')
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])


100%|██████████| 5197/5197 [10:54<00:00,  7.94it/s]


In [18]:
emotions_df = pd.DataFrame(emotion_scores)
emotions_df['isbn13'] = isbn

In [19]:
emotions_df.head()

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,isbn13
0,0.064134,0.273592,0.928169,0.932798,0.646217,0.967158,0.729603,9780002005883
1,0.612619,0.348284,0.942528,0.704422,0.887939,0.11169,0.252545,9780002261982
2,0.064134,0.104007,0.972321,0.767237,0.549477,0.11169,0.078765,9780006178736
3,0.351483,0.150723,0.360707,0.251881,0.732685,0.11169,0.078765,9780006280897
4,0.081412,0.184495,0.095043,0.040564,0.88439,0.475881,0.078765,9780006280934


In [20]:
books = pd.merge(books, emotions_df, on='isbn13')

In [21]:
books.to_csv('books_with_emotions.csv', index=False)