In [8]:
import pandas as pd
books = pd.read_csv("books_with_categories.csv")

In [9]:
from transformers import pipeline
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
classifier("I love this!")

Device set to use cuda:0


[[{'label': 'anger', 'score': 0.004419781267642975},
  {'label': 'disgust', 'score': 0.001611992483958602},
  {'label': 'fear', 'score': 0.00041385178337804973},
  {'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'neutral', 'score': 0.005764597095549107},
  {'label': 'sadness', 'score': 0.002092392183840275},
  {'label': 'surprise', 'score': 0.008528676815330982}]]

In [10]:
classifier(books["description"][0].split("."))


[[{'label': 'anger', 'score': 0.009156355634331703},
  {'label': 'disgust', 'score': 0.0026284726336598396},
  {'label': 'fear', 'score': 0.06816210597753525},
  {'label': 'joy', 'score': 0.04794240742921829},
  {'label': 'neutral', 'score': 0.14038553833961487},
  {'label': 'sadness', 'score': 0.0021221598144620657},
  {'label': 'surprise', 'score': 0.7296029925346375}],
 [{'label': 'anger', 'score': 0.040478356182575226},
  {'label': 'disgust', 'score': 0.27359211444854736},
  {'label': 'fear', 'score': 0.006879056803882122},
  {'label': 'joy', 'score': 0.10908288508653641},
  {'label': 'neutral', 'score': 0.44937026500701904},
  {'label': 'sadness', 'score': 0.09362723678350449},
  {'label': 'surprise', 'score': 0.0269701536744833}],
 [{'label': 'anger', 'score': 0.011031899601221085},
  {'label': 'disgust', 'score': 0.04342261701822281},
  {'label': 'fear', 'score': 0.0140841044485569},
  {'label': 'joy', 'score': 0.014211499132215977},
  {'label': 'neutral', 'score': 0.64621704816

In [11]:
import numpy as np
emotion_labels = ["anger","disgust","fear","joy","sadness","surprise","neutral"]
isbn = []
emotion_scores = {label : [] for label in emotion_labels}

def calculate_max_emotion_score(predictions):
    per_emotion_scores = {label : [] for label in emotion_labels}
    for prediction in predictions:
        sorted_prediction = sorted(prediction,key = lambda x:x["label"])
        for index,label in enumerate(emotion_labels):
            per_emotion_scores[label].append(sorted_prediction[index]["score"])
    return {label: np.max(scores) for label,scores in per_emotion_scores.items()}

In [12]:
from tqdm import tqdm

In [13]:
for i in tqdm(range(len(books))):
    isbn.append(books["isbn13"][i])
    sentences = books["description"][i].split(".")
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_score(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

100%|██████████| 5197/5197 [02:09<00:00, 40.17it/s]


In [14]:
emotion_scores

{'anger': [0.06413373351097107,
  0.6126188039779663,
  0.06413373351097107,
  0.3514838218688965,
  0.0814124271273613,
  0.23222483694553375,
  0.538183867931366,
  0.06413373351097107,
  0.3006703555583954,
  0.06413373351097107,
  0.06413373351097107,
  0.6115744709968567,
  0.10621519386768341,
  0.06413373351097107,
  0.07239226251840591,
  0.09586341679096222,
  0.1621592789888382,
  0.06413373351097107,
  0.08193846791982651,
  0.22345995903015137,
  0.10715228319168091,
  0.06413373351097107,
  0.06413373351097107,
  0.06413373351097107,
  0.39636677503585815,
  0.06472660601139069,
  0.06413373351097107,
  0.06413373351097107,
  0.06413373351097107,
  0.3029334247112274,
  0.06683339923620224,
  0.06413373351097107,
  0.06413373351097107,
  0.16889670491218567,
  0.06413373351097107,
  0.06413373351097107,
  0.06413373351097107,
  0.07910309731960297,
  0.11856525391340256,
  0.06413373351097107,
  0.5636677742004395,
  0.9171823859214783,
  0.0683567225933075,
  0.2205990254

In [18]:
emotion_df = pd.DataFrame(emotion_scores)
emotion_df["isbn13"] = isbn

In [19]:
emotion_df

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,isbn13
0,0.064134,0.273592,0.928168,0.932797,0.646217,0.967158,0.729603,9780002005883
1,0.612619,0.348285,0.942528,0.704421,0.887940,0.111690,0.252545,9780002261982
2,0.064134,0.104007,0.972321,0.767236,0.549476,0.111690,0.078766,9780006178736
3,0.351484,0.150722,0.360707,0.251881,0.732685,0.111690,0.078766,9780006280897
4,0.081412,0.184495,0.095043,0.040564,0.884390,0.475880,0.078766,9780006280934
...,...,...,...,...,...,...,...,...
5192,0.148209,0.030643,0.919165,0.255169,0.853721,0.980877,0.030656,9788172235222
5193,0.064134,0.114383,0.051363,0.400263,0.883199,0.111690,0.227765,9788173031014
5194,0.009997,0.009929,0.339218,0.947779,0.375755,0.066685,0.057625,9788179921623
5195,0.064134,0.104007,0.459270,0.759455,0.951104,0.368111,0.078766,9788185300535


In [20]:
books = pd.merge(books,emotion_df, on = "isbn13")

In [21]:
books.head()

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,title_and_subtitle,tagged_description,simple_categroies,anger,disgust,fear,joy,sadness,surprise,neutral
0,9780002005883,2005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,Gilead,9780002005883 A NOVEL THAT READERS and critics...,Fiction,0.064134,0.273592,0.928168,0.932797,0.646217,0.967158,0.729603
1,9780002261982,2261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...,Fiction,0.612619,0.348285,0.942528,0.704421,0.88794,0.11169,0.252545
2,9780006178736,6178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,...,Rage of angels,"9780006178736 A memorable, mesmerizing heroine...",Fiction,0.064134,0.104007,0.972321,0.767236,0.549476,0.11169,0.078766
3,9780006280897,6280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,...,The Four Loves,9780006280897 Lewis' work on the nature of lov...,Nonfiction,0.351484,0.150722,0.360707,0.251881,0.732685,0.11169,0.078766
4,9780006280934,6280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,...,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le...",Nonfiction,0.081412,0.184495,0.095043,0.040564,0.88439,0.47588,0.078766


In [22]:
books.to_csv("books_with_emotions.csv")