In [1]:
import pandas as pd

In [2]:
books = pd.read_csv('books_with_categories.csv')

In [3]:
from transformers import pipeline

In [4]:
classifier = pipeline('text-classification',
                      model="j-hartmann/emotion-english-distilroberta-base",
                      top_k=None,
                      return_all_scores=True)

Device set to use cpu


In [11]:
classifier(books['description'][3])

[[{'label': 'anger', 'score': 0.33278363943099976},
  {'label': 'fear', 'score': 0.23052769899368286},
  {'label': 'neutral', 'score': 0.20132723450660706},
  {'label': 'disgust', 'score': 0.19818544387817383},
  {'label': 'sadness', 'score': 0.0277869813144207},
  {'label': 'joy', 'score': 0.005105332005769014},
  {'label': 'surprise', 'score': 0.0042836712673306465}]]

In [5]:
from tqdm import tqdm

In [16]:
emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
isbns = []
emotion_scores = {label: [] for label in emotion_labels}

for i in tqdm(range(len(books))):
    isbns.append(books['isbn13'].iloc[i])
    sentences = books['description'].iloc[i].split('. ')
    predictions = classifier(sentences)

    scores = {label: [] for label in emotion_labels}
    for prediction in predictions:
        for per_emotion in prediction:
            scores[per_emotion['label']].append(per_emotion['score'])
    
    for label, score in scores.items():
        emotion_scores[label].append(sum(score) / len(score))

100%|██████████| 5197/5197 [14:14<00:00,  6.08it/s]


In [17]:
emotion_scores

{'anger': [0.008800245921260543,
  0.09712236636551097,
  0.02595987217500806,
  0.07054379070177674,
  0.03593246762951215,
  0.10207543118546407,
  0.13825623277807608,
  0.014050089055672288,
  0.12595021966844797,
  0.01004047505557537,
  0.0061598895117640495,
  0.09588780719786882,
  0.03556511859642342,
  0.019682148537997688,
  0.0353927219286561,
  0.05474554607644677,
  0.09518738090991974,
  0.004092141054570675,
  0.02299683657474816,
  0.055436585750430825,
  0.050973362755030394,
  0.010987761663272977,
  0.024736454710364342,
  0.012484994367696345,
  0.11929202899336815,
  0.02781981769949198,
  0.011287260334938765,
  0.030173048842698336,
  0.04290888458490372,
  0.04634507091250271,
  0.014969006665827086,
  0.01912170695140958,
  0.0102313356862093,
  0.039192162454128265,
  0.004580603912472725,
  0.032551873568445445,
  0.014422728563658893,
  0.047291601076722145,
  0.17341522872447968,
  0.029814803041517735,
  0.17879592394456267,
  0.2773083965294063,
  0.0137

In [18]:
emotion_df = pd.DataFrame(emotion_scores)
emotion_df['isbn13'] = isbns

In [19]:
books = pd.merge(books, emotion_df, on='isbn13')

In [20]:
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,title_and_subtitle,tagged_description,simple_category,anger,disgust,fear,joy,neutral,sadness,surprise
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,Gilead,9780002005883 A NOVEL THAT READERS and critics...,Fiction,0.008800,0.061209,0.154278,0.243816,0.236471,0.176979,0.118447
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...,Fiction,0.097122,0.082514,0.206212,0.121672,0.386340,0.024005,0.082134
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,...,Rage of angels,"9780006178736 A memorable, mesmerizing heroine...",Fiction,0.025960,0.013750,0.537443,0.385195,0.023602,0.006225,0.007826
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,...,The Four Loves,9780006280897 Lewis' work on the nature of lov...,Nonfiction,0.070544,0.099393,0.137070,0.135717,0.518879,0.025420,0.012978
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,...,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le...",Nonfiction,0.035932,0.089143,0.027557,0.022616,0.642025,0.167593,0.015133
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,...,Mistaken Identity,9788172235222 On A Train Journey Home To North...,Fiction,0.060543,0.017399,0.286443,0.044598,0.191592,0.388530,0.010895
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,...,Journey to the East,9788173031014 This book tells the tale of a ma...,Nonfiction,0.017728,0.060251,0.015644,0.159520,0.649211,0.008065,0.089580
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,...,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...,Fiction,0.009407,0.008061,0.110144,0.588812,0.236817,0.024604,0.022155
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,...,I Am that: Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...,Nonfiction,0.010095,0.028933,0.054784,0.151833,0.689036,0.045773,0.019545


In [21]:
books.to_csv('books_with_emotions.csv', index=False)