In [12]:
import pandas as pd
from tqdm import tqdm
from transformers import pipeline

In [2]:
books = pd.read_csv('books_with_categories.csv')
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_subtitle,tagged_desc,new_categories
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0,Gilead,9780002005883 A NOVEL THAT READERS and critics...,Fiction
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0,Spider's Web A Novel,9780002261982 A new 'Christie for Christmas' -...,Fiction
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0,Rage of angels,"9780006178736 A memorable, mesmerizing heroine...",Fiction
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,9780006280897 Lewis' work on the nature of lov...,Nonfiction
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le...",Nonfiction
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,0.0,Mistaken Identity,9788172235222 On A Train Journey Home To North...,Fiction
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,24.0,Journey to the East,9788173031014 This book tells the tale of a ma...,Nonfiction
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,1568.0,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...,Fiction
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,104.0,I Am that Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...,Nonfiction


In [6]:
classifier = pipeline('text-classification', model='j-hartmann/emotion-english-distilroberta-base', return_all_scores=True)
classifier.save_pretrained('./emotion-english-distilroberta-base')

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [7]:
classifier('I am so happy to be here')

[[{'label': 'anger', 'score': 0.0008928002207539976},
  {'label': 'disgust', 'score': 0.00029430477297864854},
  {'label': 'fear', 'score': 0.0003287624567747116},
  {'label': 'joy', 'score': 0.9850720763206482},
  {'label': 'neutral', 'score': 0.0014550563646480441},
  {'label': 'sadness', 'score': 0.0032732996623963118},
  {'label': 'surprise', 'score': 0.00868357252329588}]]

In [8]:
emotions = ['anger','disgust','fear','joy','sadness','surprise','neutral']
isbn = []
emotion_scores = {label: [] for label in emotions}

def max_emotion(predictions):
    per_emotion_scores = {label: [] for label in emotions}
    for prediction in predictions:
        sorted_prediction = sorted(prediction,key=lambda x: x['label'])
        for idx, label in enumerate(emotions):
            per_emotion_scores[label].append(sorted_prediction[idx]['score'])
    return {label: max(scores) for label, scores in per_emotion_scores.items()}

In [None]:
for i in range(10):
    isbn.append(books['isbn13'][i])
    sentence = books['description'][i].split('.')
    predictions = classifier(sentence)
    max_scores = max_emotion(predictions)
    for label in emotions:
        emotion_scores[label].append(max_scores[label])

In [11]:
emotion_scores

{'anger': [0.06413356214761734,
  0.6126192212104797,
  0.06413356214761734,
  0.3514849841594696,
  0.08141239732503891,
  0.23222418129444122,
  0.5381843447685242,
  0.06413356214761734,
  0.30066990852355957,
  0.06413356214761734],
 'disgust': [0.27359116077423096,
  0.34828484058380127,
  0.10400664061307907,
  0.1507224291563034,
  0.18449531495571136,
  0.7271755337715149,
  0.15585477650165558,
  0.10400664061307907,
  0.27948105335235596,
  0.17792674899101257],
 'fear': [0.9281681180000305,
  0.9425276517868042,
  0.9723207950592041,
  0.3607051968574524,
  0.0950433686375618,
  0.05136275663971901,
  0.7474279403686523,
  0.4044974148273468,
  0.9155240654945374,
  0.05136275663971901],
 'joy': [0.9327986836433411,
  0.7044224739074707,
  0.7672379016876221,
  0.2518813908100128,
  0.04056437313556671,
  0.04337577894330025,
  0.8725656270980835,
  0.04056437313556671,
  0.04056437313556671,
  0.04056437313556671],
 'sadness': [0.646215558052063,
  0.8879395127296448,
  0.5

In [13]:
emotions = ['anger','disgust','fear','joy','sadness','surprise','neutral']
isbn = []
emotion_scores = {label: [] for label in emotions}


for i in tqdm(range(len(books))):
    isbn.append(books['isbn13'][i])
    sentence = books['description'][i].split('.')
    predictions = classifier(sentence)
    max_scores = max_emotion(predictions)
    for label in emotions:
        emotion_scores[label].append(max_scores[label])

100%|██████████| 5197/5197 [14:05<00:00,  6.15it/s]


In [14]:
emotion_df = pd.DataFrame(emotion_scores)
emotion_df['isbn13'] = isbn
emotion_df.head()

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,isbn13
0,0.064134,0.273591,0.928168,0.932799,0.646216,0.967158,0.729602,9780002005883
1,0.612619,0.348285,0.942528,0.704422,0.88794,0.11169,0.252546,9780002261982
2,0.064134,0.104007,0.972321,0.767238,0.549477,0.11169,0.078765,9780006178736
3,0.351485,0.150722,0.360705,0.251881,0.732685,0.11169,0.078765,9780006280897
4,0.081412,0.184495,0.095043,0.040564,0.88439,0.47588,0.078765,9780006280934


In [15]:
books_with_emotions = pd.merge(books, emotion_df, on='isbn13')
books_with_emotions

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,title_subtitle,tagged_desc,new_categories,anger,disgust,fear,joy,sadness,surprise,neutral
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,Gilead,9780002005883 A NOVEL THAT READERS and critics...,Fiction,0.064134,0.273591,0.928168,0.932799,0.646216,0.967158,0.729602
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,Spider's Web A Novel,9780002261982 A new 'Christie for Christmas' -...,Fiction,0.612619,0.348285,0.942528,0.704422,0.887940,0.111690,0.252546
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,...,Rage of angels,"9780006178736 A memorable, mesmerizing heroine...",Fiction,0.064134,0.104007,0.972321,0.767238,0.549477,0.111690,0.078765
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,...,The Four Loves,9780006280897 Lewis' work on the nature of lov...,Nonfiction,0.351485,0.150722,0.360705,0.251881,0.732685,0.111690,0.078765
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,...,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le...",Nonfiction,0.081412,0.184495,0.095043,0.040564,0.884390,0.475880,0.078765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,...,Mistaken Identity,9788172235222 On A Train Journey Home To North...,Fiction,0.148209,0.030643,0.919165,0.255171,0.853721,0.980877,0.030656
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,...,Journey to the East,9788173031014 This book tells the tale of a ma...,Nonfiction,0.064134,0.114383,0.051363,0.400263,0.883198,0.111690,0.227765
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,...,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...,Fiction,0.009997,0.009929,0.339218,0.947779,0.375755,0.066685,0.057625
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,...,I Am that Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...,Nonfiction,0.064134,0.104007,0.459268,0.759456,0.951104,0.368111,0.078765


In [16]:
books_with_emotions.to_csv('books_with_emotions.csv', index=False)