In [2]:
import pandas as pd

books = pd.read_csv('books_with_categories.csv')

In [6]:
from transformers import pipeline
classifier = pipeline("text-classification",
                      model="j-hartmann/emotion-english-distilroberta-base", 
                      return_all_scores=True,
                     top_k = None,
                     device='cpu')
classifier("I love this!")

Device set to use cpu


[[{'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'surprise', 'score': 0.008528691716492176},
  {'label': 'neutral', 'score': 0.005764589179307222},
  {'label': 'anger', 'score': 0.004419791977852583},
  {'label': 'sadness', 'score': 0.002092393347993493},
  {'label': 'disgust', 'score': 0.001611992483958602},
  {'label': 'fear', 'score': 0.0004138525982853025}]]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [9]:
books.description

0       A NOVEL THAT READERS and critics have been eag...
1       A new 'Christie for Christmas' -- a full-lengt...
2       A memorable, mesmerizing heroine Jennifer -- b...
3       Lewis' work on the nature of love divides love...
4       "In The Problem of Pain, C.S. Lewis, one of th...
                              ...                        
5192    On A Train Journey Home To North India After L...
5193    This book tells the tale of a man who goes on ...
5194    Wisdom to Create a Life of Passion, Purpose, a...
5195    This collection of the timeless teachings of o...
5196    Since the three volume edition ofHegel's Philo...
Name: description, Length: 5197, dtype: object

In [17]:
books.description[0].split('.')

['A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives',
 ' John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers',
 ' It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up',
 ' Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist',
 ' He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption',
 ' Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gilead is a song of celebration and acceptance of the best and t

In [18]:
sentence = books['description'][0].split('.')
prediction = classifier(sentence)

In [19]:
prediction[0]

[{'label': 'surprise', 'score': 0.7296021580696106},
 {'label': 'neutral', 'score': 0.14038598537445068},
 {'label': 'fear', 'score': 0.06816227734088898},
 {'label': 'joy', 'score': 0.04794260486960411},
 {'label': 'anger', 'score': 0.009156367741525173},
 {'label': 'disgust', 'score': 0.0026284768246114254},
 {'label': 'sadness', 'score': 0.002122163772583008}]

In [20]:
sentence[3]

' Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist'

In [21]:
prediction[3]

[{'label': 'fear', 'score': 0.9281682968139648},
 {'label': 'anger', 'score': 0.03219081461429596},
 {'label': 'neutral', 'score': 0.012808660045266151},
 {'label': 'sadness', 'score': 0.008756875991821289},
 {'label': 'surprise', 'score': 0.008597892709076405},
 {'label': 'disgust', 'score': 0.008431807160377502},
 {'label': 'joy', 'score': 0.0010455832816660404}]

In [25]:
sorted(prediction[0], key=lambda x:x['label'])


[{'label': 'anger', 'score': 0.009156367741525173},
 {'label': 'disgust', 'score': 0.0026284768246114254},
 {'label': 'fear', 'score': 0.06816227734088898},
 {'label': 'joy', 'score': 0.04794260486960411},
 {'label': 'neutral', 'score': 0.14038598537445068},
 {'label': 'sadness', 'score': 0.002122163772583008},
 {'label': 'surprise', 'score': 0.7296021580696106}]

In [35]:
import numpy as np

emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness','surprise']
isbn = []
emotion_scores = {label:[] for label in emotion_labels}

def calculate_max_emotions_score(predictions):
    per_emotion_scores = {label:[] for label in emotion_labels}
    for prediction in predictions:
        sorted_prediction = sorted(prediction, key=lambda x:x['label'])
        for index, label in enumerate(emotion_labels):
            per_emotion_scores[label].append(sorted_prediction[index]['score'])
    return {label:np.max(scores) for label, scores in per_emotion_scores.items()}

In [38]:
for i in range(10):
    isbn.append(books['isbn13'][i])
    sentence = books['description'][i].split('.')
    predictions = classifier(sentence)
    max_score = calculate_max_emotions_score(predictions)
    for label in emotion_labels:
            emotion_scores[label].append(max_score[label])
emotion_scores

{'anger': [np.float64(0.06413363665342331),
  np.float64(0.6126194596290588),
  np.float64(0.06413363665342331),
  np.float64(0.35148516297340393),
  np.float64(0.0814124271273613),
  np.float64(0.23222433030605316),
  np.float64(0.5381842255592346),
  np.float64(0.06413363665342331),
  np.float64(0.3006698489189148),
  np.float64(0.06413363665342331),
  np.float64(0.06413363665342331),
  np.float64(0.6126194596290588),
  np.float64(0.06413363665342331),
  np.float64(0.35148516297340393),
  np.float64(0.0814124271273613),
  np.float64(0.23222433030605316),
  np.float64(0.5381842255592346),
  np.float64(0.06413363665342331),
  np.float64(0.3006698489189148),
  np.float64(0.06413363665342331)],
 'disgust': [np.float64(0.27359113097190857),
  np.float64(0.3482842445373535),
  np.float64(0.10400673747062683),
  np.float64(0.1507222205400467),
  np.float64(0.18449550867080688),
  np.float64(0.7271752953529358),
  np.float64(0.15585486590862274),
  np.float64(0.10400673747062683),
  np.float

In [39]:
# from tqdm import tqdm



# for i in tqdm(range(len(books))):
#     isbn.append(books['isbn13'][i])
#     sentence = books['description'][i].split('.')
#     predictions = classifier(sentence)
#     max_score = calculate_max_emotions_score(predictions)
#     for label in emotion_labels:
#             emotion_scores[label].append(max_score[label])

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5197/5197 [16:23<00:00,  5.28it/s]


In [42]:
# emotion_df = pd.DataFrame(emotion_scores)
# emotion_df['isbn13'] = isbn

7
5218


In [43]:
emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
emotion_rows = []

for i in tqdm(range(len(books))):
    try:
        isbn_val = books['isbn13'].iloc[i]
        description = str(books['description'].iloc[i])  # avoid NaN issues
        sentence = description.split('.')

        predictions = classifier(sentence)
        max_score = calculate_max_emotions_score(predictions)

        # build one row per book
        row = {label: max_score.get(label, 0.0) for label in emotion_labels}
        row['isbn13'] = isbn_val
        emotion_rows.append(row)

    except Exception as e:
        print(f"Skipping row {i} due to error: {e}")

# Build DataFrame
emotion_df = pd.DataFrame(emotion_rows)


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5197/5197 [19:09<00:00,  4.52it/s]


In [44]:
print(len(emotion_scores))
print(len(isbn))

7
5218


In [45]:
emotion_df

Unnamed: 0,anger,disgust,fear,joy,neutral,sadness,surprise,isbn13
0,0.064134,0.273591,0.928168,0.932798,0.646216,0.967158,0.729602,9780002005883
1,0.612619,0.348284,0.942528,0.704422,0.887940,0.111690,0.252546,9780002261982
2,0.064134,0.104007,0.972321,0.767239,0.549477,0.111690,0.078765,9780006178736
3,0.351485,0.150722,0.360705,0.251882,0.732685,0.111690,0.078765,9780006280897
4,0.081412,0.184496,0.095043,0.040564,0.884390,0.475880,0.078765,9780006280934
...,...,...,...,...,...,...,...,...
5192,0.148209,0.030643,0.919165,0.255171,0.853721,0.980877,0.030656,9788172235222
5193,0.064134,0.114383,0.051363,0.400263,0.883198,0.111690,0.227765,9788173031014
5194,0.009997,0.009929,0.339218,0.947779,0.375754,0.066685,0.057625,9788179921623
5195,0.064134,0.104007,0.459270,0.759457,0.951104,0.368111,0.078765,9788185300535


In [47]:
books = pd.merge(books,emotion_df,on='isbn13')
books.head(2)

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,neutral_x,sadness_x,surprise_x,anger_y,disgust_y,fear_y,joy_y,neutral_y,sadness_y,surprise_y
0,9780002005883,2005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,0.646216,0.967158,0.729602,0.064134,0.273591,0.928168,0.932798,0.646216,0.967158,0.729602
1,9780002261982,2261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,0.88794,0.11169,0.252546,0.612619,0.348284,0.942528,0.704422,0.88794,0.11169,0.252546


In [51]:
books = books.drop(columns=['anger_y','disgust_y','fear_y','joy_y',
                            'neutral_y','sadness_y','surprise_y'])
books = books.rename(columns={
    'anger_x':'anger','disgust_x':'disgust','fear_x':'fear',
    'joy_x':'joy','neutral_x':'neutral','sadness_x':'sadness','surprise_x':'surprise'
})


In [52]:
books.to_csv('books_with_emotions.csv', index=False)

In [53]:
books.columns

Index(['isbn13', 'isbn10', 'title', 'authors', 'categories', 'thumbnail',
       'description', 'published_year', 'average_rating', 'num_pages',
       'ratings_count', 'title_subtitle', 'tagged_description',
       'simple_categories', 'anger', 'disgust', 'fear', 'joy', 'neutral',
       'sadness', 'surprise'],
      dtype='object')

In [56]:
import os
print("OPENAI_API_KEY:", os.getenv("OPENAI_API_KEY"))


OPENAI_API_KEY: os.getenv("OPENAI_API_KEY")
