In [1]:
import pandas as pd

movies = pd.read_csv("movies_with_categories.csv")

In [3]:
from transformers import pipeline
classifier = pipeline("text-classification",
                      model="j-hartmann/emotion-english-distilroberta-base",
                      top_k = None,
                      device = -1)
classifier("I love this!")

Device set to use cpu


[[{'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'surprise', 'score': 0.008528691716492176},
  {'label': 'neutral', 'score': 0.005764589179307222},
  {'label': 'anger', 'score': 0.004419791977852583},
  {'label': 'sadness', 'score': 0.002092393347993493},
  {'label': 'disgust', 'score': 0.001611992483958602},
  {'label': 'fear', 'score': 0.0004138525982853025}]]

In [4]:
classifier(movies["description"][0])


[[{'label': 'joy', 'score': 0.7927948832511902},
  {'label': 'neutral', 'score': 0.15551497042179108},
  {'label': 'disgust', 'score': 0.01860632374882698},
  {'label': 'anger', 'score': 0.012162130326032639},
  {'label': 'sadness', 'score': 0.010016023181378841},
  {'label': 'surprise', 'score': 0.009432087652385235},
  {'label': 'fear', 'score': 0.0014735726872459054}]]

In [25]:
import numpy as np

emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
ids = []
emotion_scores = {label: [] for label in emotion_labels}

def calculate_max_emotion_scores(predictions):
    per_emotion_scores = {label: [] for label in emotion_labels}
    for prediction in predictions:
        sorted_predictions = sorted(prediction, key=lambda x: x["label"])
        for index, label in enumerate(emotion_labels):
            per_emotion_scores[label].append(sorted_predictions[index]["score"])
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

In [26]:
movies = movies.dropna(subset=["description"])
movies = movies[movies["description"].str.strip() != ""]
movies = movies.reset_index(drop=True)
movies

Unnamed: 0,id,name,date,tagline,description,minute,rating,theme,genre,tagged_description,simple_genres
0,1000001,Barbie,2023.0,She's everything. He's just Ken.,Barbie and Ken are having the time of their li...,114.0,3.86,"Humanity and the world around us, Crude humor ...","Comedy, Adventure",1000001 Barbie and Ken are having the time of ...,"['Escapist', 'Feel-good']"
1,1000002,Parasite,2019.0,Act like you own the place.,"All unemployed, Ki-taek's family takes peculia...",133.0,4.56,"Humanity and the world around us, Intense viol...","Comedy, Thriller, Drama","1000002 All unemployed, Ki-taek's family takes...","['Emotional', 'Feel-good', 'Suspenseful']"
2,1000003,Everything Everywhere All at Once,2022.0,The universe is so much bigger than you realize.,An aging Chinese immigrant is swept up in an i...,140.0,4.30,"Humanity and the world around us, Moving relat...","Science Fiction, Adventure, Comedy, Action",1000003 An aging Chinese immigrant is swept up...,"['Escapist', 'Feel-good']"
3,1000004,Fight Club,1999.0,Mischief. Mayhem. Soap.,A ticking-time-bomb insomniac and a slippery s...,139.0,4.27,"Intense violence and sexual transgression, Hum...",Drama,1000004 A ticking-time-bomb insomniac and a sl...,['Emotional']
4,1000005,La La Land,2016.0,Here's to the fools who dream.,"Mia, an aspiring actress, serves lattes to mov...",129.0,4.09,"Song and dance, Humanity and the world around ...","Drama, Comedy, Music, Romance","1000005 Mia, an aspiring actress, serves latte...","['Emotional', 'Artistic', 'Feel-good']"
...,...,...,...,...,...,...,...,...,...,...,...
24407,1664306,Fixerr,2019.0,,ATS Officer Jayveer Malik’s stands suspended a...,324.0,,"Thrillers and murder mysteries, High speed and...",Drama,1664306 ATS Officer Jayveer Malik’s stands sus...,['Emotional']
24408,1693420,"LSD - Love, Scandal and Doctors",2021.0,,"Love, Scandal and Doctors is a story of 5 medi...",300.0,,"Thrillers and murder mysteries, High speed and...","Drama, Mystery","1693420 Love, Scandal and Doctors is a story o...","['Emotional', 'Suspenseful']"
24409,1702871,Olessia,2019.0,,Film with a soundtrack featuring G. Venugopal ...,,,"Song and dance, Dazzling vocal performances an...",Drama,1702871 Film with a soundtrack featuring G. Ve...,['Emotional']
24410,1762425,Pretty Things,2005.0,,A look into the world of 20th century burlesqu...,90.0,,"Song and dance, Erotic relationships and desire",Documentary,1762425 A look into the world of 20th century ...,['Educational']


In [27]:
from tqdm import tqdm

for i in tqdm(range(len(movies))):
    ids.append(movies["id"][i])
    sentences = movies["description"][i].split(".")
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

100%|██████████| 24412/24412 [36:49<00:00, 11.05it/s]


In [23]:
movies["description"][23156]

KeyError: 23156

In [33]:
emotions_df = pd.DataFrame(emotion_scores)
# print(emotions_df)
# print(len(ids))
emotions_df["id"] = ids
emotions_df

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,id
0,0.064134,0.104007,0.051363,0.737157,0.549477,0.111690,0.078765,1000001
1,0.064134,0.104007,0.051363,0.040564,0.549477,0.304677,0.088714,1000002
2,0.120102,0.104007,0.212463,0.040564,0.549477,0.111690,0.152938,1000003
3,0.107869,0.104007,0.904958,0.040564,0.800969,0.111690,0.078765,1000004
4,0.064134,0.104007,0.967849,0.040564,0.549477,0.111690,0.078765,1000005
...,...,...,...,...,...,...,...,...
24407,0.682494,0.144579,0.026709,0.080378,0.892373,0.894916,0.398169,1664306
24408,0.453025,0.497322,0.026341,0.014087,0.454776,0.191987,0.071671,1693420
24409,0.007377,0.008226,0.005747,0.452469,0.908463,0.048569,0.207229,1702871
24410,0.008809,0.016056,0.012568,0.643105,0.863761,0.020441,0.072019,1762425


In [38]:
# movies = pd.merge(movies, emotions_df, on = "id")
# movies = movies.drop(["surprise_y", "sadness_y", "neutral_y"], axis=1)
movies



Unnamed: 0,id,name,date,tagline,description,minute,rating,theme,genre,tagged_description,simple_genres,anger,disgust,fear,joy,sadness,surprise,neutral
0,1000001,Barbie,2023.0,She's everything. He's just Ken.,Barbie and Ken are having the time of their li...,114.0,3.86,"Humanity and the world around us, Crude humor ...","Comedy, Adventure",1000001 Barbie and Ken are having the time of ...,"['Escapist', 'Feel-good']",0.064134,0.104007,0.051363,0.737157,0.549477,0.111690,0.078765
1,1000002,Parasite,2019.0,Act like you own the place.,"All unemployed, Ki-taek's family takes peculia...",133.0,4.56,"Humanity and the world around us, Intense viol...","Comedy, Thriller, Drama","1000002 All unemployed, Ki-taek's family takes...","['Emotional', 'Feel-good', 'Suspenseful']",0.064134,0.104007,0.051363,0.040564,0.549477,0.304677,0.088714
2,1000003,Everything Everywhere All at Once,2022.0,The universe is so much bigger than you realize.,An aging Chinese immigrant is swept up in an i...,140.0,4.30,"Humanity and the world around us, Moving relat...","Science Fiction, Adventure, Comedy, Action",1000003 An aging Chinese immigrant is swept up...,"['Escapist', 'Feel-good']",0.120102,0.104007,0.212463,0.040564,0.549477,0.111690,0.152938
3,1000004,Fight Club,1999.0,Mischief. Mayhem. Soap.,A ticking-time-bomb insomniac and a slippery s...,139.0,4.27,"Intense violence and sexual transgression, Hum...",Drama,1000004 A ticking-time-bomb insomniac and a sl...,['Emotional'],0.107869,0.104007,0.904958,0.040564,0.800969,0.111690,0.078765
4,1000005,La La Land,2016.0,Here's to the fools who dream.,"Mia, an aspiring actress, serves lattes to mov...",129.0,4.09,"Song and dance, Humanity and the world around ...","Drama, Comedy, Music, Romance","1000005 Mia, an aspiring actress, serves latte...","['Emotional', 'Artistic', 'Feel-good']",0.064134,0.104007,0.967849,0.040564,0.549477,0.111690,0.078765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24407,1664306,Fixerr,2019.0,,ATS Officer Jayveer Malik’s stands suspended a...,324.0,,"Thrillers and murder mysteries, High speed and...",Drama,1664306 ATS Officer Jayveer Malik’s stands sus...,['Emotional'],0.682494,0.144579,0.026709,0.080378,0.892373,0.894916,0.398169
24408,1693420,"LSD - Love, Scandal and Doctors",2021.0,,"Love, Scandal and Doctors is a story of 5 medi...",300.0,,"Thrillers and murder mysteries, High speed and...","Drama, Mystery","1693420 Love, Scandal and Doctors is a story o...","['Emotional', 'Suspenseful']",0.453025,0.497322,0.026341,0.014087,0.454776,0.191987,0.071671
24409,1702871,Olessia,2019.0,,Film with a soundtrack featuring G. Venugopal ...,,,"Song and dance, Dazzling vocal performances an...",Drama,1702871 Film with a soundtrack featuring G. Ve...,['Emotional'],0.007377,0.008226,0.005747,0.452469,0.908463,0.048569,0.207229
24410,1762425,Pretty Things,2005.0,,A look into the world of 20th century burlesqu...,90.0,,"Song and dance, Erotic relationships and desire",Documentary,1762425 A look into the world of 20th century ...,['Educational'],0.008809,0.016056,0.012568,0.643105,0.863761,0.020441,0.072019


In [39]:
movies.to_csv("movies_with_emotions.csv", index = False)