In [None]:
# pip install transformers torch 
# pip install sentencepiece

In [2]:
import pandas as pd
from transformers import pipeline

from tqdm import tqdm

# Never truncate cell contents when displaying DataFrames, so long quotes are shown in full.
pd.set_option('display.max_colwidth', None)

In [3]:
# Register tqdm's pandas integration (enables .progress_apply) with the bar label "Pensando".
tqdm.pandas(desc="Pensando")

In [None]:
# Load the raw Friends quotes dataset and preview its first rows.
df = pd.read_csv("fils/friends_quotes.csv")
df.head()

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season
0,Monica,1.0,Monica Gets A Roommate,There's nothing to tell! He's just some guy I work with!,0.0,1.0
1,Joey,1.0,Monica Gets A Roommate,"C'mon, you're going out with the guy! There's gotta be something wrong with him!",1.0,1.0
2,Chandler,1.0,Monica Gets A Roommate,"All right Joey, be nice. So does he have a hump? A hump and a hairpiece?",2.0,1.0
3,Phoebe,1.0,Monica Gets A Roommate,"Wait, does he eat chalk?",3.0,1.0
4,Phoebe,1.0,Monica Gets A Roommate,"Just, 'cause, I don't want her to go through what I went through with Carl- oh!",4.0,1.0


In [8]:
# Emotion classifier used by get_dominant_emotion / batch_emotions below.
# return_all_scores=True requests a score for every label instead of only the top one;
# the helpers pick the maximum themselves.
# NOTE(review): recent transformers releases appear to deprecate return_all_scores in
# favour of top_k=None, but the output nesting for a single string differs, so the
# `[0]` in get_dominant_emotion would need adjusting — confirm before switching.
emotion_analyzer = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    return_all_scores=True
)

Device set to use cpu


In [9]:
def get_dominant_emotion(text):
    """Return the label of the highest-scoring emotion for ``text``.

    ``emotion_analyzer(text)`` is indexed with ``[0]`` to obtain the list of
    ``{"label": ..., "score": ...}`` entries for this single input; the label
    of the entry with the largest ``"score"`` is returned.
    """
    def by_score(entry):
        return entry["score"]

    label_scores = emotion_analyzer(text)[0]
    top_entry = max(label_scores, key=by_score)
    return top_entry["label"]

In [None]:
# df["emotion"] = df["quote"].apply(get_dominant_emotion)

# df[["author", "quote", "emotion"]].head()


Unnamed: 0,author,quote,emotion
0,Monica,There's nothing to tell! He's just some guy I work with!,neutral
1,Joey,"C'mon, you're going out with the guy! There's gotta be something wrong with him!",anger
2,Chandler,"All right Joey, be nice. So does he have a hump? A hump and a hairpiece?",neutral
3,Phoebe,"Wait, does he eat chalk?",neutral
4,Phoebe,"Just, 'cause, I don't want her to go through what I went through with Carl- oh!",anger


In [None]:
# df.to_csv('fils/df_emotionfriends.csv', index=False)
# index=False para que no se guarde la columna de índices.

Si tenemos muchas filas, mejor usar esta opción (más rápida):

In [None]:
def batch_emotions(texts, batch_size=32):
    """Label every text with its top-scoring emotion, ``batch_size`` texts per analyzer call.

    Parameters
    ----------
    texts : sequence
        Must support ``len`` and slicing; the notebook passes a list of quotes.
    batch_size : int, default 32
        Number of texts handed to ``emotion_analyzer`` in each call.

    Returns
    -------
    list
        One label per analyzer output, in input order.
    """
    def top_label(label_scores):
        # Each analyzer output is a list of {"label", "score"} entries.
        return max(label_scores, key=lambda entry: entry["score"])["label"]

    return [
        top_label(label_scores)
        for start in range(0, len(texts), batch_size)
        for label_scores in emotion_analyzer(texts[start:start + batch_size])
    ]

# Label every quote in batches and store the result in a new "emotion" column.
df["emotion"] = batch_emotions(df["quote"].tolist())

In [None]:
# Load the emotion-labelled quotes from disk instead of recomputing them.
# NOTE(review): the cell that would save this file is commented out, so this relies
# on a copy written in an earlier session — confirm the path is still valid.
df_emo = pd.read_csv("fils/df_emotionfriends.csv")
df_emo.head()

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season,emotion
0,Monica,1.0,Monica Gets A Roommate,There's nothing to tell! He's just some guy I work with!,0.0,1.0,neutral
1,Joey,1.0,Monica Gets A Roommate,"C'mon, you're going out with the guy! There's gotta be something wrong with him!",1.0,1.0,anger
2,Chandler,1.0,Monica Gets A Roommate,"All right Joey, be nice. So does he have a hump? A hump and a hairpiece?",2.0,1.0,neutral
3,Phoebe,1.0,Monica Gets A Roommate,"Wait, does he eat chalk?",3.0,1.0,neutral
4,Phoebe,1.0,Monica Gets A Roommate,"Just, 'cause, I don't want her to go through what I went through with Carl- oh!",4.0,1.0,anger


In [15]:
# Count how many quotes were assigned to each emotion label.
df_emo['emotion'].value_counts()

emotion
neutral     19080
surprise    14974
anger        8487
joy          7874
disgust      5524
sadness      2342
fear         2010
Name: count, dtype: int64

In [9]:
# Speakers kept for the analysis; every other author is filtered out.
main_characters = ["Monica", "Joey", "Chandler", "Phoebe", "Ross", "Rachel"]

# .copy() detaches the subset so later changes to df_clean do not touch df_emo.
df_clean = df_emo.loc[df_emo["author"].isin(main_characters)].copy()

In [None]:
# Persist the main-character subset to disk.
df_clean.to_csv('fils/df_emotionfriends_clean.csv', index=False)
# index=False so that the index column is not written to the file.

In [10]:
# Number of quotes per main character.
df_clean["author"].value_counts()

author
Rachel      8318
Ross        8088
Monica      7516
Chandler    7488
Joey        7373
Phoebe      6699
Name: count, dtype: int64

In [11]:
# Share of each emotion within every character's quotes (each row sums to 1).
# Character/emotion pairs that never occur are filled with 0 while pivoting.
emotion_by_character = (
    df_clean
    .groupby("author")["emotion"]
    .value_counts(normalize=True)
    .unstack(fill_value=0)
)

emotion_by_character

emotion,anger,disgust,fear,joy,neutral,sadness,surprise
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Chandler,0.155048,0.093884,0.039263,0.124866,0.312366,0.036993,0.23758
Joey,0.169131,0.098603,0.034721,0.149193,0.265021,0.033094,0.250237
Monica,0.164582,0.086615,0.02488,0.1252,0.284194,0.036855,0.277674
Phoebe,0.149873,0.106583,0.034483,0.138976,0.283774,0.036722,0.249589
Rachel,0.149195,0.086559,0.036187,0.130921,0.270738,0.042077,0.284323
Ross,0.147255,0.086672,0.040183,0.131677,0.301187,0.03771,0.255317


In [13]:
# Share of each emotion within every season's quotes (each row sums to 1).
# Season/emotion pairs that never occur are filled with 0 while pivoting.
emotion_by_season = (
    df_clean
    .groupby("season")["emotion"]
    .value_counts(normalize=True)
    .unstack(fill_value=0)
)

emotion_by_season


emotion,anger,disgust,fear,joy,neutral,sadness,surprise
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.0,0.109,0.094801,0.021188,0.08235,0.395369,0.038008,0.259284
2.0,0.110795,0.122159,0.019886,0.068182,0.414773,0.034091,0.230114
3.0,0.143403,0.109414,0.042582,0.117434,0.316975,0.040099,0.230094
4.0,0.175523,0.095397,0.040377,0.146862,0.261506,0.0341,0.246234
5.0,0.168389,0.096035,0.023868,0.151475,0.262169,0.038527,0.259538
6.0,0.185929,0.097848,0.038886,0.154458,0.238018,0.03147,0.253391
7.0,0.187695,0.078557,0.044497,0.139352,0.2397,0.032778,0.277422
8.0,0.165105,0.096214,0.053084,0.136612,0.250781,0.032787,0.265418
9.0,0.131018,0.08202,0.025991,0.130166,0.301236,0.04836,0.28121
10.0,0.123241,0.079664,0.021562,0.138675,0.325238,0.042442,0.269178


In [14]:
# Inspect the quotes labelled "neutral".
df_clean.loc[df_clean["emotion"].eq("neutral")]

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season,emotion
0,Monica,1.0,Monica Gets A Roommate,There's nothing to tell! He's just some guy I work with!,0.0,1.0,neutral
2,Chandler,1.0,Monica Gets A Roommate,"All right Joey, be nice. So does he have a hump? A hump and a hairpiece?",2.0,1.0,neutral
3,Phoebe,1.0,Monica Gets A Roommate,"Wait, does he eat chalk?",3.0,1.0,neutral
5,Monica,1.0,Monica Gets A Roommate,"Okay, everybody relax. This is not even a date. It's just two people going out to dinner and- not having sex.",5.0,1.0,neutral
6,Chandler,1.0,Monica Gets A Roommate,Sounds like a date to me.,6.0,1.0,neutral
...,...,...,...,...,...,...,...
60284,Joey,17.0,"The Last One, Part I & II",Yeah. I guess so.,579.0,10.0,neutral
60286,Chandler,17.0,"The Last One, Part I & II","Oh, it's gonna be okay.",581.0,10.0,neutral
60288,Monica,17.0,"The Last One, Part I & II",We got some time.,583.0,10.0,neutral
60289,Rachel,17.0,"The Last One, Part I & II","Okay, should we get some coffee?",584.0,10.0,neutral


In [16]:
# Inspect the quotes labelled "fear".
df_clean.loc[df_clean["emotion"].eq("fear")]

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season,emotion
12,Joey,1.0,Monica Gets A Roommate,Never had that dream.,12.0,1.0,fear
73,Chandler,1.0,Monica Gets A Roommate,"Please don't do that again, it's a horrible sound.",73.0,1.0,fear
86,Monica,1.0,Monica Gets A Roommate,(horrified) Really?,86.0,1.0,fear
141,Ross,1.0,Monica Gets A Roommate,"You know what the scariest part is? What if there's only one woman for everybody, y'know? I mean what if you get one woman- and that's it? Unfortunately in my case, there was only one woman- for her...",142.0,1.0,fear
256,Chandler,1.0,Monica Gets A Roommate,"(as Rachel is cutting up her cards) Y'know, if you listen closely, you can hear a thousand retailers scream.",258.0,1.0,fear
...,...,...,...,...,...,...,...
60048,Chandler,17.0,"The Last One, Part I & II","Oh, don't worry, we'll find them.",343.0,10.0,fear
60128,Joey,17.0,"The Last One, Part I & II","(yelling) Don't worry, you guys, we're gonna get you out of there.",423.0,10.0,fear
60192,Chandler,17.0,"The Last One, Part I & II","Aww, we were worried about you! Hm. I guess I better get used to things crapping in my hand, huh?",487.0,10.0,fear
60214,Phoebe,17.0,"The Last One, Part I & II",(screaming) RACHEL!!,509.0,10.0,fear


In [17]:
# Inspect the quotes labelled "joy".
df_clean.loc[df_clean["emotion"].eq("joy")]

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season,emotion
29,Ross,1.0,Monica Gets A Roommate,"I'll be fine, alright? Really, everyone. I hope she'll be very happy.",29.0,1.0,joy
39,Joey,1.0,Monica Gets A Roommate,"Strip joint! C'mon, you're single! Have some hormones!",39.0,1.0,joy
46,Rachel,1.0,Monica Gets A Roommate,"Hi, sure!",46.0,1.0,joy
66,Phoebe,1.0,Monica Gets A Roommate,"(sings) Raindrops on roses and rabbits and kittens, (Rachel and Monica turn to look at her.) bluebells and sleighbells and- something with mittens... La la la la...something and noodles with string. These are a few...",66.0,1.0,joy
68,Phoebe,1.0,Monica Gets A Roommate,(grins and walks to the kitchen and says to Chandler and Joey.) I helped!,68.0,1.0,joy
...,...,...,...,...,...,...,...
60226,Ross,17.0,"The Last One, Part I & II","Hey, hey. I know you love me. I know you do.",521.0,10.0,joy
60242,Joey,17.0,"The Last One, Part I & II",I love you!,537.0,10.0,joy
60243,Joey,17.0,"The Last One, Part I & II",Hey!,538.0,10.0,joy
60263,Rachel,17.0,"The Last One, Part I & II",I do love you.,558.0,10.0,joy


In [19]:
# Inspect the quotes labelled "anger".
df_clean.loc[df_clean["emotion"].eq("anger")]

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season,emotion
1,Joey,1.0,Monica Gets A Roommate,"C'mon, you're going out with the guy! There's gotta be something wrong with him!",1.0,1.0,anger
4,Phoebe,1.0,Monica Gets A Roommate,"Just, 'cause, I don't want her to go through what I went through with Carl- oh!",4.0,1.0,anger
18,Joey,1.0,Monica Gets A Roommate,"This guy says hello, I wanna kill myself.",18.0,1.0,anger
19,Monica,1.0,Monica Gets A Roommate,"Are you okay, sweetie?",19.0,1.0,anger
27,Ross,1.0,Monica Gets A Roommate,"No, no don't! Stop cleansing my aura! No, just leave my aura alone, okay?",27.0,1.0,anger
...,...,...,...,...,...,...,...
60257,Rachel,17.0,"The Last One, Part I & II","(on the answering machine) Oh, please, miss, you don't understand!",552.0,10.0,anger
60258,Ross,17.0,"The Last One, Part I & II",Try to understand!,553.0,10.0,anger
60266,Ross,17.0,"The Last One, Part I & II","Me neither, okay? We are - we're done being stupid.",561.0,10.0,anger
60267,Rachel,17.0,"The Last One, Part I & II","Okay. You and me, alright? This is it.",562.0,10.0,anger


In [18]:
# Inspect the quotes labelled "disgust".
df_clean.loc[df_clean["emotion"].eq("disgust")]

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season,emotion
16,Chandler,1.0,Monica Gets A Roommate,"Finally, I figure I'd better answer it, and it turns out it's my mother, which is very-very weird, because- she never calls me!",16.0,1.0,disgust
20,Ross,1.0,Monica Gets A Roommate,"I just feel like someone reached down my throat, grabbed my small intestine, pulled it out of my mouth and tied it around my neck...",20.0,1.0,disgust
28,Phoebe,1.0,Monica Gets A Roommate,Fine! Be murky!,28.0,1.0,disgust
34,Chandler,1.0,Monica Gets A Roommate,Sometimes I wish I was a lesbian... (They all stare at him.) Did I say that out loud?,34.0,1.0,disgust
51,Rachel,1.0,Monica Gets A Roommate,"Ooh, I was kinda hoping that wouldn't be an issue...Scene: Monica's Apartment, everyone is there and watching a Spanish Soap on TV and are trying to figure out what is going on.]",51.0,1.0,disgust
...,...,...,...,...,...,...,...
60249,Rachel,17.0,"The Last One, Part I & II","(on the answering machine) Ross, hi. It's me. I just got back on the plane. And I just feel awful. That is so not how I wanted things to end with us. It's just that I wasn't expecting to see you, and all of a sudden you're there and saying these things... And... And now I'm just sitting here and thinking of all the stuff I should have said, and I didn't. I mean, I didn't even get to tell you that I love you too. Because of course I do. I love you. I love you. I love you. What am I doing? I love you! Oh, I've gotta see you. I've gotta get off this plane.",544.0,10.0,disgust
60259,Rachel,17.0,"The Last One, Part I & II","(on the answering machine) Oh, come on, miss, isn't there any way that you can just let me off...",554.0,10.0,disgust
60265,Rachel,17.0,"The Last One, Part I & II","Okay. 'Cause this is where I wanna be, okay? No more messing around. I don't wanna mess this up again.",560.0,10.0,disgust
60271,Monica,17.0,"The Last One, Part I & II","If that falls off the truck, it wouldn't be the worst thing.",566.0,10.0,disgust


## Códigos de ejemplo de clase

In [16]:
# Run the sentiment function on every review (with a progress bar) and store its result.
# NOTE(review): `analisis_sentimiento` and a `df` holding a "review" column are not defined
# anywhere in the notebook as shown — presumably carried over from a class session; confirm.
df["sentimiento"] = df["review"].progress_apply(analisis_sentimiento)

Pensando: 100%|██████████| 250/250 [00:33<00:00,  7.44it/s]


In [17]:
# Preview the reviews together with the new "sentimiento" column.
df.head()

Unnamed: 0,username,episode,review,sentimiento
0,ComedyGold,1x01 - The One Where Monica Gets a Roommate,Mixed feelings about this one. Certain moments was great but the pacing fell flat.,negative
1,ComedyGold,1x09 - The One Where Underdog Gets Away,Rewatching this episode and I appreciate it more. The humor holds up remarkably well.,positive
2,TribbianiFan,1x15 - The One with the Stoned Guy,Rewatching this episode and I appreciate it more. The humor holds up surprisingly well.,positive
3,BingWatch,1x01 - The One Where Monica Gets a Roommate,This one missed the mark with side story. I found myself dying laughing during most of it.,negative
4,GellerPride,1x05 - The One with the East German Laundry Detergent,Not terrible but uneven. The performances could have been more interesting.,negative


In [19]:
# Inspect the reviews whose predicted sentiment is "neutral".
df.loc[df["sentimiento"].eq("neutral")]

Unnamed: 0,username,episode,review,sentimiento
15,WeWereOnABreak,1x03 - The One with the Thumb,"Not bad, not great. chemistry had potential but choppy. Still decent enough.",neutral
16,BuffayEnergy,1x10 - The One with the Monkey,This one is peak for what it is. {positive} but also {negative}.,neutral
44,BingeWatcher,1x06 - The One with the Butt,Pretty standard episode. subplot is peak but nothing earlier episodes.,neutral
64,BingWatch,1x07 - The One with the Blackout,"Not bad, not great. humor had potential but slow. Still fine.",neutral
70,RetroTV,1x10 - The One with the Monkey,"Not bad, not great. writing had potential but rushed. Still watchable.",neutral
72,HowYouDoin,1x01 - The One Where Monica Gets a Roommate,"Not bad, not great. timing had potential but uneven. Still decent enough.",neutral
77,PhoebeVibes,1x02 - The One with the Sonogram at the End,"Not bad, not great. chemistry had potential but rushed. Still fine.",neutral
90,RetroTV,1x16 - The One with Two Parts (Part 1),Pretty standard episode. subplot is strongest but nothing the classics.,neutral
105,RetroTV,1x17 - The One with Two Parts (Part 2),Pretty standard episode. subplot is peak but nothing the classics.,neutral
111,RossTheGenius,1x11 - The One with Mrs. Bing,Pretty standard episode. character arc is peak but nothing other season 1 episodes.,neutral


In [20]:
def score(texto):
    """Return the confidence score of the first prediction for ``texto``.

    Parameters
    ----------
    texto : str
        Text handed to ``sentiment_analyzer``.
        # presumably a single review string — verify against the caller

    Returns
    -------
    float
        ``sentiment_analyzer(texto)[0]["score"]``, or ``nan`` when the
        analyzer (or the result lookup) raises.
    """
    try:
        return sentiment_analyzer(texto)[0]["score"]
    except Exception:
        # NaN (not the input text) keeps the resulting column numeric, so
        # aggregations such as .mean() simply skip rows that failed.
        # Catching Exception rather than everything lets Ctrl-C interrupt
        # a long progress_apply run.
        return float("nan")

In [21]:
# Compute the prediction confidence for every review (progress bar via tqdm).
df["score"] = df["review"].progress_apply(score)

Pensando: 100%|██████████| 250/250 [00:30<00:00,  8.31it/s]


In [22]:
# Preview the frame with the added "score" column.
df.head()

Unnamed: 0,username,episode,review,sentimiento,score
0,ComedyGold,1x01 - The One Where Monica Gets a Roommate,Mixed feelings about this one. Certain moments was great but the pacing fell flat.,negative,0.84639
1,ComedyGold,1x09 - The One Where Underdog Gets Away,Rewatching this episode and I appreciate it more. The humor holds up remarkably well.,positive,0.977597
2,TribbianiFan,1x15 - The One with the Stoned Guy,Rewatching this episode and I appreciate it more. The humor holds up surprisingly well.,positive,0.977397
3,BingWatch,1x01 - The One Where Monica Gets a Roommate,This one missed the mark with side story. I found myself dying laughing during most of it.,negative,0.780301
4,GellerPride,1x05 - The One with the East German Laundry Detergent,Not terrible but uneven. The performances could have been more interesting.,negative,0.647537


In [23]:
# Average prediction confidence per predicted sentiment label.
df.groupby("sentimiento")["score"].mean()

sentimiento
negative    0.861661
neutral     0.533297
positive    0.926222
Name: score, dtype: float64

In [24]:
# Translation pipeline backed by the Helsinki-NLP opus-mt-en-es checkpoint
# (English -> Spanish, per the model name); used by traductor() below.
translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-en-es"
)

Device set to use cpu


In [25]:
def traductor(texto):
    """Translate ``texto`` with the module-level ``translator`` pipeline.

    Parameters
    ----------
    texto : str
        Text to translate.

    Returns
    -------
    The ``"translation_text"`` of the first result, or ``texto`` unchanged
    when translation fails (best-effort fallback).
    """
    try:
        return translator(texto, max_length=512)[0]['translation_text']
    except Exception:
        # Best effort: keep the original text for rows that cannot be translated.
        # Only Exception is caught so that Ctrl-C (KeyboardInterrupt) still stops
        # a long-running progress_apply instead of being swallowed row by row.
        return texto

In [26]:
# Translate every review and store the result in "review_esp" (progress bar via tqdm).
df["review_esp"] = df["review"].progress_apply(traductor)

Pensando: 100%|██████████| 250/250 [05:29<00:00,  1.32s/it]


In [27]:
# Preview the frame including the translated "review_esp" column.
df.head()

Unnamed: 0,username,episode,review,sentimiento,score,review_esp
0,ComedyGold,1x01 - The One Where Monica Gets a Roommate,Mixed feelings about this one. Certain moments was great but the pacing fell flat.,negative,0.84639,"Sentimientos encontrados sobre éste. Ciertos momentos fueron geniales, pero el ritmo se derrumbó."
1,ComedyGold,1x09 - The One Where Underdog Gets Away,Rewatching this episode and I appreciate it more. The humor holds up remarkably well.,positive,0.977597,Rever este episodio y lo aprecio más. El humor se mantiene notablemente bien.
2,TribbianiFan,1x15 - The One with the Stoned Guy,Rewatching this episode and I appreciate it more. The humor holds up surprisingly well.,positive,0.977397,Rever este episodio y lo aprecio más. El humor se mantiene sorprendentemente bien.
3,BingWatch,1x01 - The One Where Monica Gets a Roommate,This one missed the mark with side story. I found myself dying laughing during most of it.,negative,0.780301,Este perdió la marca con la historia lateral. Me encontré muriendo de risa durante la mayor parte de ella.
4,GellerPride,1x05 - The One with the East German Laundry Detergent,Not terrible but uneven. The performances could have been more interesting.,negative,0.647537,Las actuaciones podrían haber sido más interesantes.
