In [None]:
!pip install transformers

In [60]:
from pathlib import Path

import numpy as np
import pandas as pd
from transformers import pipeline

In [None]:
# Emotion classifier used for every prediction in this notebook.
# return_all_scores=True makes each prediction a list with one
# {'label', 'score'} dict per emotion instead of only the top label.
# NOTE(review): newer transformers releases reportedly deprecate
# return_all_scores in favour of top_k=None, which may also change the
# ordering/nesting of the output — confirm before switching.
emotion_model_id = "j-hartmann/emotion-english-distilroberta-base"
classifier = pipeline(
    task="text-classification",
    model=emotion_model_id,
    return_all_scores=True,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.00k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [23]:
# Sanity check on a single sentence: expect one list of per-emotion scores.
single_example = "I love this!"
classifier(single_example)

[[{'label': 'anger', 'score': 0.004419781267642975},
  {'label': 'disgust', 'score': 0.0016119900392368436},
  {'label': 'fear', 'score': 0.0004138521908316761},
  {'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'neutral', 'score': 0.005764583125710487},
  {'label': 'sadness', 'score': 0.002092392183840275},
  {'label': 'surprise', 'score': 0.008528688922524452}]]

In [24]:
# Sanity check on a batch: the result holds one score list per input sentence,
# in the same order as the inputs.
example_batch = ["I love this!", "I hate this!"]
classifier(example_batch)

[[{'label': 'anger', 'score': 0.004419781267642975},
  {'label': 'disgust', 'score': 0.0016119900392368436},
  {'label': 'fear', 'score': 0.0004138521908316761},
  {'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'neutral', 'score': 0.005764583125710487},
  {'label': 'sadness', 'score': 0.002092392183840275},
  {'label': 'surprise', 'score': 0.008528688922524452}],
 [{'label': 'anger', 'score': 0.6189557909965515},
  {'label': 'disgust', 'score': 0.3279286026954651},
  {'label': 'fear', 'score': 0.004268052522093058},
  {'label': 'joy', 'score': 0.0026416887994855642},
  {'label': 'neutral', 'score': 0.014557044953107834},
  {'label': 'sadness', 'score': 0.025817139074206352},
  {'label': 'surprise', 'score': 0.005831680726259947}]]

In [25]:
# Load the test split of transcribed utterances and preview the first rows
# (the preview shows the columns wav_file, label and transcription).
test_csv_path = "./data/t2e/text_test.csv"
text_test = pd.read_csv(test_csv_path)
text_test.head(n=5)

Unnamed: 0,wav_file,label,transcription
0,Ses05M_script01_3_M018,4,i kissed you .
1,Ses01F_script03_1_M002,3,it must be them then .
2,Ses04M_script03_1_M001,3,i wonder .
3,Ses02F_script03_1_F000,3,do you think it s them ?
4,Ses02F_impro05_F006,0,that s all that s all that s all you re going ...


In [26]:
# Each distinct transcription is scored only once; order of first appearance
# is kept so the list lines up with the classifier output later on.
unique_texts = text_test["transcription"].drop_duplicates().tolist()

In [27]:
# Score every unique transcription in a single call. The result is consumed
# positionally below: preds[i] is paired with unique_texts[i].
preds = classifier(unique_texts)

In [39]:
# Peek at the first two (transcription, per-emotion scores) pairs.
list(zip(unique_texts[:2], preds[:2]))

[('i kissed you .',
  [{'label': 'anger', 'score': 0.00429416261613369},
   {'label': 'disgust', 'score': 0.0006018253043293953},
   {'label': 'fear', 'score': 0.0014956137165427208},
   {'label': 'joy', 'score': 0.8674219846725464},
   {'label': 'neutral', 'score': 0.02878144383430481},
   {'label': 'sadness', 'score': 0.040796007961034775},
   {'label': 'surprise', 'score': 0.05660904943943024}]),
 ('it must be them then .',
  [{'label': 'anger', 'score': 0.01174916885793209},
   {'label': 'disgust', 'score': 0.00749609712511301},
   {'label': 'fear', 'score': 0.004031592048704624},
   {'label': 'joy', 'score': 0.006925765424966812},
   {'label': 'neutral', 'score': 0.8741339445114136},
   {'label': 'sadness', 'score': 0.012472460977733135},
   {'label': 'surprise', 'score': 0.08319094777107239}])]

In [74]:
# Flatten the classifier output into one row per unique transcription:
# the per-emotion scores plus the top-scoring label in "pred".
#
# Rows are gathered in a plain list and converted to a DataFrame once at the
# end. Concatenating onto the frame inside the loop copies every existing row
# on each iteration (quadratic in the number of texts) and also mixes an empty
# object-dtype frame into the concat, which newer pandas versions warn about.
result_columns = ["transcription", "pred", "anger", "disgust", "fear", "joy",
                  "neutral", "sadness", "surprise"]
records = []
for text, pred in zip(unique_texts, preds):
  scores = {label_dict["label"]: label_dict["score"] for label_dict in pred}
  records.append({
      "transcription": text,
      # First label carrying the highest score (same tie-break as np.argmax).
      "pred": max(scores, key=scores.get),
      **scores,
  })
# Passing `columns` pins the column order and still yields a well-formed,
# empty frame when there are no texts to score.
results_df = pd.DataFrame(records, columns=result_columns)

In [75]:
# Preview the first five rows of the per-transcription predictions.
results_df.head(n=5)

Unnamed: 0,transcription,pred,anger,disgust,fear,joy,neutral,sadness,surprise
0,i kissed you .,joy,0.004294,0.000602,0.001496,0.867422,0.028781,0.040796,0.056609
1,it must be them then .,neutral,0.011749,0.007496,0.004032,0.006926,0.874134,0.012472,0.083191
2,i wonder .,surprise,0.00295,0.000581,0.005135,0.004922,0.011466,0.01612,0.958826
3,do you think it s them ?,neutral,0.025332,0.010591,0.00836,0.004837,0.488408,0.010772,0.451699
4,that s all that s all that s all you re going ...,joy,0.070038,0.007763,0.003606,0.453828,0.364092,0.040638,0.060035


In [76]:
# Persist the per-transcription predictions. The output directory is created
# first because to_csv does not create missing parent directories and would
# otherwise fail on a fresh checkout.
Path("./data/bert_results").mkdir(parents=True, exist_ok=True)
results_df.to_csv("./data/bert_results/results.csv")

In [78]:
# Attach the predictions to every row of the test set. A left join keeps all
# test rows, including repeated transcriptions, which share one prediction.
preds_df = pd.merge(
    left=text_test,
    right=results_df,
    on="transcription",
    how="left",
)

In [79]:
# Persist the test set joined with its predictions. The output directory is
# created first because to_csv does not create missing parent directories.
Path("./data/bert_results").mkdir(parents=True, exist_ok=True)
preds_df.to_csv("./data/bert_results/preds.csv")