In [1]:
from transformers import pipeline

# For clf_1
clf_1 = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)

# For clf_2
clf_2 = pipeline("text-classification", model="j-hartmann/emotion-english-roberta-large", return_all_scores=True)

  return self.fget.__get__(instance, owner)()


In [14]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "j-hartmann/emotion-english-roberta-large"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Access labels from the model configuration (if available)
labels = model.config.id2label

In [15]:
labels

{0: 'anger',
 1: 'disgust',
 2: 'fear',
 3: 'joy',
 4: 'neutral',
 5: 'sadness',
 6: 'surprise'}

In [16]:
model_name = "j-hartmann/emotion-english-distilroberta-base"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Access labels from the model configuration (if available)
labels2 = model.config.id2label
labels2

{0: 'anger',
 1: 'disgust',
 2: 'fear',
 3: 'joy',
 4: 'neutral',
 5: 'sadness',
 6: 'surprise'}

In [2]:
import pandas as pd
import numpy as np
np.random.seed(42)

In [3]:
df = pd.read_csv("text.csv")

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416809 entries, 0 to 416808
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   Unnamed: 0  416809 non-null  int64 
 1   text        416809 non-null  object
 2   label       416809 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 9.5+ MB


In [5]:
df.head()

Unnamed: 0.1,Unnamed: 0,text,label
0,0,i just feel really helpless and heavy hearted,4
1,1,ive enjoyed being able to slouch about relax a...,0
2,2,i gave up my internship with the dmrg and am f...,4
3,3,i dont know i feel so lost,0
4,4,i am a kindergarten teacher and i am thoroughl...,4


In [17]:
text = df["text"].tolist()

In [18]:
df.isna().sum()

Unnamed: 0    0
text          0
label         0
dtype: int64

In [19]:
# sadness (0), joy (1), love (2), anger (3), fear (4), and surprise (5)

In [21]:
def transform_dataset(x):
    if x == 0:
        return 5
    elif x == 1:
        return 3
    elif x == 2:
        return 0
    elif x == 3:
        return 4
    elif x == 4:
        return 2
    elif x == 5:
        return 6

In [22]:
"""{0: 'anger',
 1: 'disgust',
 2: 'fear',
 3: 'joy',
 4: 'neutral',
 5: 'sadness',
 6: 'surprise'}"""

"{0: 'anger',\n 1: 'disgust',\n 2: 'fear',\n 3: 'joy',\n 4: 'neutral',\n 5: 'sadness',\n 6: 'surprise'}"

In [23]:
df["new_label"] = df["label"].apply(transform_dataset)

In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416809 entries, 0 to 416808
Data columns (total 4 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   Unnamed: 0  416809 non-null  int64 
 1   text        416809 non-null  object
 2   label       416809 non-null  int64 
 3   new_label   416809 non-null  int64 
dtypes: int64(3), object(1)
memory usage: 12.7+ MB


In [25]:
df.head()

Unnamed: 0.1,Unnamed: 0,text,label,new_label
0,0,i just feel really helpless and heavy hearted,4,2
1,1,ive enjoyed being able to slouch about relax a...,0,5
2,2,i gave up my internship with the dmrg and am f...,4,2
3,3,i dont know i feel so lost,0,5
4,4,i am a kindergarten teacher and i am thoroughl...,4,2


In [35]:
new_df = df.iloc[:1000, :]

In [36]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  1000 non-null   int64 
 1   text        1000 non-null   object
 2   label       1000 non-null   int64 
 3   new_label   1000 non-null   int64 
dtypes: int64(3), object(1)
memory usage: 31.4+ KB


In [37]:
new_df.head()

Unnamed: 0.1,Unnamed: 0,text,label,new_label
0,0,i just feel really helpless and heavy hearted,4,2
1,1,ive enjoyed being able to slouch about relax a...,0,5
2,2,i gave up my internship with the dmrg and am f...,4,2
3,3,i dont know i feel so lost,0,5
4,4,i am a kindergarten teacher and i am thoroughl...,4,2


In [38]:
# Training the dataset

clf1_pred = clf_1(new_df["text"].tolist())
clf1_pred

[[{'label': 'anger', 'score': 0.0034491275437176228},
  {'label': 'disgust', 'score': 0.0009591793641448021},
  {'label': 'fear', 'score': 0.6391155123710632},
  {'label': 'joy', 'score': 0.006666888017207384},
  {'label': 'neutral', 'score': 0.003345145843923092},
  {'label': 'sadness', 'score': 0.3436662554740906},
  {'label': 'surprise', 'score': 0.002797880908474326}],
 [{'label': 'anger', 'score': 0.0012321859830990434},
  {'label': 'disgust', 'score': 0.0003673480823636055},
  {'label': 'fear', 'score': 0.0005369979189708829},
  {'label': 'joy', 'score': 0.0035801641643047333},
  {'label': 'neutral', 'score': 0.00117393652908504},
  {'label': 'sadness', 'score': 0.991905689239502},
  {'label': 'surprise', 'score': 0.0012035970576107502}],
 [{'label': 'anger', 'score': 0.013019061647355556},
  {'label': 'disgust', 'score': 0.0003270053712185472},
  {'label': 'fear', 'score': 0.944549560546875},
  {'label': 'joy', 'score': 0.006304342765361071},
  {'label': 'neutral', 'score': 0.00

In [39]:
clf2_pred = clf_2(new_df["text"].tolist())
clf2_pred

[[{'label': 'anger', 'score': 0.005761963780969381},
  {'label': 'disgust', 'score': 0.00036620552418753505},
  {'label': 'fear', 'score': 0.9437331557273865},
  {'label': 'joy', 'score': 0.0037481999024748802},
  {'label': 'neutral', 'score': 0.0003311517939437181},
  {'label': 'sadness', 'score': 0.040974900126457214},
  {'label': 'surprise', 'score': 0.005084328353404999}],
 [{'label': 'anger', 'score': 0.0034759182017296553},
  {'label': 'disgust', 'score': 0.00027499828138388693},
  {'label': 'fear', 'score': 0.0014909857418388128},
  {'label': 'joy', 'score': 0.2069539576768875},
  {'label': 'neutral', 'score': 0.0016696800012141466},
  {'label': 'sadness', 'score': 0.7812843322753906},
  {'label': 'surprise', 'score': 0.00485004298388958}],
 [{'label': 'anger', 'score': 0.00960445310920477},
  {'label': 'disgust', 'score': 0.00023285162751562893},
  {'label': 'fear', 'score': 0.9834297895431519},
  {'label': 'joy', 'score': 0.0005601223092526197},
  {'label': 'neutral', 'score':

In [40]:
def get_dataframe(predictions):
    lst = []
    for instance in predictions:
        dic = {}
        for pair in instance:
            dic[pair['label']] = pair['score']
        lst.append(dic)
    return pd.DataFrame(lst)


In [41]:
list1 = get_dataframe(clf1_pred)
list2 = get_dataframe(clf2_pred)

In [42]:
list1.head()

Unnamed: 0,anger,disgust,fear,joy,neutral,sadness,surprise
0,0.003449,0.000959,0.639116,0.006667,0.003345,0.343666,0.002798
1,0.001232,0.000367,0.000537,0.00358,0.001174,0.991906,0.001204
2,0.013019,0.000327,0.94455,0.006304,0.000431,0.03442,0.000949
3,0.000722,0.001125,0.001413,0.000782,0.003972,0.989984,0.002003
4,0.033221,0.00076,0.840228,0.035165,0.000388,0.088591,0.001647


In [43]:
list1_np = list1.values

In [44]:
type(list1_np)

numpy.ndarray

In [47]:
list1_np
list2_np = list2.values

In [46]:
def get_label(df):
    result = []
    for val in df:
        result.append(np.argmax(val))
    return result
    

In [48]:
result1 = get_label(list1_np)

In [50]:
result2 = get_label(list2_np)

In [57]:
true_label1 = new_df["new_label"].tolist()

In [59]:
from sklearn.metrics import classification_report
classes = {0: 'anger',
 1: 'disgust',
 2: 'fear',
 3: 'joy',
 4: 'neutral',
 5: 'sadness',
 6: 'surprise'}
report1 = classification_report(true_label1, result1)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [60]:
print(report1)

              precision    recall  f1-score   support

           0       0.01      0.03      0.02        76
           1       0.00      0.00      0.00         0
           2       0.77      0.85      0.81       109
           3       0.83      0.92      0.87       346
           4       0.00      0.00      0.00       135
           5       0.90      0.92      0.91       304
           6       0.64      0.77      0.70        30

    accuracy                           0.72      1000
   macro avg       0.45      0.50      0.47      1000
weighted avg       0.67      0.72      0.69      1000

