# Human Emotions classifier

In [10]:
import csv

In [11]:
input_file = 'D:\\Jupyter\\tfidf_ex\\train.txt'
output_file = 'emotions.csv'

In [12]:
with open(input_file,'r') as f:
    sentences = f.readlines()

In [13]:
with open(output_file,'w') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['text', 'emotion'])

    for sentence in sentences:
        # Split the sentence by ';'
        columns = sentence.strip().split(';')
        # Write the columns to the CSV file
        writer.writerow(columns)

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv("emotions.csv")
df.shape


(16000, 2)

In [21]:
print(df['emotion'].value_counts())

df['emotion_num'] = df['emotion'].map({
    "joy"       :  0,
    "sadness"   :  1,
    "anger"    :   2,
    "fear"     :   3,
    "love"     :   4,
    "surprise" :   5
})

df.head()

emotion
joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: count, dtype: int64


Unnamed: 0,text,emotion,emotion_num
0,i didnt feel humiliated,sadness,1
1,i can go from feeling so hopeless to so damned...,sadness,1
2,im grabbing a minute to post i feel greedy wrong,anger,2
3,i am ever feeling nostalgic about the fireplac...,love,4
4,i am feeling grouchy,anger,2


In [22]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    df.text,
    df.emotion_num,
    test_size=0.2,
    stratify=df.emotion,
    random_state=2022
)

In [23]:
X_train.head(5)

6373     i have really notcied is my mental clarity lik...
12524    i feel loyal to him in some ways so respect hi...
12962    i made for the bee has left me feeling pretty ...
13404    i wrapped one child after another in a hug i r...
8525     i like to feel that is exactly what i do for m...
Name: text, dtype: object

In [24]:
X_test.head(5)

13454    i dont feel the need to be truthful its comple...
7280     i will focus on either an infantry company or ...
7413                          i am feeling a little lonely
4381     i feel i am beyond pissed off disappointed fru...
8534     i can pick at my skin for a while and make mys...
Name: text, dtype: object

In [25]:
y_train.value_counts()

emotion_num
0    4290
1    3733
2    1727
3    1550
4    1043
5     457
Name: count, dtype: int64

In [29]:
min_samples = 1550
df_joy = df[df.emotion == 'joy'].sample(min_samples,random_state = 2022)
df_sadness = df[df.emotion == 'sadness'].sample(min_samples,random_state = 2022)
df_anger = df[df.emotion == 'anger'].sample(min_samples,random_state = 2022)
df_fear = df[df.emotion == 'fear'].sample(min_samples,random_state = 2022)

In [30]:
df_balanced = pd.concat([df_joy,df_sadness,df_anger,df_fear],axis = 0)

In [46]:
df_balanced.emotion.value_counts()
# df_balanced.head()

emotion
joy        1550
sadness    1550
anger      1550
fear       1550
Name: count, dtype: int64

In [94]:
from sklearn.model_selection import train_test_split
X1_train, X1_test, y1_train, y1_test = train_test_split(
    df_balanced.text,
    df_balanced.emotion_num,
    test_size=0.3,
    stratify=df_balanced.emotion_num,
    random_state=2022
)

In [40]:
X1_train.head()

12980    i feel so eager for the rest of the photos to ...
4350                         i feel irritated and helpless
5093     i feel that i was damaged by gt gt gt religion...
1923     i feel a bit uncertain really shes a nice girl...
15599                           i feel triumphant and such
Name: text, dtype: object

In [42]:
y1_train.head()

12980    0
4350     2
5093     1
1923     3
15599    0
Name: emotion_num, dtype: int64

In [44]:
X1_test.head()

6649     i wish i knew how he was really feeling aside ...
11165    im being a teenager people and if you feel the...
5168     i now feel everythings been resolved were psyc...
2485     i sat with dave atell at first trying not to f...
520      i really lose a lot of my nesting homemaking i...
Name: text, dtype: object

In [50]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

In [52]:
clf = Pipeline([
    ('count_vec_unigram',CountVectorizer(ngram_range=(1,1))),
    ('Random Forest', RandomForestClassifier())
])

clf.fit(X1_train,y1_train)
y1_predict = clf.predict(X1_test)
print(classification_report(y1_test,y1_predict))

              precision    recall  f1-score   support

           0       0.83      0.96      0.89       310
           1       0.92      0.84      0.88       310
           2       0.94      0.89      0.92       310
           3       0.93      0.92      0.93       310

    accuracy                           0.90      1240
   macro avg       0.91      0.90      0.90      1240
weighted avg       0.91      0.90      0.90      1240



In [97]:
clf = Pipeline([
    ('count_vec_bigram',CountVectorizer(ngram_range=(1,2))),
    ('Random Forest', RandomForestClassifier())
])

clf.fit(X1_train,y1_train)
y1_predict = clf.predict(X1_test)
print(classification_report(y1_test,y1_predict))

              precision    recall  f1-score   support

           0       0.79      0.92      0.85       465
           1       0.89      0.78      0.83       465
           2       0.92      0.88      0.90       465
           3       0.91      0.90      0.91       465

    accuracy                           0.87      1860
   macro avg       0.88      0.87      0.87      1860
weighted avg       0.88      0.87      0.87      1860



In [56]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [93]:
clf = Pipeline([
    ('tfidvectorizer',TfidfVectorizer()),
    ('Random Forest', RandomForestClassifier())
])

clf.fit(X1_train,y1_train)
y1_predict = clf.predict(X1_test)
print(classification_report(y1_test,y1_predict))

              precision    recall  f1-score   support

           0       0.76      0.89      0.82       465
           1       0.84      0.75      0.80       465
           2       0.93      0.86      0.89       465
           3       0.92      0.92      0.92       465

    accuracy                           0.86      1860
   macro avg       0.86      0.86      0.86      1860
weighted avg       0.86      0.86      0.86      1860



In [98]:
clf.predict(["The room filled with laughter as friends embraced, celebrating together.","Alone in the quiet room, tears silently traced her weary cheeks.","His clenched fists trembled with rage as he unleashed fury.","She spat out bitter words, eyes ablaze with seething resentment."])

array([0, 1, 0, 2], dtype=int64)

# Use text pre-processing to remove stop words, punctuations and apply lemmatization

In [76]:
import spacy
nlp = spacy.load("en_core_web_sm") 

def preprocess(text):
    doc = nlp(text)
    filtered_tokens = []
    for token in doc:
        if token.is_punct or token.is_stop:
            continue
        filtered_tokens.append(token.lemma_)
    return " ".join(filtered_tokens)

In [77]:
df_balanced['preprocessed_comment'] = df_balanced['text'].apply(preprocess)
df_balanced.head()

Unnamed: 0,text,emotion,emotion_num,preprocessed_comment
15478,i want to do with my life is an amazing feelin...,joy,0,want life amazing feeling not pleased future head
552,i checked on you was a long time ago i can say...,joy,0,check long time ago happy way feel contented
4021,i should do but i think it means that i should...,joy,0,think mean open opportunity invite involve min...
13217,i feel the near and lively presence of the wel...,joy,0,feel near lively presence love past
2784,i am left tonight feeling so hopeful for the f...,joy,0,leave tonight feeling hopeful future orphan cr...


In [83]:
X2_train, X2_test, y2_train, y2_test = train_test_split(
    df_balanced.preprocessed_comment,
    df_balanced.emotion_num,
    test_size=0.2,
    stratify=df_balanced.emotion_num,
    random_state=2022
)

In [84]:
clf = Pipeline([
    ('count_vec_unigram',CountVectorizer(ngram_range=(1,1))),
    ('Random Forest', RandomForestClassifier())
])

clf.fit(X2_train,y2_train)
y2_predict = clf.predict(X2_test)
print(classification_report(y2_test,y2_predict))

              precision    recall  f1-score   support

           0       0.90      0.91      0.91       310
           1       0.86      0.88      0.87       310
           2       0.92      0.89      0.91       310
           3       0.94      0.93      0.94       310

    accuracy                           0.90      1240
   macro avg       0.90      0.90      0.90      1240
weighted avg       0.90      0.90      0.90      1240



In [85]:
clf = Pipeline([
    ('count_vec_bigram',CountVectorizer(ngram_range=(1,2))),
    ('Random Forest', RandomForestClassifier())
])

clf.fit(X2_train,y2_train)
y2_predict = clf.predict(X2_test)
print(classification_report(y2_test,y2_predict))

              precision    recall  f1-score   support

           0       0.92      0.91      0.91       310
           1       0.86      0.91      0.88       310
           2       0.94      0.90      0.92       310
           3       0.92      0.92      0.92       310

    accuracy                           0.91      1240
   macro avg       0.91      0.91      0.91      1240
weighted avg       0.91      0.91      0.91      1240



In [90]:
clf = Pipeline([
    ('tfid_vectorizer',TfidfVectorizer()),
    ('Random Forest', RandomForestClassifier())
])

clf.fit(X2_train,y2_train)
y2_predict = clf.predict(X2_test)
print(classification_report(y2_test,y2_predict))

              precision    recall  f1-score   support

           0       0.84      0.95      0.89       310
           1       0.90      0.84      0.86       310
           2       0.92      0.89      0.90       310
           3       0.94      0.91      0.93       310

    accuracy                           0.90      1240
   macro avg       0.90      0.90      0.90      1240
weighted avg       0.90      0.90      0.90      1240



In [100]:
clf.predict(["The room filled with laughter as friends embraced, celebrating together.",
             "Alone in the quiet room, tears silently traced her weary cheeks.",
             "His clenched fists trembled with rage as he unleashed fury.",
             "She spat out bitter words, eyes ablaze with seething resentment.",
            "I am done with you!"])

array([0, 1, 0, 2, 2], dtype=int64)

In [None]:
# bigram without preprocessing is giving much better result