# Multiclass Emotion Classification using Deep Learning

In [1]:
import tensorflow as tf
from tensorflow import keras

import pandas as pd

  from ._conv import register_converters as _register_converters


In [2]:
# Read the emotion dataset from the CSV file in the working directory.
df = pd.read_csv("emotion.csv")

# Bare last expression so the notebook renders the loaded frame.
df

Unnamed: 0,text,emotions
0,i feel awful about it too because it s my job ...,sadness
1,im alone i feel awful,sadness
2,ive probably mentioned this before but i reall...,joy
3,i was feeling a little low few days back,sadness
4,i beleive that i am much more sensitive to oth...,love
5,i find myself frustrated with christians becau...,love
6,i am one of those people who feels like going ...,joy
7,i feel especially pleased about this as this h...,joy
8,i was struggling with these awful feelings and...,joy
9,i feel so enraged but helpless at the same time,anger


In [3]:
# `shape` reports the frame's dimensions as a (rows, columns) tuple.

df.shape # rows, columns = df.shape

(416809, 2)

In [4]:
# Preview the first five rows (the default for `head`).
df.head() 

Unnamed: 0,text,emotions
0,i feel awful about it too because it s my job ...,sadness
1,im alone i feel awful,sadness
2,ive probably mentioned this before but i reall...,joy
3,i was feeling a little low few days back,sadness
4,i beleive that i am much more sensitive to oth...,love


In [5]:
# Preview the last ten rows of the frame.
df.tail(10) 

Unnamed: 0,text,emotions
416799,i always feel so valued when i am with her bec...,joy
416800,im truly sorry for the abandoned feeling my fe...,love
416801,i do feel my body aching dying crying for slee...,sadness
416802,i can t imagine her having as much of an immed...,surprise
416803,i could never bear feeling a violent thought,anger
416804,that was what i felt when i was finally accept...,joy
416805,i take every day as it comes i m just focussin...,fear
416806,i just suddenly feel that everything was fake,sadness
416807,im feeling more eager than ever to claw back w...,joy
416808,i give you plenty of attention even when i fee...,sadness


In [6]:
# Absolute number of rows (instances) belonging to each emotion class.
df.groupby(by='emotions').size()

emotions
anger        57317
fear         47712
joy         141067
love         34554
sadness     121187
surprise     14972
dtype: int64

In [3]:
# Encode the six emotion labels as integer class ids (0-5); the model below is
# compiled with `sparse_categorical_crossentropy`, which takes integer targets.
emotion_to_id = {'anger': 0, 'fear': 1, 'joy': 2, 'love': 3, 'sadness': 4, 'surprise': 5}

# Guard against re-running this cell: mapping an already-encoded column would
# look integers up in a dict keyed by strings and silently turn every label
# into NaN. Once the column holds integers the encoding step is skipped.
if not pd.api.types.is_integer_dtype(df['emotions']):
    encoded = df['emotions'].map(emotion_to_id)
    if encoded.isna().any():
        # Fail loudly instead of carrying NaN labels into training.
        unknown = sorted(df.loc[encoded.isna(), 'emotions'].astype(str).unique())
        raise ValueError(f"Unmapped emotion labels: {unknown}")
    df['emotions'] = encoded.astype('int64')

df.head()

Unnamed: 0,text,emotions
0,i feel awful about it too because it s my job ...,4
1,im alone i feel awful,4
2,ive probably mentioned this before but i reall...,2
3,i was feeling a little low few days back,4
4,i beleive that i am much more sensitive to oth...,3


In [8]:
# Display the frame again to inspect the `emotions` column after encoding.
df

Unnamed: 0,text,emotions
0,i feel awful about it too because it s my job ...,4
1,im alone i feel awful,4
2,ive probably mentioned this before but i reall...,2
3,i was feeling a little low few days back,4
4,i beleive that i am much more sensitive to oth...,3
5,i find myself frustrated with christians becau...,3
6,i am one of those people who feels like going ...,2
7,i feel especially pleased about this as this h...,2
8,i was struggling with these awful feelings and...,2
9,i feel so enraged but helpless at the same time,0


In [4]:
# --- Tokenizer settings ---
vocab_size = 15_000        # passed as `num_words` to the Tokenizer
oov_tok = "<OOV>"          # placeholder token for out-of-vocabulary words

# --- Sequence padding / truncation ---
max_length = 100           # every sequence is padded or cut to this length
padding_type = 'post'      # pad at the end of the sequence
trunc_type = 'post'        # truncate from the end of the sequence

# --- Model / data split ---
embedding_dim = 100        # size of each learned word vector
training_size = 333_447    # the first `training_size` rows form the training set

In [5]:
# The author marks this cell as unused by the project (try running the
# notebook without it to confirm). `tokenizer` and `word_index` are rebuilt
# further down using `vocab_size`, and `sequences` / `padded` are reassigned
# in the prediction cells.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_features = 6000

# num_words = maximum number of words we want to care about.
tokenizer = Tokenizer(
    num_words=max_features,
    oov_token="<OOV>",
)
tokenizer.fit_on_texts(df['text'])
word_index = tokenizer.word_index

# Convert every text to a sequence of token ids, then pad at the end.
sequences = tokenizer.texts_to_sequences(df['text'])
padded = pad_sequences(sequences, padding='post')

In [6]:
# Sequential (unshuffled) split: the first `training_size` rows are used for
# training and the remaining rows for testing.
training_sentences = df['text'].iloc[:training_size]
training_labels = df['emotions'].iloc[:training_size]

testing_sentences = df['text'].iloc[training_size:]
testing_labels = df['emotions'].iloc[training_size:]

In [7]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Build the vocabulary from the training sentences only.
#   num_words: maximum number of words we want to care about
#   oov_token: stand-in used for tokens that are out of vocabulary
tokenizer = Tokenizer(
    num_words=vocab_size,
    oov_token=oov_tok,
)
tokenizer.fit_on_texts(training_sentences)

# word -> integer index lookup, kept for inspection; the most common token is
# represented by 1, and so on.
word_index = tokenizer.word_index

# Turn each sentence into a sequence of token ids.
training_sequences = tokenizer.texts_to_sequences(training_sentences)
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)

# Pad / truncate every sequence to exactly `max_length` ids.
training_padded = pad_sequences(
    training_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)
testing_padded = pad_sequences(
    testing_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

In [None]:
# Per the original author's note, this conversion is needed to get training
# to work with TensorFlow 2.x: features and labels are turned into NumPy arrays.
import numpy as np

training_padded, testing_padded = np.array(training_padded), np.array(testing_padded)
training_labels, testing_labels = np.array(training_labels), np.array(testing_labels)

In [14]:
# Sanity check: training features and labels should have the same row count.
print(np.shape(training_padded), np.shape(training_labels), sep="\n")

(333447, 100)
(333447,)


In [15]:
# Shape of the padded test features.
print(testing_padded.shape)

(83362, 100)


In [16]:
# Small feed-forward text classifier assembled layer by layer.
model = tf.keras.Sequential()

# Learns an `embedding_dim`-sized vector for each of the `vocab_size` token
# ids; the vectors are adjusted epoch by epoch during training.
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))

# Collapses the sequence axis by averaging the word vectors of a sentence.
model.add(tf.keras.layers.GlobalAveragePooling1D())

model.add(tf.keras.layers.Dense(32, activation='relu'))

# One output unit per emotion class; softmax yields class probabilities.
model.add(tf.keras.layers.Dense(6, activation='softmax'))

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [17]:
# Train for a fixed number of epochs; the held-out test split is passed as
# validation data, and verbose=2 prints one summary line per epoch.
num_epochs = 10
history = model.fit(
    training_padded,
    training_labels,
    epochs=num_epochs,
    validation_data=(testing_padded, testing_labels),
    verbose=2,
)

Train on 333447 samples, validate on 83362 samples
Epoch 1/10
333447/333447 - 370s - loss: 0.4818 - accuracy: 0.8235 - val_loss: 0.2425 - val_accuracy: 0.8990
Epoch 2/10
333447/333447 - 365s - loss: 0.2050 - accuracy: 0.9089 - val_loss: 0.2065 - val_accuracy: 0.9003
Epoch 3/10
333447/333447 - 371s - loss: 0.1726 - accuracy: 0.9139 - val_loss: 0.1993 - val_accuracy: 0.9005
Epoch 4/10
333447/333447 - 368s - loss: 0.1563 - accuracy: 0.9177 - val_loss: 0.1930 - val_accuracy: 0.8985
Epoch 5/10
333447/333447 - 366s - loss: 0.1470 - accuracy: 0.9206 - val_loss: 0.1962 - val_accuracy: 0.8942
Epoch 6/10
333447/333447 - 366s - loss: 0.1399 - accuracy: 0.9232 - val_loss: 0.2050 - val_accuracy: 0.8945
Epoch 7/10
333447/333447 - 371s - loss: 0.1353 - accuracy: 0.9253 - val_loss: 0.2111 - val_accuracy: 0.8939
Epoch 8/10
333447/333447 - 365s - loss: 0.1310 - accuracy: 0.9270 - val_loss: 0.2297 - val_accuracy: 0.8918
Epoch 9/10
333447/333447 - 369s - loss: 0.1276 - accuracy: 0.9284 - val_loss: 0.2274 

In [18]:
# Run the trained network on a single hand-written sentence, applying the
# same tokenising and padding steps used for the training data.
sentence = ["It's a horrible movie waste of time"]
sequences = tokenizer.texts_to_sequences(sentence)
padded = pad_sequences(
    sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

# Prints one row of six class probabilities (softmax output).
print(model.predict(padded))

[[1.7880311e-02 1.0132099e-03 6.5801672e-05 3.4366141e-09 9.8103988e-01
  8.7459921e-07]]


In [19]:
# Second hand-written example, pushed through the same tokenise -> pad ->
# predict steps as the training data.
sentence = ["I feel quite satisfied for this project"]
sequences = tokenizer.texts_to_sequences(sentence)
padded = pad_sequences(
    sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

# Prints one row of six class probabilities (softmax output).
print(model.predict(padded))

[[8.3987499e-05 2.3984182e-06 9.9982965e-01 2.6877214e-09 8.3908242e-05
  2.1253689e-08]]


## Using SVM

In [None]:
from sklearn import svm

# Support-vector classifier with scikit-learn's default settings.
clf = svm.SVC()

# NOTE(review): the classifier is fitted directly on the padded token-id
# matrix, i.e. raw vocabulary indices are used as numeric features, and on the
# full training split. Confirm this is intended and finishes in reasonable time.
clf.fit(
    training_padded,
    training_labels,
)

In [None]:
# Predict a class id for every padded test sequence with the fitted SVM.
y_pred = clf.predict(testing_padded)

# Bare last expression so the notebook shows the predicted labels.
y_pred

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted class matches the true label.
print(
    'SVM Accuracy:',
    accuracy_score(y_true=testing_labels, y_pred=y_pred),
)

In [None]:
# Predict a new instance with the SVM: the sentence goes through the same
# tokenise -> pad steps that produced the SVM's training features.

sentence = ["She is very irritated and annoyed"]
sequences = tokenizer.texts_to_sequences(sentence)
padded = pad_sequences(
    sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

# Prints the predicted integer class id.
print(clf.predict(padded))