In [33]:
import numpy as np
import pandas as pd
import keras
import tensorflow
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras.utils import to_categorical

In [34]:
data = pd.read_csv(r'C:\Users\User\Desktop\Data Science\Deep Learning\data\text_emotion_classification\train.txt', sep = ";")

In [35]:
data.columns = ["text", "emotions"]
data.head()

Unnamed: 0,text,emotions
0,i can go from feeling so hopeless to so damned...,sadness
1,im grabbing a minute to post i feel greedy wrong,anger
2,i am ever feeling nostalgic about the fireplac...,love
3,i am feeling grouchy,anger
4,ive been feeling a little burdened lately wasn...,sadness


In [36]:
texts = data['text'].tolist()
labels = data['emotions'].tolist()
print(data['emotions'].unique())

['sadness' 'anger' 'love' 'surprise' 'fear' 'joy']


In [37]:
# tokenize the text data

tkn = Tokenizer()
tkn.fit_on_texts(texts)
print(texts[0])
print(len(tkn.word_index))

i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake
15212


In [38]:
# padding the data to feed into the neural network. Padding makes data to have same length to feed

sequences = tkn.texts_to_sequences(texts)
max_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen = max_length)
print(padded_sequences[0])

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    1   39  100   59    7   14  493    4   14 3495  552
   31   59   60  127  147   75 1479    3   21 1254]


In [39]:
# using label encoder to convert string into the numerical representation

label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)
print(labels)

[4 0 3 ... 2 0 4]


In [40]:
# Encoding the label into one-hot label encorder
one_hot_labels = keras.utils.to_categorical(labels)

In [41]:
x_train, x_test, y_train, y_test = train_test_split(padded_sequences, one_hot_labels, test_size = 0.2, random_state = 7)

In [44]:
model = Sequential()
model.add(Embedding(input_dim= len(tkn.word_index)+ 1,
                   output_dim = 128, input_length = max_length))
model.add(Flatten())
model.add(Dense(units = 128, activation = 'relu'))
model.add(Dense(units = len(one_hot_labels[0]), activation = 'softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.fit(x_train, y_train, epochs = 10, batch_size = 32, validation_data = (x_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x237ae3e6f10>

In [56]:
input_text = "She didn't come today because she lost her dog yesterday"
input_2 = "I successfully cleared my exam today"
# process the input text
input_sequence = tkn.texts_to_sequences([input_2])
padded_input_sequence = pad_sequences(input_sequence, maxlen = max_length)
prediction = model.predict(padded_input_sequence)
print(prediction, 'is output vector')
prediction_label = label_encoder.inverse_transform([np.argmax(prediction[0])])
prediction_label = ' '.join(prediction_label)
print(prediction_label, '-- the actual emotion')

[[0.01904604 0.26280832 0.6976272  0.0104871  0.00718176 0.00284956]] is output vector
joy -- the actual emotion
