#import used modules

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM, Dropout, LeakyReLU, Bidirectional
from tensorflow.keras.optimizers import Nadam
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import string
import re

#download dataset from kaggle

In [2]:
! chmod 600 kaggle.json && (ls ~/.kaggle 2>/dev/null || mkdir ~/.kaggle) && mv kaggle.json ~/.kaggle/
! kaggle datasets download -d ishantjuyal/emotions-in-text
! ls

Downloading emotions-in-text.zip to /content
  0% 0.00/781k [00:00<?, ?B/s]
100% 781k/781k [00:00<00:00, 52.1MB/s]
emotions-in-text.zip  sample_data


In [3]:
! unzip ./emotions-in-text.zip

Archive:  ./emotions-in-text.zip
  inflating: Emotion_final.csv       


#read data from csv to dataframe

In [None]:
path = './Emotion_final.csv'
df = pd.read_csv(path, error_bad_lines=False)

#check dataframe

In [None]:
df.head(5)

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [None]:
df.isnull().sum()

Text       0
Emotion    0
dtype: int64

In [None]:
df.shape

(21459, 2)

In [None]:
print(df['Emotion'].unique())

['sadness' 'anger' 'love' 'surprise' 'fear' 'happy']


In [None]:
df['Text'].head(10)

0                              i didnt feel humiliated
1    i can go from feeling so hopeless to so damned...
2     im grabbing a minute to post i feel greedy wrong
3    i am ever feeling nostalgic about the fireplac...
4                                 i am feeling grouchy
5    ive been feeling a little burdened lately wasn...
6    ive been taking or milligrams or times recomme...
7    i feel as confused about life as a teenager or...
8    i have been with petronas for years i feel tha...
9                                  i feel romantic too
Name: Text, dtype: object

#preprocessing

In [None]:
newDf = df.copy()

In [None]:
newDf

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
21454,Melissa stared at her friend in dism,fear
21455,Successive state elections have seen the gover...,fear
21456,Vincent was irritated but not dismay,fear
21457,Kendall-Hume turned back to face the dismayed ...,fear


In [None]:
classEncoder = OneHotEncoder(sparse = False)
classColumns = classEncoder.fit_transform(newDf[['Emotion']])
classColumns

array([[0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 0.],
       ...,
       [0., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.]])

In [None]:
classColumnsdf = pd.DataFrame(classColumns)
classColumnsdf.columns = [i for i in classEncoder.categories_[0]]
outputNum = len([i for i in classEncoder.categories_[0]])
classColumnsdf

Unnamed: 0,anger,fear,happy,love,sadness,surprise
0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
21454,0.0,1.0,0.0,0.0,0.0,0.0
21455,0.0,1.0,0.0,0.0,0.0,0.0
21456,0.0,1.0,0.0,0.0,0.0,0.0
21457,0.0,1.0,0.0,0.0,0.0,0.0


In [None]:
newDf.drop('Emotion', axis = 1, inplace = True)
newDf = pd.concat([newDf, classColumnsdf], axis = 1)
newDf

Unnamed: 0,Text,anger,fear,happy,love,sadness,surprise
0,i didnt feel humiliated,0.0,0.0,0.0,0.0,1.0,0.0
1,i can go from feeling so hopeless to so damned...,0.0,0.0,0.0,0.0,1.0,0.0
2,im grabbing a minute to post i feel greedy wrong,1.0,0.0,0.0,0.0,0.0,0.0
3,i am ever feeling nostalgic about the fireplac...,0.0,0.0,0.0,1.0,0.0,0.0
4,i am feeling grouchy,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
21454,Melissa stared at her friend in dism,0.0,1.0,0.0,0.0,0.0,0.0
21455,Successive state elections have seen the gover...,0.0,1.0,0.0,0.0,0.0,0.0
21456,Vincent was irritated but not dismay,0.0,1.0,0.0,0.0,0.0,0.0
21457,Kendall-Hume turned back to face the dismayed ...,0.0,1.0,0.0,0.0,0.0,0.0


#split and process train test data

In [None]:
tokenizer = Tokenizer(17000, lower = False)
tokenizer.fit_on_texts(newDf['Text'])
numWord = len(tokenizer.word_index)

In [None]:
numWord

19943

In [None]:
trainX, validX, trainY, validY = train_test_split(newDf['Text'], newDf.drop('Text', axis=1).values,
                               test_size = 0.2, random_state = 81, stratify = newDf.drop('Text', axis=1).values)

In [None]:
trainX.size

17167

In [None]:
testX.size

4292

In [None]:
def createSequences(texts, tokenizer):
  sequences = tokenizer.texts_to_sequences(texts.values)
  return pad_sequences(sequences=sequences, padding='post')

In [None]:
trainSeq = createSequences(trainX, tokenizer)
validSeq = createSequences(validX, tokenizer)

In [None]:
trainSeq

array([[   1,   65,  245, ...,    0,    0,    0],
       [   1,   20,  119, ...,    0,    0,    0],
       [   1,  193,  257, ...,    0,    0,    0],
       ...,
       [   1,    2,  357, ...,    0,    0,    0],
       [   1,  408,    6, ...,    0,    0,    0],
       [   1,   20, 3035, ...,    0,    0,    0]], dtype=int32)

In [None]:
def defineModel():
  layers = [
            Embedding(numWord, 64),
            Bidirectional(LSTM(128, return_sequences=True)),
            Bidirectional(LSTM(64)),
            Dropout(0.2),
            Dense(32, activation='relu'),
            Dense(16, activation='relu'),
            Dense(outputNum, activation='softmax')

  ]
  model = Sequential(layers)
  model.compile(loss = 'categorical_crossentropy', optimizer = Nadam(learning_rate = 0.01), metrics = ['accuracy'])
  return model

In [None]:
model = defineModel()
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, None, 64)          1276352   
_________________________________________________________________
bidirectional_6 (Bidirection (None, None, 256)         197632    
_________________________________________________________________
bidirectional_7 (Bidirection (None, 128)               164352    
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 32)                4128      
_________________________________________________________________
dense_10 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_11 (Dense)             (None, 6)                

In [None]:
CHECKPOINT = './checkpoints/checkpoint.ckpt'
earlyStopping = EarlyStopping(monitor = 'loss', 
                              mode = 'min', 
                              patience = 5
)
checkpoint = ModelCheckpoint(filepath = CHECKPOINT,
                             monitor = 'val_accuracy', 
                             save_weights_only=True, 
                             mode = 'max', 
                             save_best_only = True
)
callbacks = [
        earlyStopping,
        checkpoint
]

model.fit(
    x = trainSeq,
    y = trainY,
    epochs = 200,
    steps_per_epoch = 16,
    verbose = 1,
    callbacks = callbacks,
    validation_data = (validSeq, validY),
    batch_size = 128,
    validation_batch_size = 128
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200


<tensorflow.python.keras.callbacks.History at 0x7fba456cbd50>

In [None]:
final_model = defineModel()
model.load_weights(CHECKPOINT)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fba42fa0ed0>

In [None]:
loss, acc = model.evaluate(validSeq, validY, verbose = 1)
print(f"final accuracy: {acc * 100}")

final accuracy: 91.49580597877502
