In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import re
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords

In [4]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [5]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

In [6]:
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [7]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [8]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Emotion Recognition/Dataset/data.csv')

In [9]:
df = df[df['Label'] != 2]

In [10]:
train_split = int(len(df) * 0.8)
val_split = int(len(df) * 0.9)

In [11]:
emotion_train = df[:train_split]
emotion_valid = df[train_split:val_split]
emotion_test = df[val_split:]

In [None]:
emotion_train.dropna(inplace=True)
emotion_test.dropna(inplace=True)
emotion_valid.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  emotion_train.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  emotion_test.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  emotion_valid.dropna(inplace=True)


In [None]:
emotion_train.head(-5)

Unnamed: 0,Text,Label
0,i feel awful about it too because it s my job ...,0
1,im alone i feel awful,0
2,ive probably mentioned this before but i reall...,1
3,i was feeling a little low few days back,0
6,i am one of those people who feels like going ...,1
...,...,...
333329,i feel a little helpless as i cant go back thr...,4
333330,i feel relieved that i have an excuse for not ...,1
333331,i have a horrible feeling that his mad gay lab...,3
333333,i feel at ease running with mandy and trust th...,3


In [None]:
emotion_train.tail()

Unnamed: 0,Text,Label
333335,i allow her to share special moments with a gr...,3
333336,i feel called to this work and because i am pa...,1
333337,i feel anger when you describe to me in detail...,0
333339,i guess but i feel so stupid for being lured i...,0
333340,im doing and i answer truthfully that its been...,5


In [None]:
emotion_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 305804 entries, 0 to 333340
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   Text    305804 non-null  object
 1   Label   305804 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 7.0+ MB


In [None]:
emotion_valid.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 38225 entries, 333341 to 375054
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Text    38225 non-null  object
 1   Label   38225 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 895.9+ KB


In [None]:
emotion_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41681 entries, 375128 to 416808
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Text    41681 non-null  object
 1   Label   41681 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 651.4+ KB


NLP cleaning

In [None]:
stemming = PorterStemmer()

In [None]:
def nlp_cleaning(data):
    corpus = []
    i = 0
    for sent in data:
        print(i, end=' ')
        no_num = re.sub('[^a-zA-Z]', ' ', sent)
        lowercase = no_num.lower()
        print(lowercase)
        words = lowercase.split()
        after_stem = [stemming.stem(word) for word in words if not word in stopwords.words('english')]
        senten = ' '.join(after_stem)
        corpus.append(senten)
        i += 1
    return corpus

Transforming the data

In [12]:
cv = CountVectorizer(max_features=1000)
tv = TfidfVectorizer(max_features=1000)

In [None]:
X_train = nlp_cleaning(emotion_train['Text'])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
328447 i might not buy the full size of this i will finish using the sample size as it did make my skin feel super smooth after
328448 i feel so devoted to him
328449 i have been having this feeling in my heart to do a sweet thankful post
328450 i started to feel surprisingly calm
328451 i feel the game is atrociously clever
328452 i do this sometimes and then i check fb on my phone and it feels so ludicrous but still i do it like reaching for a drink when you know damn well youve had enough or another cookie or a chip
328453 i thought ryan would be feeling cold
328454 i started to feel really dumb
328455 i still feel stupid rude and naive for judging her exactly as i have been unfairly judged
328456 i feel like tumblr lookbook and blogs are the trendsetters these days and one day magazines will be very worthless since many people get their inspirations from the internet
328457 im not sure surprise is the best word to des

In [None]:
X_valid = nlp_cleaning(emotion_valid['Text'])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
36681 i cant describe to you how bad it is to feel cold air circulating around my feet even though i wear socks and have two layers of blanket
36682 i feel blessed that my world has expanded through photography and hope my passion never weakens
36683 i feel very unhappy and incomplete
36684 i feel like i should be distraught and wounded but i feel renewed and that i have freewill to go on my adventures
36685 i wish i could assert myself without feeling selfish
36686 i would rub my crotch for hours as i drove around feeling like a naughty dancer girl
36687 i am feeling more fucked this exam period than any exam period prior because
36688 i hate this feeling the feeling where am helpless am not able to control the situation around me i cant do anything to make things better i don t know how i can work my way out
36689 i feel very rebellious lately
36690 ive been feeling a little sentimental lately and this just added to tho

In [None]:
X_test = nlp_cleaning(emotion_test['Text'])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
36681 i feel the pain in my vein its oh so vain am i insane
36682 i can do is something that i feel is worthwhile
36683 i feel today keen on writing here and in all my blogs lmao
36684 i just spontaneously dusted and vacuumed my flat no visitors are expected no nasty comments from the mother sparked it im feeling very very virtuous
36685 i thought he was the love of my life and i feel tortured emotionally
36686 i hope i will have made progress i can feel proud of
36687 i still feel like i am trying to balance everything in my life precariously and things are messy but there is optimism there too
36688 i was relieved that the author didnt feel pressured in giving them both equal screen time
36689 i did however feel a bit shocked when the rim of a tire shot through one of the back windows and out the other
36690 i do feel nervous but i m excited at the same time and i cant wait to see what college offers
36691 i thought abo

In [13]:
X_train = emotion_train['Text']
X_valid = emotion_valid['Text']
X_test = emotion_test['Text']

In [14]:
X_train_cv = cv.fit_transform(X_train).toarray()
X_valid_cv = cv.transform(X_valid).toarray()
X_test_cv = cv.transform(X_test).toarray()

In [None]:
# X_train_tv = tv.fit_transform(X_train).toarray()
# X_validate_tv = tv.transform(X_validate).toarray()
# X_test_tv = tv.transform(X_test).toarray()

In [15]:
y_train = emotion_train['Label'].values
y_valid = emotion_valid['Label'].values
y_test = emotion_test['Label'].values

In [None]:
# X_train_cv = np.expand_dims(X_train_cv, 2)
# X_valid_cv = np.expand_dims(X_valid_cv, 2)
# X_test_cv = np.expand_dims(X_test_cv, 2)

In [16]:
X_train_cv.shape

(305804, 1000)

In [17]:
model = Sequential()

# model.add(LSTM(128, input_shape = (1000, 1)))
model.add(Dense(500, activation='relu', input_shape = (1000, )))
model.add(Dropout(0.5))

model.add(Dense(250, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(5, activation='softmax'))

In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 500)               500500    
                                                                 
 dropout (Dropout)           (None, 500)               0         
                                                                 
 dense_1 (Dense)             (None, 250)               125250    
                                                                 
 dropout_1 (Dropout)         (None, 250)               0         
                                                                 
 dense_2 (Dense)             (None, 100)               25100     
                                                                 
 dropout_2 (Dropout)         (None, 100)               0         
                                                                 
 dense_3 (Dense)             (None, 64)                6

In [19]:
optimizer = Adam(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])



In [20]:
dummies = pd.get_dummies(emotion_train['Label'])
new_y_train = dummies.values
new_y_train

array([[1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       ...,
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1]], dtype=uint8)

In [21]:
val_dummies = pd.get_dummies(emotion_valid['Label'])
new_y_val = val_dummies.values

X_valid_cv.shape, new_y_val.shape

((38225, 1000), (38225, 5))

In [23]:
model.fit(x=X_train_cv, y=new_y_train, batch_size=128, validation_data=(X_valid_cv, new_y_val), epochs=25, verbose=1)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f01f1262e90>

In [None]:
model.save('/content/drive/MyDrive/Colab Notebooks/Emotion Recognition/DL_saved_model')



In [None]:
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/Emotion Recognition/DL_saved_model')

In [None]:
y_pred = loaded_model.predict(X_test_cv)



In [None]:
losses = pd.DataFrame(model.history.history)

In [None]:
losses

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,0.674774,0.764592,0.390205,0.852091
1,0.432056,0.84925,0.362834,0.857033
2,0.394422,0.856166,0.358346,0.855834
3,0.374241,0.859297,0.359227,0.855762
4,0.360782,0.862473,0.356601,0.857201
5,0.34928,0.864167,0.358351,0.854994
6,0.340113,0.866653,0.368235,0.859576
7,0.331014,0.869002,0.368646,0.85605
8,0.326232,0.871119,0.36586,0.857513
9,0.318062,0.872735,0.368635,0.858689


In [None]:
y_test = emotion_test['Label'].tolist()
y_test

[1,
 4,
 5,
 0,
 0,
 0,
 1,
 2,
 5,
 1,
 1,
 4,
 1,
 2,
 2,
 1,
 3,
 3,
 4,
 1,
 1,
 2,
 0,
 0,
 1,
 4,
 4,
 1,
 3,
 4,
 1,
 1,
 0,
 1,
 1,
 3,
 4,
 3,
 1,
 2,
 0,
 3,
 1,
 0,
 0,
 1,
 1,
 4,
 1,
 2,
 1,
 5,
 0,
 1,
 4,
 1,
 1,
 0,
 4,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 2,
 1,
 1,
 0,
 4,
 0,
 1,
 3,
 0,
 0,
 2,
 1,
 0,
 0,
 2,
 0,
 4,
 4,
 1,
 1,
 0,
 3,
 0,
 0,
 3,
 2,
 1,
 0,
 0,
 1,
 4,
 5,
 0,
 1,
 1,
 1,
 3,
 1,
 4,
 2,
 1,
 2,
 1,
 5,
 0,
 0,
 0,
 1,
 0,
 3,
 0,
 1,
 1,
 0,
 1,
 3,
 2,
 4,
 3,
 0,
 0,
 0,
 0,
 0,
 3,
 2,
 0,
 1,
 4,
 2,
 2,
 1,
 1,
 0,
 4,
 3,
 0,
 0,
 0,
 3,
 1,
 3,
 2,
 1,
 0,
 2,
 4,
 1,
 2,
 0,
 0,
 1,
 1,
 1,
 1,
 3,
 3,
 0,
 1,
 3,
 0,
 0,
 4,
 0,
 0,
 3,
 0,
 1,
 1,
 1,
 1,
 3,
 1,
 2,
 0,
 2,
 3,
 4,
 4,
 0,
 2,
 1,
 2,
 1,
 1,
 0,
 0,
 0,
 4,
 3,
 3,
 1,
 2,
 0,
 1,
 2,
 4,
 1,
 3,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 4,
 0,
 1,
 0,
 4,
 4,
 3,
 0,
 5,
 1,
 4,
 0,
 4,
 3,
 4,
 0,
 1,
 4,
 0,
 1,
 4,
 1,
 1,
 4,
 0,
 0,
 1,
 4,
 4,
 0,


In [None]:
y_pred

array([[1.1007062e-03, 5.4098296e-01, 4.5605627e-01, 1.4263185e-03,
        3.8901248e-04, 4.4614684e-05],
       [5.4012211e-03, 4.2654076e-03, 4.8268379e-05, 2.3265039e-03,
        9.8777217e-01, 1.8641046e-04],
       [1.1736698e-06, 2.2165979e-05, 1.3431877e-07, 5.5811478e-07,
        1.2419333e-06, 9.9997467e-01],
       ...,
       [9.9996716e-01, 6.5310219e-06, 1.2420199e-08, 2.5694973e-05,
        4.3221485e-07, 5.9002131e-10],
       [3.4957516e-08, 9.9999994e-01, 3.5406353e-09, 3.5586737e-08,
        3.5442754e-10, 1.5940100e-12],
       [9.9999940e-01, 3.5387583e-08, 3.8140029e-12, 6.3787166e-07,
        3.5414149e-10, 4.6884812e-14]], dtype=float32)

In [None]:
final_pred = np.argmax(y_pred, axis = 1)

In [None]:
final_pred

array([1, 4, 5, ..., 0, 1, 0])

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

cr = classification_report(y_test, final_pred)
cm = confusion_matrix(y_test, final_pred)

print(cm)
print()
print(cr)

[[11097   525    29   121    72    27]
 [  412 13524    95    85    77    36]
 [  128  1193  2157    24     8    12]
 [  689   398    27  4586    48     7]
 [  448   248     5   305  3407   420]
 [   59   184     5     9    14  1200]]

              precision    recall  f1-score   support

           0       0.86      0.93      0.90     11871
           1       0.84      0.95      0.89     14229
           2       0.93      0.61      0.74      3522
           3       0.89      0.80      0.84      5755
           4       0.94      0.70      0.81      4833
           5       0.71      0.82      0.76      1471

    accuracy                           0.86     41681
   macro avg       0.86      0.80      0.82     41681
weighted avg       0.87      0.86      0.86     41681



In [None]:
df['Label'].value_counts()

1    141067
0    121187
3     57317
4     47712
2     34554
5     14972
Name: Label, dtype: int64

In [None]:
def custom_prediction():
  text = input('Enter you text : ')
  text = [text]
  emotions = ['sad', 'joy', 'love', 'anger', 'fear', 'surprise']
  text_cv = cv.transform(text)
  pred = loaded_model.predict(text_cv)
  prediction = np.argmax(pred, axis=1)[0]
  print(emotions[prediction])

In [None]:
custom_prediction()

Enter you text : i am in love with you
joy
