In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
import numpy as np

In [2]:
vocab_size = 20000
embedding_dim = 16
max_length = 300
trunc_type='post'
padding_type='post'
oov_tok = "<OOV>"

In [7]:
d1 = pd.read_csv("/Users/hin03/streamlit_NLP_venv/dataset/archive2/train.txt",
                       delimiter=';',names=['sentence','label'],header=None)

frame = d1
train_df = frame
test_df = pd.read_csv("/Users/hin03/streamlit_NLP_venv/dataset/archive2/test.txt",
                      delimiter=';',names=['sentence','label'],header=None)
train_df

Unnamed: 0,sentence,label
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
15995,i just had a very brief time in the beanbag an...,sadness
15996,i am now turning and i feel pathetic that i am...,sadness
15997,i feel strong and good overall,joy
15998,i feel like this was such a rude comment and i...,anger


In [8]:
test_df.iloc[33,:]

sentence    i feel so cold a href http irish
label                                  anger
Name: 33, dtype: object

### **Emotion Detect**

In [9]:
emotion = train_df.label.unique()
emotion_dict = {}
for i,v in enumerate(emotion):
    emotion_dict[v] = i
emotion_dict

{'sadness': 0, 'anger': 1, 'love': 2, 'surprise': 3, 'fear': 4, 'joy': 5}

In [46]:
train_df['label']= train_df['label'].apply(lambda x: emotion_dict.get(x))
test_df['label']= test_df['label'].apply(lambda x: emotion_dict.get(x))
test_df

Unnamed: 0,sentence,label
0,im feeling rather rotten so im not very ambiti...,0
1,im updating my blog because i feel shitty,0
2,i never make her separate from me because i do...,0
3,i left with my bouquet of red and yellow tulip...,5
4,i was feeling a little vain when i did this one,0
...,...,...
1995,i just keep feeling like someone is being unki...,1
1996,im feeling a little cranky negative after this...,1
1997,i feel that i am useful to my people and that ...,5
1998,im feeling more comfortable with derby i feel ...,5


In [47]:
train_sentence = [sentence for sentence in train_df['sentence']]
test_sentence = [sentence for sentence in test_df['sentence']]

In [48]:
tokenizer = Tokenizer(oov_token=oov_tok)
tokenizer.fit_on_texts(train_sentence)
tokenizer.word_index


{'<OOV>': 1,
 'i': 2,
 'feel': 3,
 'and': 4,
 'to': 5,
 'the': 6,
 'a': 7,
 'feeling': 8,
 'that': 9,
 'of': 10,
 'my': 11,
 'in': 12,
 'it': 13,
 'like': 14,
 'so': 15,
 'im': 16,
 'for': 17,
 'me': 18,
 'was': 19,
 'but': 20,
 'have': 21,
 'is': 22,
 'am': 23,
 'this': 24,
 'with': 25,
 'not': 26,
 'about': 27,
 'be': 28,
 'as': 29,
 'on': 30,
 'you': 31,
 'just': 32,
 'when': 33,
 'at': 34,
 'or': 35,
 'all': 36,
 'because': 37,
 'more': 38,
 'do': 39,
 'can': 40,
 'really': 41,
 'up': 42,
 't': 43,
 'are': 44,
 'by': 45,
 'very': 46,
 'know': 47,
 'been': 48,
 'if': 49,
 'out': 50,
 'myself': 51,
 'time': 52,
 'what': 53,
 'how': 54,
 'little': 55,
 'get': 56,
 'had': 57,
 'will': 58,
 'now': 59,
 'from': 60,
 'being': 61,
 'they': 62,
 'he': 63,
 'people': 64,
 'want': 65,
 'would': 66,
 'them': 67,
 'her': 68,
 'some': 69,
 'still': 70,
 'think': 71,
 'him': 72,
 'one': 73,
 'ive': 74,
 'an': 75,
 'even': 76,
 'who': 77,
 'life': 78,
 'there': 79,
 'its': 80,
 'make': 81,
 'bit':

In [49]:
m = 0
vt =0
for i,v in enumerate(train_sentence):
    if m < len(v):
        m = len(v)
        vt = i
        
print(m)
print(train_sentence[vt])

300
i hope that those of you who actauly found this and read it feel possibly inspired to go out and buy some of these items or even go through storage and see what clothes of yours your mom saved and that you still have a hope of fitting in and mix up your wardrobe for this summer and have a little fun


In [50]:
train_sequence = tokenizer.texts_to_sequences(train_sentence)
train_padded = pad_sequences(train_sequence,padding='post',
                             maxlen=max_length,
                             truncating=trunc_type)
train_padded.shape

(18000, 300)

In [51]:
test_sequence = tokenizer.texts_to_sequences(test_sentence)
test_padded = pad_sequences(test_sequence,padding='post',maxlen=max_length)
print(test_padded[0:10])
print(test_padded.shape)

[[   16     8   202 ...     0     0     0]
 [   16 12978    11 ...     0     0     0]
 [    2   143    81 ...     0     0     0]
 ...
 [    2  1006   163 ...     0     0     0]
 [    2    14     5 ...     0     0     0]
 [    2     1     2 ...     0     0     0]]
(2000, 300)


In [52]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size,embedding_dim,input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24,activation='relu'),
    tf.keras.layers.Dense(6,activation='sigmoid')
])
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])


In [53]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 300, 16)           320000    
                                                                 
 global_average_pooling1d (  (None, 16)                0         
 GlobalAveragePooling1D)                                         
                                                                 
 dense (Dense)               (None, 24)                408       
                                                                 
 dense_1 (Dense)             (None, 6)                 150       
                                                                 
Total params: 320558 (1.22 MB)
Trainable params: 320558 (1.22 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [54]:
train_label = []
for i in train_df['label']:
    tmp = np.zeros(6)
    tmp[int(i)] = 1
    train_label.append(tmp)
train_label = np.array(train_label)
train_label
test_label = []
for i in test_df['label']:
    tmp = np.zeros(6)
    tmp[int(i)] = 1
    test_label.append(tmp)
test_label = np.array(test_label)
train_label

array([[1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.]])

In [55]:
train_padded = np.array(train_padded)
test_padded = np.array(test_padded)
test_label.shape


(2000, 6)

In [None]:
num_epochs = 50
history = model.fit(train_padded, train_label, epochs=num_epochs, validation_data=(test_padded,test_label), verbose=2)

In [None]:
swapped_dict = {value: key for key, value in emotion_dict.items()}
def answer(arr,emotion_dict):
    m = max(arr)
    for i in range(len(arr)):
        if arr[i] == m:
            print(swapped_dict[i])

In [None]:
#'sadness': 0, 'anger': 1, 'love': 2, 'surprise': 3, 'fear': 4, 'joy': 5
sentences = [
    input()
]
sequence = tokenizer.texts_to_sequences(sentences)
padded = pad_sequences(sequence,maxlen=max_length,
                       padding = padding_type,
                       truncating=trunc_type)
for i in model.predict(padded):
    answer(i,swapped_dict)

joy
