In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
import numpy as np

In [2]:
# Preprocessing / model settings shared by the cells below.
vocab_size = 20000       # vocabulary size
embedding_dim = 16       # embedding dimension
max_length = 300         # `maxlen` handed to pad_sequences
trunc_type = padding_type = 'post'   # truncate and pad at the end of each sequence
oov_tok = "<OOV>"        # token substituted for out-of-vocabulary words

In [15]:
# Both files hold one `sentence;label` pair per line and have no header row.
read_kwargs = dict(delimiter=';', names=['sentence', 'label'], header=None)

train_df = pd.read_csv("archive2/train.txt", **read_kwargs)
# `frame` and `d1` are aliases of the same DataFrame object as `train_df`,
# so in-place changes made through one name are visible through the others.
frame = train_df
d1 = train_df

test_df = pd.read_csv("archive2/test.txt", **read_kwargs)
train_df

Unnamed: 0,sentence,label
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
16023,i just had a very brief time in the beanbag an...,sadness
16024,i am now turning and i feel pathetic that i am...,sadness
16025,i feel strong and good overall,joy
16026,i feel like this was such a rude comment and i...,anger


In [4]:
# Inspect a single test example (row position 33).
test_df.iloc[33]

sentence    i feel so cold a href http irish
label                                  anger
Name: 33, dtype: object

### **Emotion Detection**

In [18]:
# Assign each emotion name an integer id, in order of first appearance in the
# training labels.
emotion = train_df['label'].unique()
emotion_dict = {name: idx for idx, name in enumerate(emotion)}
emotion_dict

{'sadness': 0, 'anger': 1, 'love': 2, 'surprise': 3, 'fear': 4, 'joy': 5}

In [19]:
def _encode_labels(labels, mapping):
    """Return `labels` with emotion names replaced by their integer ids.

    Values that are already ids pass through unchanged, so re-running this
    cell does not turn the whole column into None. A value that is neither a
    known name nor a known id raises ValueError instead of silently becoming
    None.
    """
    encoded = labels.map(lambda value: mapping.get(value, value))
    unknown = ~encoded.isin(list(mapping.values()))
    if unknown.any():
        raise ValueError(
            f"unknown labels: {sorted(set(map(str, encoded[unknown])))}"
        )
    return encoded.astype(int)


train_df['label'] = _encode_labels(train_df['label'], emotion_dict)
test_df['label'] = _encode_labels(test_df['label'], emotion_dict)
test_df

Unnamed: 0,sentence,label
0,im feeling rather rotten so im not very ambiti...,0
1,im updating my blog because i feel shitty,0
2,i never make her separate from me because i do...,0
3,i left with my bouquet of red and yellow tulip...,5
4,i was feeling a little vain when i did this one,0
...,...,...
1995,i just keep feeling like someone is being unki...,1
1996,im feeling a little cranky negative after this...,1
1997,i feel that i am useful to my people and that ...,5
1998,im feeling more comfortable with derby i feel ...,5


In [8]:
# Plain Python lists of the raw sentence strings for each split.
train_sentence, test_sentence = (
    split['sentence'].tolist() for split in (train_df, test_df)
)

In [9]:
# Preview the first ten training sentences.
train_sentence[:10]

['i didnt feel humiliated',
 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake',
 'im grabbing a minute to post i feel greedy wrong',
 'i am ever feeling nostalgic about the fireplace i will know that it is still on the property',
 'i am feeling grouchy',
 'ive been feeling a little burdened lately wasnt sure why that was',
 'ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny',
 'i feel as confused about life as a teenager or as jaded as a year old man',
 'i have been with petronas for years i feel that petronas has performed well and made a huge profit',
 'i feel romantic too']

In [10]:
# Cap the vocabulary at vocab_size so token ids stay below that bound, and map
# words outside the kept vocabulary to the OOV token.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(train_sentence)  # expects a list of strings, one per sentence
# word_index lists every word seen while fitting, regardless of num_words.
len(tokenizer.word_index)


15213

In [11]:
# Find the longest training sentence, measured in characters (len() of the raw
# string), not in tokens. Dedicated names are used so this cell does not
# overwrite `m`, which another cell binds to the model wrapper.
longest_idx = max(
    range(len(train_sentence)),
    key=lambda idx: len(train_sentence[idx]),  # first maximum wins on ties
)
longest_chars = len(train_sentence[longest_idx])

print(longest_chars)
print(train_sentence[longest_idx])

300
i hope that those of you who actauly found this and read it feel possibly inspired to go out and buy some of these items or even go through storage and see what clothes of yours your mom saved and that you still have a hope of fitting in and mix up your wardrobe for this summer and have a little fun


In [12]:
# Convert sentences to integer id sequences, then pad/truncate to max_length.
# The padding and truncation sides come from the shared config constants so
# every split is preprocessed the same way.
train_sequence = tokenizer.texts_to_sequences(train_sentence)
train_padded = pad_sequences(
    train_sequence,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)
train_padded.shape

(16028, 300)

In [13]:
# Same preprocessing as the training split. `truncating` is passed explicitly
# because pad_sequences otherwise truncates from the front ('pre'), which
# would differ from how the training sequences are cut.
test_sequence = tokenizer.texts_to_sequences(test_sentence)
test_padded = pad_sequences(
    test_sequence,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)
print(test_padded.shape)

(2000, 300)


In [20]:
def _one_hot(labels, num_classes):
    """Return a (len(labels), num_classes) float array with a 1 at each label id.

    `labels` is any sequence of integer class ids; every id must be a valid
    index into a row of length `num_classes`, otherwise IndexError is raised.
    """
    indices = np.asarray(labels, dtype=int)
    # Row k of the identity matrix is the one-hot vector for class k.
    return np.eye(num_classes)[indices]


# Derive the class count from the label mapping instead of hard-coding it.
num_classes = len(emotion_dict)
train_label = _one_hot(train_df['label'], num_classes)
test_label = _one_hot(test_df['label'], num_classes)
train_label

array([[1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.]])

In [22]:
# Project-local helper module (not part of TensorFlow/Keras).
from model_processing_1 import model_processing

# Bind `m` to the model wrapper, constructed from the raw sentence lists.
m = model_processing(train_sentence,test_sentence)
# Presumably builds the network later accessed as m.model — TODO confirm in model_processing_1.
m.main_model()


In [23]:
# Show the layer-by-layer summary of the model held by the wrapper.
m.model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 300, 100)          2000000   
                                                                 
 bidirectional_2 (Bidirectio  (None, 300, 128)         84480     
 nal)                                                            
                                                                 
 bidirectional_3 (Bidirectio  (None, 64)               41216     
 nal)                                                            
                                                                 
 dense_3 (Dense)             (None, 24)                1560      
                                                                 
 dense_4 (Dense)             (None, 12)                300       
                                                                 
 dense_5 (Dense)             (None, 6)                

In [24]:
# Make sure both padded matrices are NumPy arrays before they are passed to fit().
train_padded, test_padded = np.array(train_padded), np.array(test_padded)
test_label.shape


(2000, 6)

In [None]:
# Single source of truth for the epoch count. fit() used to receive the
# literal 5 while num_epochs was set to 50 and never read; the value that was
# actually in effect (5) is kept.
num_epochs = 5
# NOTE(review): the test split is also used as validation data here; keep a
# separate held-out split if these metrics are used for tuning.
history = m.model.fit(
    train_padded,
    train_label,
    epochs=num_epochs,
    validation_data=(test_padded, test_label),
    verbose=2,
)

In [31]:
# Presumably loads a previously saved model from this file into the wrapper — TODO confirm
# against model_processing_1. The path is relative to the working directory.
m.load_model("model_vn.keras")







In [32]:
# Reverse lookup: class id -> emotion name.
swapped_dict = dict(zip(emotion_dict.values(), emotion_dict.keys()))
def answer(arr, emotion_dict):
    """Print and return the emotion label(s) with the highest score in `arr`.

    Parameters
    ----------
    arr : sequence of numbers
        One score per class, e.g. a single row of `model.predict(...)`.
    emotion_dict : dict
        Class mapping in either orientation: id -> name (what the prediction
        cell passes in) or name -> id. The lookup uses this argument rather
        than a notebook-level global.

    Returns
    -------
    list
        Names of every class whose score equals the maximum, in id order
        (ties are all reported). Empty when `arr` is empty.
    """
    scores = list(arr)
    if not scores:
        return []

    # A mapping keyed by strings is name -> id; flip it so ids can be looked up.
    if all(isinstance(key, str) for key in emotion_dict):
        id_to_name = {idx: name for name, idx in emotion_dict.items()}
    else:
        id_to_name = emotion_dict

    best = max(scores)
    winners = [id_to_name[idx] for idx, score in enumerate(scores) if score == best]
    for name in winners:
        print(name)
    return winners

In [39]:
# Class ids for reference:
# 'sadness': 0, 'anger': 1, 'love': 2, 'surprise': 3, 'fear': 4, 'joy': 5
sentences = [input()]  # one sentence typed in by the user

# Same tokenizer and padding settings as the other cells in this notebook.
sequence = tokenizer.texts_to_sequences(sentences)
padded = pad_sequences(
    sequence,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

# One row of scores per input sentence; report the top-scoring emotion(s).
for score_row in m.model.predict(padded):
    answer(score_row, swapped_dict)

anger
