In [2]:
! pip install emoji

Collecting emoji
  Using cached emoji-1.7.0-py3-none-any.whl
Installing collected packages: emoji
Successfully installed emoji-1.7.0


In [1]:
import emoji

In [2]:
emoji_dictionary = {"0": "\u2764\uFE0F",
                    "1": ":baseball:",
                    "2": ":grinning_face_with_big_eyes:",
                    "3": ":disappointed_face:",
                    "4": ":fork_and_knife:",
                    "5": ":hundred_points:",
                    "6": ":fire:",
                    "7": ":face_blowing_a_kiss:",
                    "8": ":chestnut:",
                    "9": ":flexed_biceps:"
                   }

In [3]:
for e in emoji_dictionary.values():
    print(emoji.emojize(e))         #emoji.emojize used to print the emojis

❤️
⚾
😃
😞
🍴
💯
🔥
😘
🌰
💪


In [4]:
##processing a custom dataset

In [5]:
import pandas as pd
import numpy as np

In [6]:
train = pd.read_csv('train_emoji.csv',header=None)
test = pd.read_csv('test_emoji.csv',header=None)

In [7]:
train.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [8]:
#Printing sentences with emojis

In [9]:
data = train.values
print(data.shape)

(132, 4)


In [10]:
X_train = train[0]
Y_train = train[1]

X_test = test[0]
Y_test = test[1]

In [11]:
for i in range(5):
    print(X_train[i],emoji.emojize(emoji_dictionary[str(Y_train[i])]))

never talk to me again 😞
I am proud of your achievements 😃
It is the worst day in my life 😞
Miss you so much ❤️
food is life 🍴


In [12]:
#converting sentences into its embeddings

In [13]:
f = open('glove.6B.50d.txt', encoding = 'utf-8')

In [14]:
#in glove vector file, there is a 50 dimensional number vector for each word which gives how a word should be
# represented in numeric form  
embeddings_index = {}
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:],dtype='float')
    #print(word,coefs)
    embeddings_index[word] = coefs #converting this into a dictionary - vector lookup for each word
f.close()    
    
    

In [15]:
emb_dim = embeddings_index["eat"].shape[0]
print(emb_dim)  # for using glove vectors, it's output is of fixed length of 50 dimension

50


In [16]:
#step 4 converting sentences into vectors(Embedding layer output)

In [17]:
def embedding_output(X):
    maxlen = 10 #for each sentence in the input, it's max length is already defined
    embedding_out = np.zeros((X.shape[0],maxlen,emb_dim)) #X.shape[0] gives us the batch size, maxlen is the maximum length of
    ##each of the sentences in the batch and emb_dim are the output dimension of each word from embedding layer
    for ix in range(X.shape[0]):
        X[ix] = X[ix].split() #we are splitting each sentences to its words to iterate over it 
        
        for ij in range(len(X[ix])):
            #going to every word in the current ix sentence
            try:
                embedding_out[ix][ij] = embeddings_index[X[ix][ij].lower()] #output of the sentence when the word is made lowercase
            except:
                embedding_out[ix][ij] = np.zeros((50,))
    return embedding_out                
    



In [18]:
embeddings_matrix_train = embedding_output(X_train)
embeddings_matrix_test = embedding_output(X_test)
print(X_train[0])
print(len(X_train[0]))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[ix] = X[ix].split() #we are splitting each sentences to its words to iterate over it


['never', 'talk', 'to', 'me', 'again']
5


In [19]:
print(embeddings_matrix_test.shape)#(no of sentences, max length of each sentence(i.e. 10 words in each sentence), output dimension of each word)

(56, 10, 50)


In [20]:
print(embeddings_matrix_train.shape) #(no of sentences, max length of each sentence(i.e. 10 words in each sentence), output dimension of each word)

(132, 10, 50)


In [21]:
#converting y train to one hot vectors
from keras.utils import to_categorical
Y_train = to_categorical(Y_train, num_classes=5)
Y_test = to_categorical(Y_test, num_classes=5)
print(Y_train.shape)
print(Y_train[0])


(132, 5)
[0. 0. 0. 1. 0.]


In [22]:
#Defining RNN/LSTM model

In [23]:
from keras.models import Sequential
from keras.layers import *

In [24]:
model = Sequential() #for sequential model
model.add(LSTM(64,input_shape = (10,50), return_sequences= True)) #10 words in each input sentence with 50 dimensions, hidden layer dimension=64 which is also
                                          #output dimension, return_sequences = true, this will give data to the next LSTM layer
model.add(Dropout(0.5))
model.add(LSTM(64,return_sequences = False))
model.add(Dropout(0.5))
model.add(Dense(5)) #dense layer for final classification with 5 emjoi output
model.add(Activation('softmax'))#softmax classifier is used as we have multiple classes so for that we use it as it will give us probabilities
#multiple classification problem hence, we use that type of model
model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 64)            29440     
                                                                 
 dropout (Dropout)           (None, 10, 64)            0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense (Dense)               (None, 5)                 325       
                                                                 
 activation (Activation)     (None, 5)                 0         
                                                                 
Total params: 62,789
Trainable params: 62,789
Non-traina

In [25]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint("best_model.h5", monitor='val_loss', verbose=True, save_best_only=True) #to restore the best model
# among the given models
earlystop = EarlyStopping(monitor = 'val_acc', patience = 10)

hist = model.fit(embeddings_matrix_train, Y_train, epochs = 100, batch_size=64, shuffle=True, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100


Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [26]:
#model.load_weights("best_model.h5")

In [27]:
model.evaluate(embeddings_matrix_test, Y_test)



[0.8215122222900391, 0.8035714030265808]

In [28]:
pred = model.predict(embeddings_matrix_test)
classes = np.argmax(pred, axis=1)



In [29]:
print(classes)

[4 3 2 0 2 2 3 2 4 2 1 2 0 3 1 3 2 2 3 4 3 3 4 2 3 3 2 0 1 2 0 1 3 2 0 1 2
 4 4 2 1 0 0 1 2 2 3 2 3 3 3 0 3 2 2 4]


In [30]:
for i in range(30):
    print(' '.join(X_test[i]))
    print(emoji.emojize(emoji_dictionary[str(np.argmax(Y_test[i]))]))
    print(emoji.emojize(emoji_dictionary[str(classes[i])]))

I want to eat
🍴
🍴
he did not answer
😞
😞
he got a very nice raise
😃
😃
she got me a nice present
😃
❤️
ha ha ha it was so funny
😃
😃
he is a good friend
😃
😃
I am upset
😞
😞
We had such a lovely dinner tonight
😃
😃
where is the food
🍴
🍴
Stop making this joke ha ha ha
😃
😃
where is the ball
⚾
⚾
work is hard
😞
😃
This girl is messing with me
😞
❤️
are you serious
😞
😞
Let us go play baseball
⚾
⚾
This stupid grader is not working
😞
😞
work is horrible
😞
😃
Congratulation for having a baby
😃
😃
stop pissing me off
😞
😞
any suggestions for dinner
🍴
🍴
I love taking breaks
❤️
😞
you brighten my day
😃
😞
I boiled rice
🍴
🍴
she is a bully
😞
😃
Why are you feeling bad
😞
😞
I am upset
😞
😞
give me the ball
⚾
😃
My grandmother is the love of my life
❤️
❤️
enjoy your game
⚾
⚾
valentine day is near
😃
😃


In [33]:
#Doing the below processing for flask integration of the model

In [58]:
with open("model.json" , "w") as file: #converting model to json 
    file.write(model.to_json())
model.save_weights("best_model.h5")    

In [59]:
from keras.models import model_from_json


In [60]:
with open("model.json", "r") as file:
    model = model_from_json(file.read())
model.load_weights("best_model.h5")    

In [61]:
test_str = "Hello how are you"
X = pd.Series([test_str])

In [62]:
emb_x = embedding_output(X)

In [63]:
pred = model.predict(emb_x)
c = np.argmax(pred, axis=1)



In [67]:
print(c)

[3]


In [66]:
print(' '.join(X[0]))
print(emoji.emojize(emoji_dictionary[str(c[0])]))

Hello how are you
😞
