## This notebook is designed to run in Google Colab

In [1]:
!pip install emoji



In [2]:
import numpy as np
import pandas as pd

In [3]:
# The CSV files carry no header row, so pandas assigns integer column labels;
# later cells read column 0 as the sentence and column 1 as the class label.
train = pd.read_csv('/content/dataset/train_emoji.csv', header=None)
test = pd.read_csv('/content/dataset/test_emoji.csv', header=None)

In [4]:
# Preview the first rows of the training frame.
train.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [5]:
import emoji

In [6]:
# Maps each class label (kept as a string key) to the emoji shown for it.
# A value is either a literal emoji or an alias passed through emoji.emojize().
emoji_dictionary = {
    # Literal heart is used because :heart: prints a black instead of red
    # heart depending on the font.
    "0": "\u2764\uFE0F",
    "1": ":baseball:",
    "2": ":beaming_face_with_smiling_eyes:",
    "3": ":downcast_face_with_sweat:",
    "4": ":fork_and_knife:",
}

In [7]:
# Check how the literal heart used for label "0" renders.
emoji.emojize("\u2764\uFE0F")

'❤️'

In [8]:
# Render every mapped value to confirm that each one resolves to an emoji.
for alias in emoji_dictionary.values():
    rendered = emoji.emojize(alias)
    print(rendered)

❤️
⚾
😁
😓
🍴


In [9]:
# Show the first ten training sentences next to the emoji for their label.
data = train.values
for row in range(10):
    sentence, label = data[row][0], data[row][1]
    # Dictionary keys are strings, so the numeric label is converted first.
    print(sentence, emoji.emojize(emoji_dictionary[str(label)]))

never talk to me again 😓
I am proud of your achievements 😁
It is the worst day in my life 😓
Miss you so much ❤️
food is life 🍴
I love you mum ❤️
Stop saying bullshit 😓
congratulations on your acceptance 😁
The assignment is too long  😓
I want to go play ⚾


In [10]:
# One-Hot Encoding
from keras.utils import to_categorical

In [11]:
# Sentences (column 0) are the model inputs. They are copied because
# getOutputEmbeddings() later tokenises its argument in place: without the
# copy that write can reach back into `train`/`test`, and re-running this cell
# would hand already-tokenised lists to the embedding step.
XT = train[0].copy()
Xt = test[0].copy()

# One-hot encode the labels (column 1). The width is pinned to the number of
# known classes: to_categorical otherwise infers it from the largest label
# present, so a split that lacks the highest class would get a narrower
# matrix than the model's 5-unit output layer expects.
YT = to_categorical(train[1], num_classes=len(emoji_dictionary))
Yt = to_categorical(test[1], num_classes=len(emoji_dictionary))


print(XT.shape)
print(Xt.shape)
print(YT.shape)
print(Yt.shape)

(132,)
(56,)
(132, 5)
(56, 5)


In [12]:
# Build a word -> vector lookup from the GloVe text file. Each line is read as
# a token followed by its coefficients, all separated by whitespace.
embeddings = {}
with open('glove.6B.50d.txt', encoding='utf-8') as glove_file:
    for row in glove_file:
        fields = row.split()
        # First field is the token; the remainder is its float32 vector.
        embeddings[fields[0]] = np.asarray(fields[1:], dtype='float32')

In [13]:
def getOutputEmbeddings(X, max_len=10, embedding_dim=50, embedding_index=None):
    """Convert a Series of sentences into a zero-padded tensor of word vectors.

    Parameters
    ----------
    X : pandas.Series
        Sentences, addressed as ``X[0] .. X[len-1]``. Entries may be raw
        strings or lists of tokens. NOTE: string entries are replaced in place
        by their whitespace-split token lists; other cells of this notebook
        rely on that side effect when they ``' '.join(...)`` the entries.
    max_len : int, default 10
        Number of token slots per sentence. Longer sentences are truncated,
        shorter ones are left zero-padded.
    embedding_dim : int, default 50
        Length of each word vector.
    embedding_index : mapping, optional
        Lower-cased word -> vector lookup. Defaults to the module-level
        ``embeddings`` dictionary.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(X), max_len, embedding_dim)``. Words missing
        from the lookup keep an all-zero vector instead of raising KeyError.
    """
    if embedding_index is None:
        embedding_index = embeddings

    embedding_matrix_output = np.zeros((X.shape[0], max_len, embedding_dim))
    for ix in range(X.shape[0]):
        # Only split raw strings, so calling the function again on an
        # already-tokenised Series does not fail.
        if isinstance(X[ix], str):
            X[ix] = X[ix].split()
        # Truncate to max_len so long sentences cannot index past the tensor.
        for jx, token in enumerate(X[ix][:max_len]):
            vector = embedding_index.get(token.lower())
            if vector is not None:
                embedding_matrix_output[ix, jx] = vector

    return embedding_matrix_output

In [14]:
# Inspect one raw training sentence before it is embedded.
XT[1]

'I am proud of your achievements'

In [None]:
# Embed the training sentences first, then the test sentences. The call also
# tokenises XT and Xt in place (each entry becomes a list of words).
emb_XT, emb_Xt = (getOutputEmbeddings(sentences) for sentences in (XT, Xt))

In [17]:
# Each tensor is (number of sentences, token slots, vector length).
for tensor in (emb_XT, emb_Xt):
    print(tensor.shape)

(132, 10, 50)
(56, 10, 50)


In [18]:
from keras.layers import LSTM, Dropout, Dense, Activation
from keras.models import Sequential

In [None]:
# Stacked-LSTM classifier over inputs of 10 token slots x 50-dim vectors.
model = Sequential()
# return_sequences=True keeps one output per timestep so the next LSTM
# receives a sequence rather than a single vector.
model.add(LSTM(64, input_shape=(10, 50), return_sequences=True))
model.add(Dropout(0.4))
# No input_shape here: only the first layer declares the model input, and this
# layer actually consumes the (10, 64) sequence produced by the LSTM above, so
# the previous input_shape=(10, 50) argument was misleading.
model.add(LSTM(64))
model.add(Dropout(0.3))
# One output unit per emoji class, turned into probabilities by softmax.
model.add(Dense(5))
model.add(Activation('softmax'))
model.summary()

In [None]:
# Categorical cross-entropy matches the one-hot labels and softmax output.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)
# validation_split=0.1 holds out a tenth of the training data for validation.
model.fit(
    emb_XT,
    YT,
    batch_size=32,
    epochs=40,
    shuffle=True,
    validation_split=0.1,
)

In [21]:
# Displays [loss, acc] measured on the test split.
model.evaluate(x=emb_Xt, y=Yt)



[1.6697900295257568, 0.625]

In [None]:
# Model outputs for every test sentence; argmax over a row gives the class.
pred = model.predict(x=emb_Xt)

In [None]:
# For the first 30 test sentences print: the sentence, the true emoji, and the
# predicted emoji. Xt entries are token lists at this point (they were
# tokenised in place during embedding), hence the join.
for idx in range(30):
    print(' '.join(Xt[idx]))
    actual_label = str(np.argmax(Yt[idx]))
    print(emoji.emojize(emoji_dictionary[actual_label]))
    predicted_label = str(np.argmax(pred[idx]))
    print(emoji.emojize(emoji_dictionary[predicted_label]))

In [27]:
# Persist the architecture as JSON and the weights as a separate file.
with open("model.json", "w") as json_file:
    architecture = model.to_json()
    json_file.write(architecture)
model.save_weights("model.h5")

In [28]:
from keras.models import model_from_json

In [29]:
# Rebuild the model from the saved JSON architecture, then restore its weights.
with open("model.json", "r") as json_file:
    architecture = json_file.read()
    model = model_from_json(architecture)
model.load_weights("model.h5")

In [36]:
# Wrap the single sentence in a Series, the container that
# getOutputEmbeddings() indexes and reads .shape from.
test_str = "Hello how are you"
X = pd.Series(data=[test_str])

In [37]:
# Embed the sentence; this also replaces X[0] with its list of tokens.
emb_X = getOutputEmbeddings(X)

In [39]:
# One row of model outputs per input sentence (here a single row).
p = model.predict(emb_X)



In [41]:
# X[0] is a token list after embedding, so it is re-joined for display.
sentence = ' '.join(X[0])
print(sentence)
# The highest-scoring class index is the key into the emoji mapping.
predicted_label = str(np.argmax(p[0]))
print(emoji.emojize(emoji_dictionary[predicted_label]))

Hello how are you
😁
