# Emoji_Prdictor
- We provide our model with some sentences which then predicts the emoji most associated with it.

In [167]:
import numpy as np
import pandas as pd

### Load and see data

In [168]:
data = pd.read_csv('./emojify_data.csv', header=None)
test_data = pd.read_csv('./test_emoji.csv', header=None)

In [169]:
data.head(n=10)

Unnamed: 0,0,1,2,3
0,French macaroon is so tasty,4,,
1,work is horrible,3,,
2,I am upset,3,,[3]
3,throw the ball,1,,[2]
4,Good joke,2,,
5,what is your favorite baseball game,1,,
6,I cooked meat,4,,
7,stop messing around,3,,
8,I want chinese food,4,,
9,Let us go play baseball,1,,


In [170]:
test_data.head(n=10)

Unnamed: 0,0,1
0,I want to eat\t,4
1,he did not answer\t,3
2,he got a very nice raise\t,2
3,she got me a nice present\t,2
4,ha ha ha it was so funny\t,2
5,he is a good friend\t,2
6,I am upset\t,3
7,We had such a lovely dinner tonight\t,2
8,where is the food\t,4
9,Stop making this joke ha ha ha\t,2


In [171]:
print(test_data.shape)

(56, 2)


In [172]:
Data = data.values
test_data = test_data.values
print(Data.shape)

(183, 4)


### Prepare Data

In [174]:
X_train = Data[:,0]
Y_train = Data[:,1]
X_test = test_data[:,0]
Y_test = test_data[:,1]
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

(183,)
(183,)
(56,)
(56,)


In [175]:
print(np.unique(Y_train))

[0 1 2 3 4]


In [176]:
for i in range(5):
    print(X_test[i],Y_test[i])

I want to eat	 4
he did not answer	 3
he got a very nice raise	 2
she got me a nice present	 2
ha ha ha it was so funny	 2


In [177]:
import emoji

In [178]:
emoji_dict = { 0 : "\u2764\uFE0F",    # :heart: prints a black instead of red heart depending on the font
               1 : ":baseball:",
               2 : ":smile:",
               3 : ":disappointed:",
               4 : ":fork_and_knife:"
             }
for ix in emoji_dict.keys():
    print(ix,emoji.emojize(emoji_dict[ix],use_aliases=True))

0 ❤️
1 ⚾
2 😄
3 😞
4 🍴


In [179]:
for i in range(5):
    print(X_train[i], emoji.emojize(emoji_dict[Y_train[i]], use_aliases=True))

French macaroon is so tasty 🍴
work is horrible 😞
I am upset 😞
throw the ball ⚾
Good joke 😄


### Get Pre-trained weights using Transfer Learning

In [180]:
f = open('glove.6B.50d.txt', encoding='utf-8')

In [181]:
embeddings_index = {}

for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:],dtype='float')
    embeddings_index[word] = coefs
f.close()

In [182]:
embeddings_index['good']

array([-3.5586e-01,  5.2130e-01, -6.1070e-01, -3.0131e-01,  9.4862e-01,
       -3.1539e-01, -5.9831e-01,  1.2188e-01, -3.1943e-02,  5.5695e-01,
       -1.0621e-01,  6.3399e-01, -4.7340e-01, -7.5895e-02,  3.8247e-01,
        8.1569e-02,  8.2214e-01,  2.2220e-01, -8.3764e-03, -7.6620e-01,
       -5.6253e-01,  6.1759e-01,  2.0292e-01, -4.8598e-02,  8.7815e-01,
       -1.6549e+00, -7.7418e-01,  1.5435e-01,  9.4823e-01, -3.9520e-01,
        3.7302e+00,  8.2855e-01, -1.4104e-01,  1.6395e-02,  2.1115e-01,
       -3.6085e-02, -1.5587e-01,  8.6583e-01,  2.6309e-01, -7.1015e-01,
       -3.6770e-02,  1.8282e-03, -1.7704e-01,  2.7032e-01,  1.1026e-01,
        1.4133e-01, -5.7322e-02,  2.7207e-01,  3.1305e-01,  9.2771e-01])

In [183]:
embeddings_index['good'].shape

(50,)

In [184]:
# The input to the RNN model will be a 3-D volume with dimensions batch_size, 
# dimension_of_each_word(50 here) and the maximum possible length of each sentence
# which can be varied depending on input. Here we are using relatively smaller
# sentences so we use 10. In input to the RNN model will be 3-D vector of dimension
# (50,50,10) considering a batch size of 50
def embedding_output(X):
    maxLen = 10
    emb_dim = 50
    embedding_out = np.zeros((X.shape[0], maxLen,emb_dim))
    
    for ix in range(X.shape[0]):
        X[ix] = X[ix].split()
        
        for ij in range(len(X[ix])):
            # go to every word in the current (ix) sentence
            try:
                embedding_out[ix][ij] = embeddings_index[X[ix][ij].lower()]
            except:
                embedding_out[ix][ij] = np.zeros((50,))
    
    return embedding_out

In [185]:
embeddings_matrix_train = embedding_output(X_train)
embeddings_matrix_test = embedding_output(X_test)

In [186]:
print(X_train[0])
print(len(X_train[0]))

['French', 'macaroon', 'is', 'so', 'tasty']
5


In [187]:
print(embeddings_matrix_train.shape)

(183, 10, 50)


In [188]:
Y_train.shape

(183,)

In [189]:
from keras.utils import to_categorical

In [190]:
Y_train = to_categorical(Y_train, num_classes=5)
Y_test = to_categorical(Y_test, num_classes=5)
print(Y_train.shape)
print(Y_train[0])

(183, 5)
[0. 0. 0. 0. 1.]


## Define and train model

In [191]:
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import *

In [192]:
model = Sequential()
model.add(LSTM(64,input_shape=(10,50), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(64,return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_8 (LSTM)                (None, 10, 64)            29440     
_________________________________________________________________
dropout_8 (Dropout)          (None, 10, 64)            0         
_________________________________________________________________
lstm_9 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_9 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 325       
_________________________________________________________________
activation_7 (Activation)    (None, 5)                 0         
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
__________________________________________________

In [193]:
hist = model.fit(embeddings_matrix_train, Y_train, epochs=100,batch_size=64,shuffle=True, validation_split=0.2)

Train on 146 samples, validate on 37 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100


Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


### Predict and Evaluate accuracy

In [194]:
pred = model.predict_classes(embeddings_matrix_test)
print(pred)

[4 3 2 0 2 0 3 0 4 2 1 3 3 3 1 3 3 2 3 4 0 0 4 0 3 3 1 0 2 0 0 1 3 2 0 1 2
 4 4 2 3 2 0 1 2 0 3 2 3 3 3 0 3 2 2 3]


In [195]:
model.evaluate(embeddings_matrix_test,Y_test)



[0.9837713582175118, 0.7857142686843872]

In [196]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

In [197]:
checkpoint = ModelCheckpoint("best_model.h5", monitor='val_loss',verbose=True,save_best_only=True)
earlystop = EarlyStopping(monitor='val_acc', patience=10)
hist = model.fit(embeddings_matrix_train, Y_train, epochs=100,batch_size=64,shuffle=True, validation_split=0.2, callbacks=[checkpoint,earlystop])

Train on 146 samples, validate on 37 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 1.55893, saving model to best_model.h5
Epoch 2/100

Epoch 00002: val_loss improved from 1.55893 to 1.54828, saving model to best_model.h5
Epoch 3/100




Epoch 00003: val_loss did not improve from 1.54828
Epoch 4/100

Epoch 00004: val_loss improved from 1.54828 to 1.45006, saving model to best_model.h5
Epoch 5/100

Epoch 00005: val_loss did not improve from 1.45006
Epoch 6/100

Epoch 00006: val_loss improved from 1.45006 to 1.43183, saving model to best_model.h5
Epoch 7/100

Epoch 00007: val_loss did not improve from 1.43183
Epoch 8/100

Epoch 00008: val_loss did not improve from 1.43183
Epoch 9/100

Epoch 00009: val_loss did not improve from 1.43183
Epoch 10/100

Epoch 00010: val_loss improved from 1.43183 to 1.37809, saving model to best_model.h5
Epoch 11/100

Epoch 00011: val_loss did not improve from 1.37809
Epoch 12/100

Epoch 00012: val_loss did not improve from 1.37809
Epoch 13/100

Epoch 00013: val_loss did not improve from 1.37809
Epoch 14/100

Epoch 00014: val_loss did not improve from 1.37809
Epoch 15/100

Epoch 00015: val_loss did not improve from 1.37809
Epoch 16/100

Epoch 00016: val_loss did not improve from 1.37809
Epoc


Epoch 00044: val_loss did not improve from 1.37809
Epoch 45/100

Epoch 00045: val_loss did not improve from 1.37809
Epoch 46/100

Epoch 00046: val_loss did not improve from 1.37809
Epoch 47/100

Epoch 00047: val_loss did not improve from 1.37809
Epoch 48/100

Epoch 00048: val_loss did not improve from 1.37809
Epoch 49/100

Epoch 00049: val_loss did not improve from 1.37809
Epoch 50/100

Epoch 00050: val_loss did not improve from 1.37809
Epoch 51/100

Epoch 00051: val_loss did not improve from 1.37809
Epoch 52/100

Epoch 00052: val_loss did not improve from 1.37809
Epoch 53/100

Epoch 00053: val_loss did not improve from 1.37809
Epoch 54/100

Epoch 00054: val_loss did not improve from 1.37809
Epoch 55/100

Epoch 00055: val_loss did not improve from 1.37809
Epoch 56/100

Epoch 00056: val_loss did not improve from 1.37809
Epoch 57/100

Epoch 00057: val_loss did not improve from 1.37809
Epoch 58/100

Epoch 00058: val_loss did not improve from 1.37809
Epoch 59/100

Epoch 00059: val_loss di


Epoch 00085: val_loss did not improve from 1.37809
Epoch 86/100

Epoch 00086: val_loss did not improve from 1.37809
Epoch 87/100

Epoch 00087: val_loss did not improve from 1.37809
Epoch 88/100

Epoch 00088: val_loss did not improve from 1.37809
Epoch 89/100

Epoch 00089: val_loss did not improve from 1.37809
Epoch 90/100

Epoch 00090: val_loss did not improve from 1.37809
Epoch 91/100

Epoch 00091: val_loss did not improve from 1.37809
Epoch 92/100

Epoch 00092: val_loss did not improve from 1.37809
Epoch 93/100

Epoch 00093: val_loss did not improve from 1.37809
Epoch 94/100

Epoch 00094: val_loss did not improve from 1.37809
Epoch 95/100

Epoch 00095: val_loss did not improve from 1.37809
Epoch 96/100

Epoch 00096: val_loss did not improve from 1.37809
Epoch 97/100

Epoch 00097: val_loss did not improve from 1.37809
Epoch 98/100

Epoch 00098: val_loss did not improve from 1.37809
Epoch 99/100

Epoch 00099: val_loss did not improve from 1.37809
Epoch 100/100

Epoch 00100: val_loss d

In [198]:
model.load_weights("best_model.h5")

In [199]:
model.evaluate(embeddings_matrix_test,Y_test)



[0.7397586107254028, 0.8035714030265808]

In [200]:
for i in range(30):
    print(' '.join(X_test[i]))
    print(emoji.emojize(emoji_dict[np.argmax(Y_test[i])], use_aliases=True))
    print(emoji.emojize(emoji_dict[pred[i]], use_aliases=True))

I want to eat
🍴
🍴
he did not answer
😞
😞
he got a very nice raise
😄
😄
she got me a nice present
😄
❤️
ha ha ha it was so funny
😄
😄
he is a good friend
😄
❤️
I am upset
😞
😞
We had such a lovely dinner tonight
😄
❤️
where is the food
🍴
🍴
Stop making this joke ha ha ha
😄
😄
where is the ball
⚾
⚾
work is hard
😞
😞
This girl is messing with me
😞
😞
are you serious
😞
😞
Let us go play baseball
⚾
⚾
This stupid grader is not working
😞
😞
work is horrible
😞
😞
Congratulation for having a baby
😄
😄
stop pissing me off
😞
😞
any suggestions for dinner
🍴
🍴
I love taking breaks
❤️
❤️
you brighten my day
😄
❤️
I boiled rice
🍴
🍴
she is a bully
😞
❤️
Why are you feeling bad
😞
😞
I am upset
😞
😞
give me the ball
⚾
⚾
My grandmother is the love of my life
❤️
❤️
enjoy your game
⚾
😄
valentine day is near
😄
❤️
