In [1]:
import numpy as np
import emoji
import matplotlib.pyplot as plt
import pandas as pd
import io
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Dropout, LSTM, Activation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.initializers import glorot_uniform

In [2]:
train=pd.read_csv('train_emoji.csv')
train

Unnamed: 0,sentence,emoji
0,never talk to me again,3
1,I am proud of your achievements,2
2,It is the worst day in my life,3
3,Miss you so much,0
4,food is life,4
...,...,...
127,he had to make a home run,1
128,I am ordering food,4
129,What is wrong with you,3
130,I love you,0


In [3]:
train_X=train['sentence'].to_numpy()
train_Y=train['emoji'].to_numpy()
print(train_X.shape)
print(train_Y.shape)

(132,)
(132,)


In [4]:
value=[]
embeddings_index = {}
with io.open('glove.6B.50d.txt', encoding='utf8') as f:
    for line in f:
        values = line.split()
        value.append(values)
        word = values[0]
        coefs = np.asarray(values[1:],dtype='float32')
        embeddings_index[word] = coefs

In [5]:
print(len(value))
print(len(value[0]))
value[0]

400000
51


['the',
 '0.418',
 '0.24968',
 '-0.41242',
 '0.1217',
 '0.34527',
 '-0.044457',
 '-0.49688',
 '-0.17862',
 '-0.00066023',
 '-0.6566',
 '0.27843',
 '-0.14767',
 '-0.55677',
 '0.14658',
 '-0.0095095',
 '0.011658',
 '0.10204',
 '-0.12792',
 '-0.8443',
 '-0.12181',
 '-0.016801',
 '-0.33279',
 '-0.1552',
 '-0.23131',
 '-0.19181',
 '-1.8823',
 '-0.76746',
 '0.099051',
 '-0.42125',
 '-0.19526',
 '4.0071',
 '-0.18594',
 '-0.52287',
 '-0.31681',
 '0.00059213',
 '0.0074449',
 '0.17778',
 '-0.15897',
 '0.012041',
 '-0.054223',
 '-0.29871',
 '-0.15749',
 '-0.34758',
 '-0.045637',
 '-0.44251',
 '0.18785',
 '0.0027849',
 '-0.18411',
 '-0.11514',
 '-0.78581']

In [6]:
word_to_vec_map={}
for i,v in enumerate(value):
    a=np.zeros((50))
    for j in range(1,51):
        a[j-1]=(v[j])
    word_to_vec_map[value[i][0]]=np.array(a)
    

In [7]:
word_to_index={}
index_to_word={}
for i,v in enumerate(value):
    word_to_index[v[0]]=i
    index_to_word[i]=v[0]

In [8]:
word = "son"
idx = 25602
print("the index of", word, "in the dictionary is", word_to_index[word])
print("the", str(idx) + "th word in the dictionary is", index_to_word[idx])

the index of son in the dictionary is 630
the 25602th word in the dictionary is crenshaw


In [9]:
def sentences_to_indices(X, word_to_index, max_len):
    m = X.shape[0] 
    X_indices = np.zeros([m,max_len])
    for i in range(m): 
        sentence_words=X[i].lower().split()
        j=0
        for w in sentence_words:
            if w in word_to_index:
                X_indices[i, j] = word_to_index[w]
                j =  j+1
    return X_indices

In [10]:
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    vocab_size = len(word_to_index)+1
    for i,key in enumerate(word_to_vec_map.keys()):
        any_word=key
        break
    emb_dim = word_to_vec_map[any_word].shape[0]
    emb_matrix =  np.zeros([vocab_size,emb_dim])
    for word, idx in word_to_index.items():
        emb_matrix[idx, :] =  word_to_vec_map[word]
    embedding_layer = Embedding(vocab_size, emb_dim ,trainable = False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    
    return embedding_layer

In [11]:
embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
print("weights[0][1][1] =", embedding_layer.get_weights()[0][1][1])
print("Input_dim", embedding_layer.input_dim)
print("Output_dim",embedding_layer.output_dim)

weights[0][1][1] = 0.23682
Input_dim 400001
Output_dim 50


In [12]:
input_shape=(10,)
word_to_vec_map1=word_to_vec_map
word_to_index1=word_to_index
def Emoji_suggestor(hp):
    sentence_indices =Input(shape=input_shape,dtype='int32')
    embedding_layer =pretrained_embedding_layer(word_to_vec_map1, word_to_index1)
    embeddings =embedding_layer(sentence_indices)   
    X = LSTM(units=128,return_sequences = True)(embeddings)
    X =Dropout(0.5)(X) 
    X = LSTM(units=128,return_sequences = False)(X)
    X = Dropout(0.5)(X)
    X = Dense(hp.Int('dense1_filter', min_value=5, max_value=30, step=5))(X)
    X = Dense(5)(X)
    X = Activation('softmax')(X)
    model = Model(inputs=sentence_indices,outputs=X)
    
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['accuracy'])
    
    return model

In [13]:
from keras_tuner import RandomSearch
from keras_tuner.engine.hyperparameters import HyperParameters

In [14]:
tuner_search=RandomSearch(Emoji_suggestor,
                          objective='val_accuracy',
                          max_trials=5,directory='output',project_name="emojisugesstor")

INFO:tensorflow:Reloading Oracle from existing project output\emojisugesstor\oracle.json
INFO:tensorflow:Reloading Tuner from output\emojisugesstor\tuner0.json


In [15]:
def convert_to_one_hot(Y, C):
    a=np.zeros((Y.shape[0],C))
    for i, y in enumerate(Y):
        a[i][Y[i]]=1
    return a

In [16]:
X_train_indices = sentences_to_indices(train_X, word_to_index, 10)
Y_train_oh = convert_to_one_hot(train_Y, C = 5)

In [17]:
tuner_search.search(X_train_indices, Y_train_oh, epochs=50,validation_split=0.1, batch_size=16, shuffle=True)

INFO:tensorflow:Oracle triggered exit


In [18]:
model=tuner_search.get_best_models(num_models=1)[0]

In [19]:

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10)]              0         
                                                                 
 embedding (Embedding)       (None, 10, 50)            20000050  
                                                                 
 lstm (LSTM)                 (None, 10, 128)           91648     
                                                                 
 dropout (Dropout)           (None, 10, 128)           0         
                                                                 
 lstm_1 (LSTM)               (None, 128)               131584    
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense (Dense)               (None, 25)                3225  

In [20]:
model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['accuracy'])

In [21]:
model.fit(X_train_indices, Y_train_oh, epochs=60,validation_split=0.1,initial_epoch=50, batch_size=16, shuffle=True)

Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<keras.callbacks.History at 0x1bfa8972b50>

In [24]:
Y=[emoji.emojize(":sparkling_heart:"),emoji.emojize(":soccer_ball:"),emoji.emojize(":grinning_face:"),emoji.emojize(":disappointed_face:"),emoji.emojize(":fork_and_knife_with_plate:")]
Y

['💖', '⚽', '😀', '😞', '🍽️']

In [23]:
model_dir = "./emogi_suggestor_model"

localhost_save_option = tf.saved_model.SaveOptions(experimental_io_device="/job:localhost")
model.save(model_dir, options=localhost_save_option)






INFO:tensorflow:Assets written to: ./emogi_suggestor_model\assets


INFO:tensorflow:Assets written to: ./emogi_suggestor_model\assets
