In [1]:
import numpy as np
import matplotlib.pyplot as plt
from emo_utils import *
%matplotlib widget

In [2]:
X_train, Y_train = read_csv('data/train_emoji.csv')
X_test, Y_test = read_csv('data/tesss.csv')

In [3]:
maxLen = len(max(X_train, key=len).split())

In [4]:
for idx in range(10):
    print(f"{X_train[idx]}, {label_to_emoji(Y_train[idx])}")

never talk to me again, 😞
I am proud of your achievements, 😄
It is the worst day in my life, 😞
Miss you so much, ❤️
food is life, 🍴
I love you mum, ❤️
Stop saying bullshit, 😞
congratulations on your acceptance, 😄
The assignment is too long , 😞
I want to go play, ⚾


In [5]:
Y_oh_train = convert_to_one_hot(Y_train, C=5)
Y_oh_test = convert_to_one_hot(Y_test, C=5)

In [6]:
idx = 50
print(f"Sentence '{X_train[50]}' has label index {Y_train[idx]}, which is emoji {label_to_emoji(Y_train[idx])}", )
print(f"Label index {Y_train[idx]} in one-hot encoding format is {Y_oh_train[idx]}")

Sentence 'I missed you' has label index 0, which is emoji ❤️
Label index 0 in one-hot encoding format is [1. 0. 0. 0. 0.]


In [7]:
word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('data/glove.6B.50d.txt')

In [8]:
word = "cucumber"
idx = 289846
print(f"{word} is at {word_to_index[word]}th index")
print(f"{index_to_word[idx]} is at {idx}th index")

cucumber is at 113317th index
potatos is at 289846th index


### Emojify V1

In [9]:

def sentence_to_avg(sentence, word_to_vec_map):
    """
    Converts a sentence (string) into a list of words (strings). Extracts the GloVe representation of each word
    and averages its value into a single vector encoding the meaning of the sentence.
    
    Arguments:
    sentence -- string, one training example from X
    word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
    
    Returns:
    avg -- average vector encoding information about the sentence, numpy-array of shape (50,)
    """
    words = sentence.lower().split()
    m = 0
    any_word = list(word_to_vec_map.keys())[0]
    avg = np.zeros((word_to_vec_map[any_word].shape))
    for word in words:
        if word in list(word_to_vec_map.keys()):
            avg += word_to_vec_map[word]
            m += 1
    if m == 0:
        return avg
    avg /= m
    return avg

$$ z^{(i)} = W . avg^{(i)} + b$$

$$ a^{(i)} = softmax(z^{(i)})$$

$$ \mathcal{L}^{(i)} = - \sum_{k = 0}^{n_y - 1} Y_{oh,k}^{(i)} * log(a^{(i)}_k)$$

In [10]:
def model(X, Y, word_to_vec_map, learning_rate = 0.01, num_iterations = 200):
    """
    Model to train word vector representations in numpy.
    
    Arguments:
    X -- input data, numpy array of sentences as strings, of shape (m, 1)
    Y -- labels, numpy array of integers between 0 and 7, numpy-array of shape (m, 1)
    word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
    learning_rate -- learning_rate for the stochastic gradient descent algorithm
    num_iterations -- number of iterations
    
    Returns:
    pred -- vector of predictions, numpy-array of shape (m, 1)
    W -- weight matrix of the softmax layer, of shape (n_y, n_h)
    b -- bias of the softmax layer, of shape (n_y,)
    """
    m = X.shape[0]
    any_word = list(word_to_vec_map.keys())[0]
    n_h = word_to_vec_map[any_word].shape[0]
    n_y = len(np.unique(Y))
    W = np.zeros((n_y, n_h))
    b = np.zeros((n_y,))
    Y_oh = convert_to_one_hot(Y, C=n_y)

    for epoch in range(num_iterations):
        for s in range(m):
            avg = sentence_to_avg(X[s], word_to_vec_map)
            z = np.matmul(W, avg) + b
            a = softmax(z)
            cost = -np.sum(Y_oh[s]*np.log(a))
            # Compute gradients 
            dz = a - Y_oh[s]
            dW = np.dot(dz.reshape(n_y,1), avg.reshape(1, n_h))
            db = dz

            # Update parameters with Stochastic Gradient Descent
            W = W - learning_rate * dW
            b = b - learning_rate * db
        if epoch % 10 == 0:
            print("Epoch: " + str(epoch) + " --- cost = " + str(cost))
            pred = predict(X, Y, W, b, word_to_vec_map) #predict is defined in emo_utils.py

    return pred, W, b

In [11]:
pred, W, b = model(X_train, Y_train, word_to_vec_map, num_iterations=10) #Train it to 200 epochs for good results

Epoch: 0 --- cost = 1.651687023969562
Accuracy: 0.3712121212121212


In [12]:
print("Training set:")
pred_train = predict(X_train, Y_train, W, b, word_to_vec_map)
print('Test set:')
pred_test = predict(X_test, Y_test, W, b, word_to_vec_map)

Training set:
Accuracy: 0.7575757575757576
Test set:
Accuracy: 0.6964285714285714


In [13]:
def predict_single(sentence, W=W, b=b, word_to_vec_map=word_to_vec_map):
    """
    Given X (sentences) and Y (emoji indices), predict emojis and compute the accuracy of your model over the given set.
    
    Arguments:
    X -- input data containing sentences, numpy array of shape (m, None)
    Y -- labels, containing index of the label emoji, numpy array of shape (m, 1)
    
    Returns:
    pred -- numpy array of shape (m, 1) with your predictions
    """
    avg = sentence_to_avg(sentence, word_to_vec_map)
    z = np.matmul(W, avg) + b
    pred = np.argmax(softmax(z))
    return pred

In [14]:
label_to_emoji(int(predict_single("I love you")))

'❤️'

In [15]:
X_my_sentences = np.array(["i adore you", "i love you", "funny lol", "lets play with a ball", "food is ready", "not feeling happy"])
Y_my_labels = np.array([[0], [0], [2], [1], [4],[3]])

pred = predict(X_my_sentences, Y_my_labels , W, b, word_to_vec_map)
print_predictions(X_my_sentences, pred)

Accuracy: 1.0

i adore you ❤️
i love you ❤️
funny lol 😄
lets play with a ball ⚾
food is ready 🍴
not feeling happy 😞


### Emojify V2 - Using LSTM in Keras

In [16]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, Embedding
from tensorflow.keras.models import Model

<img src="images/embedding1.png" style="width:700px;height:250px;">

In [17]:
def sentences_to_indices(X, word_to_index, max_len):
    """
    Converts an array of sentences (strings) into an array of indices corresponding to words in the sentences.
    The output shape should be such that it can be given to `Embedding()` (See Figure above). 
    
    Arguments:
    X -- array of sentences (strings), of shape (m, 1)
    word_to_index -- a dictionary containing the each word mapped to its index
    max_len -- maximum number of words in a sentence. You can assume every sentence in X is no longer than this. 
    
    Returns:
    X_indices -- array of indices corresponding to words in the sentences from X, of shape (m, max_len)
    """
    m = X.shape[0]
    X_indices = np.zeros((m, max_len))
    for i in range(m):
        sentence = X[i].lower().split()
        for j, word in enumerate(sentence):
            if word in list(word_to_index.keys()):
                X_indices[i][j] = word_to_index[word]
    return X_indices

In [18]:
X1 = np.array(["funny lol", "lets play baseball", "food is ready for you"])
X1_indices = sentences_to_indices(X1, word_to_index, max_len=5)
print("X1 =", X1)
print("X1_indices =\n", X1_indices)

X1 = ['funny lol' 'lets play baseball' 'food is ready for you']
X1_indices =
 [[155345. 225122.      0.      0.      0.]
 [220930. 286375.  69714.      0.      0.]
 [151204. 192973. 302254. 151349. 394475.]]


#### Pretrained Embedding Layer

In [19]:
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.
    
    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """

    vocab_size = len(word_to_index) + 1
    any_word = list(word_to_vec_map.keys())[0]
    emb_dim = word_to_vec_map[any_word].shape[0]

    emb_matrix = np.zeros((vocab_size, emb_dim))
    for word, vec in word_to_vec_map.items():
        emb_matrix[word_to_index[word]] = vec
    
    embedding_layer = Embedding(vocab_size, emb_dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

In [20]:
embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
print("weights[0][1][1] =", embedding_layer.get_weights()[0][0][1])
print("Input_dim", embedding_layer.input_dim)
print("Output_dim",embedding_layer.output_dim)

weights[0][1][1] = 0.0
Input_dim 400001
Output_dim 50


<img src="images/emojifier-v2.png" style="width:700px;height:400px;"> <br>

In [21]:
def Emojify_V2(input_shape, word_to_vec_map, word_to_index):
    """
    Function creating the Emojify-v2 model's graph.
    
    Arguments:
    input_shape -- shape of the input, usually (max_len,)
    word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    model -- a model instance in Keras
    """
    inputs = Input(shape=input_shape, dtype='int32')
    X = pretrained_embedding_layer(word_to_vec_map, word_to_index)(inputs)
    X = LSTM(units=128, return_sequences=True)(X)
    X = Dropout(rate=0.5)(X)
    X = LSTM(units=128, return_sequences=False)(X)
    X = Dropout(rate=0.5)(X)
    X = Dense(units=5, activation='softmax')(X)
    return Model(inputs=inputs, outputs=X, name='SPN_Emojify')

In [22]:
model = Emojify_V2((maxLen,), word_to_vec_map, word_to_index)
model.summary()

Model: "SPN_Emojify"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10)]              0         
                                                                 
 embedding_1 (Embedding)     (None, 10, 50)            20000050  
                                                                 
 lstm (LSTM)                 (None, 10, 128)           91648     
                                                                 
 dropout (Dropout)           (None, 10, 128)           0         
                                                                 
 lstm_1 (LSTM)               (None, 128)               131584    
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense (Dense)               (None, 5)                 

In [23]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [24]:
X_train_indices = sentences_to_indices(X_train, word_to_index, maxLen)
Y_train_oh = convert_to_one_hot(Y_train, C = 5)

In [25]:
model.fit(X_train_indices, Y_train_oh, epochs=100, batch_size=32, shuffle=True)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1fb662eb1c0>

In [26]:
X_test_indices = sentences_to_indices(X_test, word_to_index, max_len = maxLen)
Y_test_oh = convert_to_one_hot(Y_test, C = 5)
loss, acc = model.evaluate(X_test_indices, Y_test_oh)
print()
print("Test accuracy = ", acc)


Test accuracy =  0.8214285969734192


In [27]:
sentence = np.array(["I am happy"])
indices = sentences_to_indices(sentence, word_to_index, maxLen)
label_to_emoji(np.argmax(model.predict(indices)))



'😄'

In [34]:
X_my_sentences = np.array(["i adore you", "i love you", "funny lol", "lets play with a ball", "food is ready", "not feeling happy", "definitely feeling happy"])
m = X_my_sentences.shape[0]
indices = sentences_to_indices(X_my_sentences, word_to_index, maxLen)
for i in range(m):
    pred = label_to_emoji(np.argmax(model.predict(indices[i].reshape(1, maxLen))))
    print(f"{X_my_sentences[i]} {pred}")

i adore you ❤️
i love you ❤️
funny lol 😄
lets play with a ball ⚾
food is ready 🍴
not feeling happy 😞
definitely feeling happy 😄


In [None]:
from tensorflow.keras.models import save_model

In [None]:
# save_model(model, "./SPN_Emojify.keras")