In [1]:
import numpy as np
import pickle
import utils
import time
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tqdm import tqdm

  from ._conv import register_converters as _register_converters


In [2]:
# Load the pre-computed artefacts from disk. Each file is opened in a `with`
# block so the handle is closed as soon as the object has been unpickled.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted pipeline.
with open('data/emoji_vectors.p', 'rb') as fh:
    emoji_vectors = pickle.load(fh)
with open('data/moby_dick_vectors.p', 'rb') as fh:
    moby_dick_vectors = pickle.load(fh)
with open('data/moby_dick_sents.p', 'rb') as fh:
    moby_dick_sents = pickle.load(fh)

# Stack the emoji vectors into one array. Row i belongs to the i-th key of
# `emoji_vectors`, the same order that list(emoji_vectors.keys()) yields when
# predictions are decoded later in the notebook.
emoji_embedding = np.array(list(emoji_vectors.values()))

In [5]:
# Open the TensorFlow session (bound to `sess`) that the later cells use to
# build summaries, train, save/restore and evaluate.
# The two commented-out lines are presumably kept for re-running this cell in
# a live kernel (drop the old graph and close the old session first) — confirm.
# tf.reset_default_graph()
# sess.close()
sess = tf.InteractiveSession()

In [6]:
# Hyperparameters and fixed input geometry.
batch_size = 128    # sentences per training step
nodes = 300         # LSTM units
embed_size = 300    # width of each input word vector
x_seq_length = 32   # tokens per sentence fed to the network

# One batch of sentences: (batch, x_seq_length, embed_size).
inputs = tf.placeholder(tf.float32, (None, x_seq_length, embed_size), 'inputs')
# Unit-length mean word vector of every input sentence; this is what the
# network output is compared against in the loss.
input_mean = tf.nn.l2_normalize(tf.reduce_mean(inputs, axis=1), axis=1, name='input_mean')

# Emoji vectors as a fixed (non-trainable) constant.
output_embedding = tf.constant(emoji_embedding, name='output_embedding')

with tf.name_scope('network'):
    lstm_cell = tf.contrib.rnn.LSTMCell(nodes, name='lstm')
    lstm_outputs, _ = tf.nn.dynamic_rnn(lstm_cell, inputs=inputs, dtype=tf.float32)

    # `activation` is documented as a callable, so pass tf.nn.softmax rather
    # than the string 'softmax' (which only works on builds where the layer
    # resolves names through Keras).
    # Despite its name, `logits` therefore holds softmax probabilities over
    # the emoji vocabulary for every timestep; the name is kept because later
    # cells fetch it.
    logits = tf.layers.dense(lstm_outputs, units=len(emoji_vectors), activation=tf.nn.softmax, name='dense')
    # NOTE(review): presumably multiplies each timestep's distribution by the
    # emoji embedding matrix — confirm against utils.matmul3d.
    outputs = utils.matmul3d(logits, output_embedding)

    # Unit-length mean over the time axis, mirroring `input_mean`.
    output_mean = tf.nn.l2_normalize(tf.reduce_mean(outputs, axis=1), axis=1)

with tf.name_scope("optimization"):
    # Cosine distance between the normalised input mean and output mean.
    loss = tf.losses.cosine_distance(input_mean, output_mean, axis=1)
    optimizer = tf.train.AdamOptimizer(1e-3).minimize(loss)

# TensorBoard logging: a scalar summary of the loss plus the graph itself.
tf.summary.scalar('loss', loss)
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('models/lstm_moby_dick', sess.graph)

In [7]:
# Split the sentences into a training and a held-out test partition.
# random_state=42 fixes the shuffle so the split is the same on every run;
# no test_size is passed, so scikit-learn's default proportion applies.
train_sents, test_sents = train_test_split(moby_dick_sents, random_state=42)

In [8]:
# Swap every token for its vector from `moby_dick_vectors`, keeping the
# sentence / token nesting of the split lists.
X_train = [
    [moby_dick_vectors[token] for token in sentence]
    for sentence in train_sents
]
X_test = [
    [moby_dick_vectors[token] for token in sentence]
    for sentence in test_sents
]

In [50]:
# Sanity check: number of training sentences, then number of test sentences.
print(len(X_train), len(X_test))

7544 2515


In [19]:
def batch_generator(X, batch_size):
    """Yield consecutive slices of ``X`` holding at most ``batch_size`` items.

    Args:
        X: Any sliceable sequence that supports ``len()``.
        batch_size: Positive number of items per slice.

    Yields:
        ``X[start:start + batch_size]`` for start = 0, batch_size, ...; the
        final slice is shorter when ``len(X)`` is not a multiple of
        ``batch_size``. Nothing is yielded for an empty ``X``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Without this check
            the index would never advance past ``len(X)`` and the generator
            would never terminate. As with any generator, the error surfaces
            on the first iteration.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer, got {!r}'.format(batch_size))
    for start in range(0, len(X), batch_size):
        yield X[start:start + batch_size]

In [17]:
# Rebind `writer` to a fresh log directory ('.../2') so summaries added from
# here on are written there.
# NOTE(review): the FileWriter created in the graph-building cell is replaced
# without being closed — confirm whether it should be closed first.
writer = tf.summary.FileWriter('models/lstm_moby_dick/2', sess.graph)

In [20]:
# ---- Training -------------------------------------------------------------
sess.run(tf.global_variables_initializer())
epochs = 100
start = time.time()

for i in range(epochs):
    t = time.time()
    losses = []
    summary = None  # stays None when the generator produces no batches
    for X in utils.batch_generator(X_train, batch_size):
        _, l, summary = sess.run([optimizer, loss, merged], feed_dict={inputs:X})
        losses.append(l)
    # Only the summary of the final batch of each epoch is written to
    # TensorBoard; the printed figure below is the mean over all batches.
    if summary is not None:
        writer.add_summary(summary, global_step=i)
    print('Epoch {:3} Average Loss: {:>6.3f} Epoch duration: {:>6.3f}s'.format(i, np.mean(losses, axis=-1), time.time() - t))

# Persist the trained variables so later cells can restore them.
saver = tf.train.Saver()
saver.save(sess, 'models/lstm_moby_dick/model2')
print('Total training time:', time.time()-start)

# ---- Evaluation -----------------------------------------------------------
# Each test sentence is run as its own batch of one. The shapes come from the
# same `x_seq_length` / `embed_size` used to build the `inputs` placeholder,
# so changing the configuration cannot silently desynchronise this cell.
predictions = []
losses = []
emoji_keys = list(emoji_vectors.keys())
for x in tqdm(X_test):
    lo, l = sess.run([logits, loss], feed_dict={inputs:np.array(x).reshape(-1, x_seq_length, embed_size)})
    # Most probable emoji index at every timestep of the single sentence.
    pred = np.argmax(lo, axis=2).reshape(x_seq_length,)
    predictions.append([emoji_keys[i] for i in pred])
    losses.append(l)

print('Average test loss:', np.mean(losses, axis=-1))

# Preview up to ten test sentences (fewer if the test set is smaller).
for i in range(min(10, len(predictions))):
    # Empty-string tokens are dropped before joining.
    print('Test sentence:', ' '.join(w for w in test_sents[i] if w))
    print('Prediction:', set(predictions[i]))
    print('Cosine distance:', losses[i])
    print()

Epoch   0 Average Loss:  0.408 Epoch duration: 36.128s
Epoch   1 Average Loss:  0.382 Epoch duration: 35.834s
Epoch   2 Average Loss:  0.380 Epoch duration: 35.767s
Epoch   3 Average Loss:  0.375 Epoch duration: 35.723s
Epoch   4 Average Loss:  0.362 Epoch duration: 36.544s
Epoch   5 Average Loss:  0.353 Epoch duration: 36.452s
Epoch   6 Average Loss:  0.346 Epoch duration: 35.444s
Epoch   7 Average Loss:  0.340 Epoch duration: 35.490s
Epoch   8 Average Loss:  0.336 Epoch duration: 35.583s
Epoch   9 Average Loss:  0.334 Epoch duration: 35.638s
Epoch  10 Average Loss:  0.331 Epoch duration: 35.519s
Epoch  11 Average Loss:  0.329 Epoch duration: 35.629s
Epoch  12 Average Loss:  0.327 Epoch duration: 35.406s
Epoch  13 Average Loss:  0.325 Epoch duration: 36.474s
Epoch  14 Average Loss:  0.323 Epoch duration: 36.040s
Epoch  15 Average Loss:  0.322 Epoch duration: 35.474s
Epoch  16 Average Loss:  0.321 Epoch duration: 35.507s
Epoch  17 Average Loss:  0.319 Epoch duration: 35.461s
Epoch  18 

  0%|          | 0/2515 [00:00<?, ?it/s]

Epoch  99 Average Loss:  0.290 Epoch duration: 35.400s
Total training time: 3568.2733938694


100%|██████████| 2515/2515 [00:38<00:00, 65.45it/s]

Average test loss: 0.30574346
Test sentence: an old pike head sir there were seams dents in it
Prediction: {'🈁', '🐬', '🤕', '🌮', '🈴', '🏰', '⁉'}
Cosine distance: 0.21970135

Test sentence: this one poor hunt then the best lance out all surely he will not hang back when every foremast hand has clutched whetstone
Prediction: {'🈁', '🐬', '👏', '🐧', '🆙', '✳', '🏰', '⚜', '🤼', '🔂'}
Cosine distance: 0.12055719

Test sentence: drop them over fore aft
Prediction: {'🦏', '🐬', '🆙', '🏰', '⚜'}
Cosine distance: 0.35074228

Test sentence: in the infancy the first settlement the emigrants were several times saved from starvation by the benevolent biscuit the whale ship luckily dropping an anchor in their waters
Prediction: {'⚰', '⏮', '⛵', '〽', '🆘', '🐬', '🈁', '🕦', '🆙', '✳', '🏰', '♌'}
Cosine distance: 0.17174953

Test sentence: mighty whales which swim in sea water have sea oil swimming in them
Prediction: {'🐠', '⛵', '🥘', '🐬', '⛽', '🈁', '🐧', '🏰', '🤼'}
Cosine distance: 0.17046505

Test sentence: round round th




In [9]:
# Create a Saver bound to `saver`; the next cell uses it to restore the
# checkpoint without re-running the training cell.
saver = tf.train.Saver()

In [24]:
# Load the variables from the checkpoint path that the training cell passed
# to saver.save ('models/lstm_moby_dick/model2') into the current session.
saver.restore(sess, 'models/lstm_moby_dick/model2')

INFO:tensorflow:Restoring parameters from models/lstm_moby_dick/model2


In [25]:
# Re-run the test set through the restored model, one sentence per batch.
# Shapes are taken from `x_seq_length` / `embed_size` (the values the `inputs`
# placeholder was built with) instead of repeating the literals 32 and 300.
predictions = []
losses = []
emoji_keys = list(emoji_vectors.keys())
for x in tqdm(X_test):
    lo, l = sess.run([logits, loss], feed_dict={inputs:np.array(x).reshape(-1, x_seq_length, embed_size)})
    # Most probable emoji index at every timestep of the single sentence.
    pred = np.argmax(lo, axis=2).reshape(x_seq_length,)
    predictions.append([emoji_keys[i] for i in pred])
    losses.append(l)

100%|██████████| 2515/2515 [00:29<00:00, 86.40it/s]


In [28]:
# Load the raw sentence pickles; `with` closes each file handle as soon as
# the object has been read.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted pipeline.
with open('models/raw_train.p', 'rb') as fh:
    raw_train = pickle.load(fh)
with open('models/raw_test.p', 'rb') as fh:
    raw_test = pickle.load(fh)

In [30]:
# Show the first ten test examples: the raw sentence, the tokens the model
# saw, the distinct emoji it predicted and the cosine distance.
for idx in range(10):
    # filter(None, ...) drops empty-string tokens before joining.
    raw_tokens = filter(None, raw_test[idx])
    print('Raw: sentence:', ' '.join(raw_tokens))
    clean_tokens = filter(None, test_sents[idx])
    print('Test sentence:', ' '.join(clean_tokens))
    distinct_emoji = set(predictions[idx])
    print('Prediction:', distinct_emoji)
    print('Cosine distance:', losses[idx])
    print()

Raw: sentence: " Welding an old pike - head , sir ; there were seams and dents in it ."
Test sentence: an old pike head sir there were seams dents in it
Prediction: {'🏰', '🈁', '🐬', '🈴', '⁉', '🌮', '🤕'}
Cosine distance: 0.21970135

Raw: sentence: From this one poor hunt , then , the best lance out of all Nantucket , surely he will not hang back , when every foremast - hand has clutched a whetstone ?
Test sentence: this one poor hunt then the best lance out all surely he will not hang back when every foremast hand has clutched whetstone
Prediction: {'🆙', '🏰', '🈁', '🐬', '✳', '🔂', '⚜', '🤼', '🐧', '👏'}
Cosine distance: 0.12055719

Raw: sentence: drop them over , fore and aft .
Test sentence: drop them over fore aft
Prediction: {'🏰', '🆙', '🐬', '⚜', '🦏'}
Cosine distance: 0.35074228

Raw: sentence: Moreover , in the infancy of the first Australian settlement , the emigrants were several times saved from starvation by the benevolent biscuit of the whale - ship luckily dropping an anchor in their 

In [23]:
# Inspect one training sentence. The output below shows the word tokens
# followed by '' entries filling the list up to 32 items.
train_sents[50]

['have',
 'striven',
 'be',
 'more',
 'than',
 'be',
 'this',
 'world',
 's',
 'or',
 'mine',
 'own',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '']