In [1]:
#@title Env setting
from __future__ import print_function, division, unicode_literals
# import example_helper
import json
import csv
import argparse

import numpy as np
import emoji

from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH

# Emoji aliases in the order shown in emoji_overview.png.
# The list position is used as the lookup key: EMOJIS[i] is the alias string
# for index i (see the `EMOJIS[x]` lookups in the prediction cell).
EMOJIS = [
    ":joy:", ":unamused:", ":weary:", ":sob:", ":heart_eyes:",
    ":pensive:", ":ok_hand:", ":blush:", ":heart:", ":smirk:",
    ":grin:", ":notes:", ":flushed:", ":100:", ":sleeping:",
    ":relieved:", ":relaxed:", ":raised_hands:", ":two_hearts:", ":expressionless:",
    ":sweat_smile:", ":pray:", ":confused:", ":kissing_heart:", ":heartbeat:",
    ":neutral_face:", ":information_desk_person:", ":disappointed:", ":see_no_evil:", ":tired_face:",
    ":v:", ":sunglasses:", ":rage:", ":thumbsup:", ":cry:",
    ":sleepy:", ":yum:", ":triumph:", ":hand:", ":mask:",
    ":clap:", ":eyes:", ":gun:", ":persevere:", ":smiling_imp:",
    ":sweat:", ":broken_heart:", ":yellow_heart:", ":musical_note:", ":speak_no_evil:",
    ":wink:", ":skull:", ":confounded:", ":smile:", ":stuck_out_tongue_winking_eye:",
    ":angry:", ":no_good:", ":muscle:", ":facepunch:", ":purple_heart:",
    ":sparkling_heart:", ":blue_heart:", ":grimacing:", ":sparkles:",
]

def top_elements(array, k):
    """Return the indices of the ``k`` largest values of a 1-D array, largest first.

    Args:
        array: 1-D NumPy array (or any sequence ``np.asarray`` can convert)
            of comparable scores.
        k: Number of indices wanted. Values larger than ``len(array)`` are
            clamped to the array length; ``k <= 0`` yields an empty result.

    Returns:
        1-D integer NumPy array of at most ``k`` indices into ``array``,
        ordered by descending value. The relative order of indices whose
        values are equal is not guaranteed.
    """
    array = np.asarray(array)
    k = min(int(k), array.shape[0])
    if k <= 0:
        # Guard needed because `-0 == 0`: without it `argpartition(array, 0)[-0:]`
        # would select *every* index instead of none.
        return np.empty(0, dtype=np.intp)
    # argpartition finds the k largest entries without a full sort...
    ind = np.argpartition(array, -k)[-k:]
    # ...then only those k candidates are sorted and flipped to descending order.
    return ind[np.argsort(array[ind])][::-1]

# Read the vocabulary JSON from VOCAB_PATH; the resulting object is what
# SentenceTokenizer is constructed with in the cells below.
with open(VOCAB_PATH) as vocab_file:
    vocabulary = json.load(vocab_file)



In [2]:
#@title Write the input text, and the model will recommend some emojis for you
# examples/text_emojize.py

input_text = 'I am so happy for you.' #@param
maxlen = 30 #@param

# Number of emoji suggestions appended to the input text.
top_k = 5

# Tokenizer built from the vocabulary loaded in the setup cell; `maxlen` is
# passed through as its length setting.
st = SentenceTokenizer(vocabulary, maxlen)

# Load the emoji model from the pretrained weights path.
model = torchmoji_emojis(PRETRAINED_PATH)

# tokenize_sentences returns a 3-tuple; only the first element is fed to the model.
tokenized, _, _ = st.tokenize_sentences([input_text])

# A single sentence was submitted, so row 0 holds its scores
# (one entry per position in EMOJIS — presumably probabilities; verify against the model).
prob = model(tokenized)[0]

# Indices of the `top_k` highest-scoring entries, best first.
emoji_ids = top_elements(prob, top_k)

# Translate indices into alias strings. A list (not a one-shot `map` iterator)
# so `emojis` can still be inspected after it has been joined.
emojis = [EMOJIS[emoji_id] for emoji_id in emoji_ids]
annotated_text = "{} {}".format(input_text, ' '.join(emojis))

try:
    rendered = emoji.emojize(annotated_text, use_aliases=True)
except TypeError:
    # emoji >= 2.0 dropped the `use_aliases` keyword; alias names such as
    # ":thumbsup:" are selected there with `language='alias'` instead.
    rendered = emoji.emojize(annotated_text, language='alias')

print(rendered)

I am so happy for you. ☺ 😊 💙 💛 ❤


In [13]:
#@title Text encoding
# examples/encode_texts.py
import json

from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_feature_encoding
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH

# Sentences to encode; the first five values of each encoding are printed below.
TEST_SENTENCES = [
    "I love mom's cooking",
    "I love my parents",
    "You are beautiful",
    "You are so beautiful",
]

maxlen = 30
batch_size = 32

# Read the vocabulary and build the tokenizer with the length setting above.
with open(VOCAB_PATH) as vocab_file:
    vocabulary = json.load(vocab_file)

st = SentenceTokenizer(vocabulary, maxlen)

# tokenize_sentences returns a 3-tuple; only the first element is used.
tokenized, _, _ = st.tokenize_sentences(TEST_SENTENCES)

# Feature-encoding variant of the model, loaded from the pretrained weights path.
model = torchmoji_feature_encoding(PRETRAINED_PATH)

# One row of `encoding` per input sentence (indexed as encoding[i, :5] below).
encoding = model(tokenized)

# One report line per sentence, showing only the first five encoding values.
report_lines = [
    f"Sentence[{i}] {sent}:\t {encoding[i, :5]}"
    for i, sent in enumerate(TEST_SENTENCES)
]
print("\n".join(report_lines))

Sentence[0] I love mom's cooking:	 [-0.00852921  0.05861181  0.          0.          0.        ]
Sentence[1] I love my parents:	 [-0.00868416  0.0420589   0.          0.          0.        ]
Sentence[2] You are beautiful:	 [-0.02398385  0.02189129  0.          0.         -0.00982201]
Sentence[3] You are so beautiful:	 [-0.0118494   0.01473236  0.          0.         -0.00892754]


In [34]:
import pickle

# Alternative input kept for reference: 'data/SS-Twitter/twitter_vocab.pickle'
# NOTE: pickle.load can execute arbitrary code while unpickling — only point
# this at files that come from a trusted source.
pickle_path = 'data/PsychExp/combined_vocab.pickle'
with open(pickle_path, 'rb') as pickle_file:
    x = pickle.load(pickle_file)

In [35]:
# Show the object unpickled in the previous cell; as the cell's last bare
# expression, the notebook renders its repr as the cell output.
x

{'dataset': 'PsychExp',
 'train_texts': array([[  63,   18, 2597, ...,    0,    0,    0],
        [  63,   18,   58, ...,    0,    0,    0],
        [  63,   18,  470, ...,    0,    0,    0],
        ...,
        [  63,   18,   25, ...,    0,    0,    0],
        [  18,  837, 1779, ...,    0,    0,    0],
        [ 629,   41,  135, ...,   84,    1,   11]], dtype=uint16),
 'val_texts': array([[   63,    18,   627, ...,     0,     0,     0],
        [   20,   273,    15, ...,     0,     0,     0],
        [   18,   837,    15, ...,  9060, 26146,    16],
        ...,
        [   64,    15,   581, ...,    53,    23,   196],
        [   63,    15,  3416, ...,     0,     0,     0],
        [   63,    18,  3685, ...,    28,    10,   423]], dtype=uint16),
 'test_texts': array([[  18,   25,   44, ...,   86, 2025,   62],
        [  41,  520,   58, ..., 1252,   21,   10],
        [  18, 1144,   41, ...,    0,    0,    0],
        ...,
        [4052,   36,   91, ...,    0,    0,    0],
        [66