# Word2Vec with emojis demo
The goal of this demo is to show the power of word2vec, which also works well with emojis.

In [21]:
from data_cleaning import loader
from gensim.models import Word2Vec

### Load word2vec model
For training, see word2vec_hierarchical_clustering. The model was trained on 800k cleaned tweets with a window size of 10 (to capture more semantic similarities), a feature vector of size 200, and otherwise standard parameters.

In [22]:
# Load the pre-trained embedding from a path relative to the working directory.
# NOTE(review): gensim's load() unpickles the file, so only open model files
# that come from a trusted source.
emoji_model = Word2Vec.load('emoji.embedding')

### Load emojis

In [23]:
def convertEmojis(df):
    """Turn the escaped code points stored in ``df`` into printable strings.

    For every row, the values of ``byteCode1`` and ``byteCode2`` are
    concatenated and the resulting escape sequence (e.g. ``\\U0001F600``)
    is decoded into the characters it denotes.

    Args:
        df: Object exposing ``byteCode1`` and ``byteCode2`` as iterables of
            equal length (a DataFrame with those two columns).

    Returns:
        A list with one decoded string per row, in row order.
    """
    printable = []
    for first_code, second_code in zip(df.byteCode1, df.byteCode2):
        escaped = "{}{}".format(first_code, second_code)
        # Encoding to ASCII bytes first lets the 'unicode-escape' codec
        # interpret the backslash sequences as real code points.
        printable.append(bytes(escaped, 'ascii').decode('unicode-escape'))
    return printable

In [24]:
def subset_present(df, model):
    """Keep only the rows of ``df`` whose emoji is a token known to ``model``.

    Args:
        df: DataFrame with an "emojis" column holding the tokens to look up.
        model: Trained gensim Word2Vec model (or its keyed vectors).

    Returns:
        The rows of ``df`` whose "emojis" value is in the model vocabulary.
    """
    # The vocabulary mapping moved between gensim releases: `model.vocab`
    # (oldest), `model.wv.vocab` (1.x-3.x) and `model.wv.key_to_index` (4.x).
    # Probe from newest to oldest so this works on whichever is installed.
    keyed_vectors = getattr(model, "wv", model)
    vocab = getattr(keyed_vectors, "key_to_index", None)
    if vocab is None:
        vocab = keyed_vectors.vocab
    # Select only emojis that are in our model (i.e. in the corpus)
    return df[df["emojis"].map(lambda token: token in vocab)]

In [25]:
# Load the web-scraped emoji table.
# Presumably a DataFrame with byteCode1/byteCode2 columns, since convertEmojis
# reads those attributes — TODO confirm against data_cleaning.loader.
emojis_df = loader("./data/emoji_webscraped_expanded.json")
# Add a column holding the printable form of each emoji.
emojis_df["emojis"] = convertEmojis(emojis_df) 
# Keep only emojis that are tokens in the model's vocabulary. Per the original
# note these occur at least 100 times in the data; the threshold itself is set
# at training time and is not visible here.
emojis_df_sub = subset_present(emojis_df, emoji_model) # Subset to emojis present in our data (at least 100 times)

## Pick one emoji from this list

In [26]:
# Show every available emoji on a single line: no separator between them and
# no trailing newline.
print(*emojis_df_sub.emojis, sep="", end="")

😀😁😎👲👸💂👼👯😍💆🙍🙅🙆💁🙋🙇😘🙌🙏🚶🏃💃💏💑👫👬👭💪👈👉☝👆👇✌😚✊✋👊👌👍👎👋👏👐☺💅👀👅💋💘❤💓💔💕💖💗💙💚💛💜💞💤💢💣💥😇💦💨💫💭😂😐👑😑💍💎🐶😶🐱🐯🐴🐷🐑😏🐪🐫🐘😣🐻🐾😥😮🐬🌸😃😯🌹🌺🌻🌲😪🌴🌵🌾🌿🍁🍂🍃😫😴😌🍗🍔🍟🍕🍜🍣😛🍦🍩🍪🎂🍰🍫😜☕🍷😝🍸🍹🍺🍻🍴😄😒😓😔😕🌉😖😷🚗🚙✈😅😲😞😟😤😢🌙🌚😭☀🌞🌟☁⛅😦🌀😧🌈☔⚡❄⛄😨🔥💧🌊🎃✨🎈🎉😆😩🎊🎀🎁😬⚽⚾🏀🏈😰😱🏊😳🏆😵♥😡🔊🔕🎼😠🎵🎶🎤🎧😈👿😉👹🎥💀📖📚💰💵💸👻👽💩😊😸💉💊🔪🔫😹😻🙀🙈✔🙉😋🙊💲❗👨💯👶

## Drag and drop emojis here

In [47]:
# Playground query: edit the positive / negative token lists to try your own
# combination of words and emojis.
emoji_model.most_similar(
    positive=["👑", "girl"],
    negative=["guy"],
    topn=1,
)

[('👸', 0.6423957943916321)]

## Examples

In [30]:
# Analogy query: crown + "girl" - "guy".
emoji_model.most_similar(
    positive=["👑", "girl"],
    negative=["guy"],
    topn=1,
)

[('👸', 0.6423957943916321)]

In [29]:
# Single closest token to the camel emoji.
emoji_model.most_similar(
    positive=["🐪"],
    negative=[],
    topn=1,
)

[('🐫', 0.9589635133743286)]

In [31]:
# Single closest token to the beer mug emoji.
emoji_model.most_similar(
    positive=["🍺"],
    negative=[],
    topn=1,
)

[('🍸', 0.8535861968994141)]

In [32]:
# Single closest token to the clinking beer mugs emoji.
emoji_model.most_similar(
    positive=["🍻"],
    negative=[],
    topn=1,
)

[('🍸', 0.8616750836372375)]

In [37]:
# Cutlery minus beer: the ten tokens closest to "eating without drinking".
emoji_model.most_similar(
    positive=["🍴"],
    negative=["🍺"],
    topn=10,
)

[('dessert', 0.35507333278656006),
 ('cheese', 0.3402734696865082),
 ('pancakes', 0.33361250162124634),
 ('pasta', 0.33213138580322266),
 ('cookies', 0.3320840001106262),
 ('chowder', 0.3229526877403259),
 ('#dinner', 0.321743905544281),
 ('lunch', 0.32160159945487976),
 ('bacon', 0.31315314769744873),
 ('grilled', 0.31165188550949097)]

In [39]:
# Mixed word + emoji query: "sport" combined with the trophy emoji.
emoji_model.most_similar(
    positive=["sport", "🏆"],
    negative=[],
    topn=10,
)

[('fitness', 0.5639835596084595),
 ('💪', 0.5312219858169556),
 ('🏈', 0.5184454917907715),
 ('⚽', 0.5005420446395874),
 ('hr', 0.48402613401412964),
 ('24', 0.48130345344543457),
 ('🎥', 0.4729393720626831),
 ('🏀', 0.4544886648654938),
 ('🎀', 0.4488743543624878),
 ('👸', 0.4466416835784912)]

In [45]:
# Plain-word query: the ten tokens (words or emojis) closest to "snow".
emoji_model.most_similar(
    positive=["snow"],
    negative=[],
    topn=10,
)

[('❄', 0.565323531627655),
 ('sweater', 0.4915381073951721),
 ('clouds', 0.48315128684043884),
 ('winter', 0.4809408485889435),
 ('☁', 0.4755793809890747),
 ('corn', 0.4643056392669678),
 ('rain', 0.4608880281448364),
 ('☔', 0.4540202021598816),
 ('cinnamon', 0.4460812211036682),
 ('⛅', 0.4455045163631439)]

In [46]:
# Emoji-only query: dollar sign combined with the ring emoji.
emoji_model.most_similar(
    positive=["💲", "💍"],
    negative=[],
    topn=10,
)

[('💑', 0.6143924593925476),
 ('💏', 0.566390872001648),
 ('🌺', 0.529681384563446),
 ('👫', 0.5216408371925354),
 ('💓', 0.5182384848594666),
 ('💎', 0.5180035829544067),
 ('👬', 0.5043045282363892),
 ('🎀', 0.4919293522834778),
 ('🐻', 0.4892832934856415),
 ('💘', 0.4833012521266937)]