To run any of this you need to have the deepmoji files downloaded

In [None]:
# @inproceedings{felbo2017,
  title={Using millions of emoji occurrences to learn any-domain representations for detecting sentiment, emotion and sarcasm},
  author={Felbo, Bjarke and Mislove, Alan and S{\o}gaard, Anders and Rahwan, Iyad and Lehmann, Sune},
  booktitle={Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year={2017}
}

In [None]:
# -*- coding: utf-8 -*-

""" Use DeepMoji to score texts for emoji distribution.

The resulting emoji ids (0-63) correspond to the mapping
in emoji_overview.png file at the root of the DeepMoji repo.

Writes the result to a csv file.
"""
from __future__ import print_function, division
import example_helper
import json
import csv
import numpy as np
import pandas as pd
from deepmoji.sentence_tokenizer import SentenceTokenizer
from deepmoji.model_def import deepmoji_emojis
from deepmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH

OUTPUT_PATH = 'test.csv'

In [15]:
df = pd.read_csv('../../../TrumpNov29WithSScore.csv')
df = df.dropna()
df.head()

Unnamed: 0,created_at,id_str,is_retweet,source,text,sent_score
0,11/29/18 12:32,1.06812e+18,False,Twitter for iPhone,Billions of Dollars are pouring into the coffe...,-0.1154
1,11/29/18 12:16,1.06812e+18,False,Twitter for iPhone,When will this illegal Joseph McCarthy style W...,-0.3793
2,11/29/18 11:54,1.06811e+18,False,Twitter for iPhone,Did you ever see an investigation more in sear...,-0.5769
3,11/29/18 11:37,1.06811e+18,False,Twitter for iPhone,General Motors is very counter to what other a...,0.0385
4,11/29/18 4:39,1.068e+18,False,Twitter for iPhone,So much happening with the now discredited Wit...,-0.3636


In [25]:
sentences = []
dates = []
for i, row in df.iterrows():
    try:
        if row['is_retweet']:
            continue
        cleanwords = []
        text = row['text'].split()
        for i in range(len(text)):
            if text[i].find("http") >= 0:
                continue
            try:
                word = unicode(text[i])
                cleanwords.append(text[i])
            except Exception as e:
                word = text[i].decode('utf-8')
                word = word.encode('ascii', 'ignore')
                cleanwords.append(word)
                
        if len(cleanwords) > 1:
            sentences.append(unicode(' '.join(cleanwords)))
            dates.append(row['created_at'])
    except Exception as e:
        print(e)
        
for i in sentences:
    if len(i.split()) == 1:
        print(i)

In [26]:
print(len(dates), len(sentences))

34856 34856


In [27]:
# TEST_SENTENCES = [u'I love mom\'s cooking',
#                   u'I love how you never reply back..',
#                   u'I love cruising with my homies',
#                   u'I love messing with yo mind!!',
#                   u'I love you and now you\'re just gone..',
#                   u'This',
#                   u'This is the shit']

TEST_SENTENCES = sentences



def top_elements(array, k):
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]


maxlen = 30
batch_size = 32

print('Tokenizing using dictionary from {}'.format(VOCAB_PATH))
with open(VOCAB_PATH, 'r') as f:
    vocabulary = json.load(f)
st = SentenceTokenizer(vocabulary, maxlen)
tokenized, _, _ = st.tokenize_sentences(TEST_SENTENCES)

print('Loading model from {}.'.format(PRETRAINED_PATH))
model = deepmoji_emojis(maxlen, PRETRAINED_PATH)
model.summary()

print('Running predictions.')
prob = model.predict(tokenized)

# Find top emojis for each sentence. Emoji ids (0-63)
# correspond to the mapping in emoji_overview.png
# at the root of the DeepMoji repo.
print('Writing results to {}'.format(OUTPUT_PATH))
scores = []
for i, t in enumerate(TEST_SENTENCES):
    t_tokens = tokenized[i]
    t_score = [t]
    t_score.append(dates[i])
    t_prob = prob[i]
    ind_top = top_elements(t_prob, 5)
    t_score.append(sum(t_prob[ind_top]))
    t_score.extend(ind_top)
    t_score.extend([t_prob[ind] for ind in ind_top])
    scores.append(t_score)

with open(OUTPUT_PATH, 'wb') as csvfile:
    writer = csv.writer(csvfile, delimiter=',', lineterminator='\n')
    writer.writerow(['created_at','Text', 'Top5%',
                     'Emoji_1', 'Emoji_2', 'Emoji_3', 'Emoji_4', 'Emoji_5',
                     'Pct_1', 'Pct_2', 'Pct_3', 'Pct_4', 'Pct_5'])
    for i, row in enumerate(scores):
        try:
            writer.writerow(row)
        except Exception as e:
            print("Exception at row {}!".format(i))
            print(e)


Tokenizing using dictionary from /Users/JackBelluche/Desktop/Code/BigData/finalProject/DM/DeepMoji/model/vocabulary.json
Loading model from /Users/JackBelluche/Desktop/Code/BigData/finalProject/DM/DeepMoji/model/deepmoji_weights.hdf5.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 30)           0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 30, 256)      12800000    input_6[0][0]                    
__________________________________________________________________________________________________
activation_6 (Activation)       (None, 30, 256)      0           embedding[0][0]                  
________________________________________________________________________

In [28]:
data = pd.read_csv('test.csv')

In [29]:
data.head()

Unnamed: 0,created_at,Text,Top5%,Emoji_1,Emoji_2,Emoji_3,Emoji_4,Emoji_5,Pct_1,Pct_2,Pct_3,Pct_4,Pct_5
0,Billions of Dollars are pouring into the coffe...,11/29/18 12:32,0.290665,32,55,33,62,25,0.100459,0.071994,0.042611,0.038831,0.036771
1,When will this illegal Joseph McCarthy style W...,11/29/18 12:16,0.425414,32,46,55,34,27,0.105558,0.103549,0.080169,0.077162,0.058977
2,Did you ever see an investigation more in sear...,11/29/18 11:54,0.427425,32,55,12,41,25,0.157205,0.103903,0.064565,0.056425,0.045328
3,General Motors is very counter to what other a...,11/29/18 11:37,0.403117,32,55,19,25,37,0.129634,0.109936,0.073316,0.051891,0.038339
4,So much happening with the now discredited Wit...,11/29/18 4:39,0.292888,12,62,32,52,43,0.065822,0.065414,0.056404,0.055462,0.049786
