In [None]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import numpy as np
from sklearn.utils import shuffle
# Seed constant. It is not referenced by any other visible cell, so no RNG is
# actually seeded by it here.
seed_value = 0

# Load the two saved Keras emotion models: the StockTwits file first, then the
# Twitter file.
model_sw, model_tw = [
    keras.models.load_model(model_path)
    for model_path in (
        '../build/models/stocktwits_emotion.h5',
        '../build/models/twitter_emotion.h5',
    )
]

def prepare_test_data(test_data_source, tokenizer, max_sequence_length=30,
                      excluded_labels=(4,), num_classes=None):
    """Load a labelled parquet sample and encode it for a Keras text model.

    Rows whose ``label`` is in ``excluded_labels`` and rows with a missing
    ``text`` are dropped before encoding.

    Args:
        test_data_source: Path (or anything ``pd.read_parquet`` accepts) of a
            parquet file with ``text`` and ``label`` columns.
        tokenizer: Object exposing ``texts_to_sequences(list_of_str)``.
        max_sequence_length: Length every sequence is pre-padded/truncated to.
        excluded_labels: Iterable of label ids to drop. Defaults to ``(4,)``,
            which matches the previous hard-coded ``label != 4`` filter.
        num_classes: Width of the one-hot label matrix. ``None`` (default)
            keeps the previous behaviour of inferring it from the largest
            label present; pass it explicitly when the sample may not contain
            the highest class id, so the column layout stays fixed.

    Returns:
        Tuple ``(test_data, test_labels, test_texts)``: the padded integer
        sequences, the one-hot encoded labels and the raw texts, row-aligned.
    """
    frame = pd.read_parquet(test_data_source)
    # Same rows as `label != 4` for the default; NaN labels are kept by both.
    frame = frame[~frame.label.isin(list(excluded_labels))]
    frame = frame.dropna(subset=['text'])
    test_texts, test_labels = frame.text.tolist(), frame.label.tolist()
    test_sequences = tokenizer.texts_to_sequences(test_texts)
    test_data = pad_sequences(
        test_sequences, padding='pre', maxlen=max_sequence_length)
    test_labels = to_categorical(
        np.asarray(test_labels), num_classes=num_classes)
    print('[+] Shape of test data tensor:', test_data.shape)
    print('[+] Shape of test label tensor:', test_labels.shape)
    return test_data, test_labels, test_texts

# We take out disgust (label 4), since it is not available in Text2Emotion.

In [None]:
# NOTE: pickle.load can execute arbitrary code from the file it reads; only
# load tokenizer pickles that come from a trusted source.
with open('data/tokenizer_stocktwits.pickle', 'rb') as sw_file:
    tokenizer_sw = pickle.load(sw_file)
with open('data/tokenizer_twitter.pickle', 'rb') as tw_file:
    tokenizer_tw = pickle.load(tw_file)

# The same hand-tagged sample is encoded once per tokenizer. `labels` and
# `texts` are rebound by the second call (same file, same filters).
sample_path = '../emtract/data/hand_tagged_sample.parquet.snappy'
data, labels, texts = prepare_test_data(
    sample_path, tokenizer_sw, max_sequence_length=30)
data_tw, labels, texts = prepare_test_data(
    sample_path, tokenizer_tw, max_sequence_length=30)

In [None]:
def _without_column_4(matrix):
    """Return `matrix` with column 4 removed (columns 0-3 and 5+ are kept)."""
    return np.concatenate([matrix[:, :4], matrix[:, 5:]], axis=1)


# Class probabilities from each model, with column 4 dropped so the remaining
# columns line up with the filtered labels.
y_prob = _without_column_4(model_sw.predict(data))
y_prob_tw = _without_column_4(model_tw.predict(data_tw))

# NOTE: `labels` is rebound from itself, so re-running this cell without first
# re-running the data-loading cell removes a further column.
labels = _without_column_4(labels)

In [None]:
from sklearn.preprocessing import Normalizer
# Rescale each row to unit L1 norm after the column drop, then take the
# highest-scoring remaining column as the predicted class index.
l1_normalizer = Normalizer(norm='l1')
y_prob, y_prob_tw = [
    l1_normalizer.fit_transform(probabilities)
    for probabilities in (y_prob, y_prob_tw)
]
y_pred, y_pred_tw = [
    probabilities.argmax(axis=-1)
    for probabilities in (y_prob, y_prob_tw)
]

In [None]:
from sklearn.metrics import log_loss, accuracy_score
# Printed in order: log loss (StockTwits model, Twitter model), then accuracy
# (StockTwits model, Twitter model).
# Original note attached to the Twitter log loss: "take out false 0s".
true_classes = labels.argmax(axis=-1)
for probabilities in (y_prob, y_prob_tw):
    print(log_loss(labels, probabilities))
for predicted in (y_pred, y_pred_tw):
    print(accuracy_score(true_classes, predicted))

# Contrast it with Text2Emotion

In [None]:
# Now use Text2Emotion on the same hand-tagged sample.
import text2emotion as te

# Load the hand-tagged sample and apply the same row filters as
# prepare_test_data (drop label 4, drop rows without text), so Text2Emotion is
# scored on exactly the rows the Keras models were scored on and never
# receives a missing text.
df = pd.read_parquet('../emtract/data/hand_tagged_sample.parquet.snappy')
# .copy() makes the filtered frame independent, so adding columns to it later
# does not raise pandas' SettingWithCopyWarning.
df = df[df.label != 4].dropna(subset=['text']).copy()

def get_emotion(arg):
    """Return Text2Emotion scores for `arg` as a fixed-order list.

    The list order is [Happy, Sad, Angry, Surprise, Fear], read from the dict
    returned by ``te.get_emotion``.
    """
    scores = te.get_emotion(arg)
    ordered_keys = ('Happy', 'Sad', 'Angry', 'Surprise', 'Fear')
    return [scores[key] for key in ordered_keys]

In [None]:
import time
# Score every text with Text2Emotion and time the whole pass.
start = time.time()
df['emo'] = df['text'].apply(get_emotion)
end = time.time()
# Report the measurement; previously start/end were captured but never shown.
print('[+] Text2Emotion scoring time (s):', end - start)

In [None]:
# Label id for each position of the Text2Emotion score list
# [Happy, Sad, Angry, Surprise, Fear]. The hand-tagged labels skip id 4
# (disgust, filtered out), so surprise is 5 and fear is 6. This follows the
# column order used when scoring log loss against df.label.
# NOTE(review): confirm against the labelling scheme of the sample.
emo_label_ids = [1, 2, 3, 5, 6]


def _emotion_share(scores, position):
    """Return scores[position] as a fraction of the total, or 0 if all are 0."""
    total = sum(scores)
    return scores[position] / total if total != 0 else 0


# A text with no emotion signal at all (every score 0) is treated as neutral.
df['neutral'] = df['emo'].apply(lambda x: 1 if sum(x) == 0 else 0)
df['happy'] = df['emo'].apply(lambda x: _emotion_share(x, 0))
df['sad'] = df['emo'].apply(lambda x: _emotion_share(x, 1))
df['anger'] = df['emo'].apply(lambda x: _emotion_share(x, 2))
df['surprise'] = df['emo'].apply(lambda x: _emotion_share(x, 3))
df['fear'] = df['emo'].apply(lambda x: _emotion_share(x, 4))
# Predicted class in the same id space as df.label (0 = neutral). The previous
# `argmax + 1` produced 4 for surprise and 5 for fear, which never matched
# surprise and counted fear as surprise when computing accuracy.
df['pred'] = df['emo'].apply(
    lambda x: emo_label_ids[int(np.argmax(x))] if sum(x) != 0 else 0)

In [None]:
# Probability columns in the order of the label ids that remain once disgust
# (4) is removed. Passing `labels` explicitly keeps that alignment even if a
# class happens to be absent from the sample, instead of relying on inference
# from the labels present.
t2e_columns = ['neutral', 'happy', 'sad', 'anger', 'surprise', 'fear']
t2e_label_ids = [0, 1, 2, 3, 5, 6]
print(log_loss(df.label.values, df[t2e_columns].values, labels=t2e_label_ids))
print(accuracy_score(df.label.values, df['pred'].values))
# this seems really bad!
# NOTE(review): accuracy is only meaningful if df['pred'] uses the same label
# ids as df.label (surprise = 5, fear = 6); verify where 'pred' is built.