In [5]:
import os

# GPU selection for this notebook, set ahead of the `import torch` in the
# next cell. Both values are plain strings written into the process
# environment.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "3",
})

# VISUALIZATION
import math

import matplotlib.pyplot as plt
import numpy as np
# `display` is imported from the public `IPython.display` module; importing it
# from `IPython.core.display` has been deprecated since IPython 7.14.
from IPython.display import display, HTML

# Reset matplotlib's rc parameters to their built-in defaults.
plt.rcdefaults()

def rgb_to_hex(rgb):
    """Format an (r, g, b) triple of integers as a ``#rrggbb`` string.

    Args:
        rgb: Three integer channel values. Any iterable is accepted
            (tuple, list, NumPy integer array, ...), not only a tuple.

    Returns:
        str: ``'#'`` followed by each channel as lowercase hex, zero-padded
        to at least two digits (exactly two for values in 0..255).

    Raises:
        TypeError: If ``rgb`` does not hold exactly three values or a value
            is not an integer.
    """
    # The `%` operator only unpacks tuples; a list or array would otherwise be
    # treated as a single argument and fail to format.
    return '#%02x%02x%02x' % tuple(rgb)
    
def attention2color(attention_score):
    """Map an attention score to a white-to-red hex colour.

    Args:
        attention_score: Score nominally in [0, 1]. Values outside that
            range are clamped so the result is always a valid colour.

    Returns:
        str: Colour of the form ``'#ffXXXX'``; a score of 0 gives white
        (``'#ffffff'``) and a score of 1 gives pure red (``'#ff0000'``).
    """
    # Clamp first: an out-of-range score would push the green/blue channel
    # outside 0..255 and yield a string that is not a valid CSS colour.
    score = min(max(attention_score, 0.0), 1.0)
    r = 255 - int(score * 255)
    color = rgb_to_hex((255, r, r))
    return str(color)

In [13]:
import torch
import numpy as np

from trainer import load_embeddings
from lib.tokenizer import preprocessor
from lib.config import MODEL_EC, DEVICE
from lib.data_utils import vectorize

# Emotion names (12 entries, the last one being 'neutral').
# Example of the gold-annotation format, one row per sentence:
#     [1 0 1 0 0 0 0 0 0 0 0]
# NOTE(review): that example row has 11 values while this list has 12 names —
# confirm which ordering/length the model's output actually follows.
labels = ['anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust', 'neutral']

# NOTE(review): absolute, machine-specific checkpoint path; adjust per environment.
model_path = '/home/houyu/learning/FinalProject/out/model/EmotionClassification_0.5900_2019-05-06_00:51.model'
model_conf = MODEL_EC
max_length = 65  # 85 train 65 dev 58 test

# Sentences to visualise.
# NOTE(review): the trailing characters look like mis-decoded emoji — confirm
# the file encoding before trusting these inputs.
test_data = [
    "y'all don't understand. this woman's wrath is REAL. ðŸŒ¹ðŸ–¤",
    "@andyfleming83 Bastard squirrels. ðŸ˜¡",
    "I literally love Paul so much #BB19 #pissed ðŸ˜‚"
]

# Loading model
# SECURITY: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints from a trusted source.
# map_location places the weights on DEVICE, the same device the inputs are
# moved to before the forward pass.
model = torch.load(model_path, map_location=DEVICE)
model.eval()  # inference mode
word2idx, _, embeddings = load_embeddings(model_conf)


from html import escape  # stdlib; imported here so this cell stays self-contained


def _attention_html(token_scores):
    """Build the HTML snippet that shades each token by its attention score.

    Args:
        token_scores: Iterable of ``(token, score)`` pairs in sentence order.
            Each score is handed to ``attention2color`` to pick the
            background colour.

    Returns:
        str: An ``<hr>`` followed by one paragraph containing one coloured
        ``<span>`` per pair.
    """
    spans = "".join(
        # Tokens are escaped so characters such as '<' or '&' are shown
        # literally instead of being parsed as markup.
        "<span style='background-color:{};'>{} </span> ".format(
            attention2color(score), escape(str(token)))
        for token, score in token_scores
    )
    return "<hr><p style='font-size: large'>Text: <br> <br>" + spans + "</p> <br>"


for sentence in test_data:

    # Pre-processing inputs
    pro_sent = preprocessor(sentence)

    # Embedding and vectorize
    sample = vectorize(pro_sent, word2idx, max_length)

    # Never report more tokens than the vectorised sample can hold.
    # NOTE(review): assumes vectorize pads/truncates to max_length — confirm.
    n_tokens = min(len(pro_sent), max_length)

    # Processing to get model inputs: a batch holding this single sentence
    samples = torch.tensor(np.asarray([sample])).to(DEVICE)
    lengths = torch.tensor(np.asarray([n_tokens])).to(DEVICE)

    # Running model (inference only, so gradient tracking is switched off)
    with torch.no_grad():
        outputs, attentions = model(samples, lengths)
    posts = outputs.data.cpu().numpy()
    # Positive outputs become 1, everything else 0.
    predicted = np.clip(np.sign(posts), a_min=0, a_max=None)    # 1   1   0   0   0   0   1   0   0   0   1
    predicted = predicted.astype(np.int32)

    # NOTE(review): indexing below assumes one attention weight per token
    # position along the first axis — confirm against the model's output.
    atten_array = attentions.data.cpu().numpy()

    # Kept as an ordered list of pairs (not a dict keyed by token) so that a
    # word occurring more than once keeps every occurrence and its own score.
    # The square root is applied to each weight before colouring, presumably
    # to keep small weights visible.
    token_attentions = [
        (token, math.sqrt(atten_array[idx]))
        for idx, token in enumerate(pro_sent[:n_tokens])
    ]

    # Display text enriched with attention scores
    display(HTML(_attention_html(token_attentions)))

Loading word embeddings: ntua_twitter_affect_310 ...
Loaded word embeddings from cache.
