In [85]:
# Evaluating sentiments with Affect Control Theory (ACT)

In [94]:
import pandas as pd
import numpy as np
import os

First, we extract the EPA (evaluation, potency, activity) values for each word of an example sentence. A simple sentence is:

A child hugs the mother.

In [95]:
class dotdict(dict):
    """Dictionary whose entries can also be read, set and deleted as attributes."""

    def __getattr__(self, name):
        # Same contract as dict.get: an unknown key yields None, not an error.
        return self.get(name)

    def __setattr__(self, name, value):
        # Attribute assignment stores an ordinary dictionary item.
        self[name] = value

    def __delattr__(self, name):
        # Same contract as dict.__delitem__: an unknown key raises KeyError.
        del self[name]

In [96]:
# Read both arrays stored in the .npz archive; the context manager closes the
# file once they have been pulled into memory.
with np.load('../database/matrices.npz') as data:
    fmat, mmat = data['fmat'], data['mmat']

# M is the matrix used by get_sentiments below; it is simply an alias of mmat.
M = mmat

In [97]:
# Directory holding the input files: ./../database relative to the working directory.
data_path = os.path.join(os.curdir, os.pardir, 'database')

In [98]:
# The six numeric columns: two (e, p, a) triples per word.
# NOTE(review): what distinguishes triple 1 from triple 2 is not visible here — confirm.
labels = ['e1', 'p1', 'a1', 'e2', 'p2', 'a2']

# epa.txt has no header row, so the column names are assigned after loading:
# the word, the six numeric columns, and a trailing 'tmp' column.
vocab = pd.read_csv(os.path.join(data_path, 'epa.txt'), header=None)
vocab.columns = ['word'] + labels + ['tmp']

In [99]:
# Divide every numeric column by 4 and clamp the result from below at -1
# (no upper bound is applied).
# NOTE: this overwrites vocab in place, so running the cell a second time
# divides by 4 again; re-load vocab before re-running.
vocab[labels] = (vocab[labels] / 4.).clip(lower=-1)

In [100]:
# Load the list of basic emotion words; the file has no header row, so the
# words end up in column 0.
emo_list = pd.read_csv(os.path.join(data_path, 'basic_emo.txt'), header=None)

In [101]:
# Keep only the vocabulary rows whose word appears in the basic-emotion list.
emotions = vocab.loc[vocab['word'].isin(emo_list[0].tolist())]

In [102]:
# All vocabulary words, in table order.
words = list(vocab['word'].values)

In [103]:
# Map each position in `words` to the word stored there.
keys = dict(enumerate(words))

In [104]:
# Array with one row per basic emotion and the e1, p1, a1 values as columns.
epa_emotions = emotions.loc[:, ['e1', 'p1', 'a1']].values

In [105]:
# Display the basic-emotion array (rows follow the row order of `emotions`).
epa_emotions

array([[-0.4675, -0.0375, -0.035 ],
       [-0.3625, -0.1825, -0.0625],
       [-0.535 , -0.4375, -0.0275],
       [ 0.6175,  0.3125,  0.2425],
       [-0.47  , -0.365 , -0.3125],
       [ 0.295 ,  0.11  ,  0.2375]])

In [106]:
def get_epa(w, vocab):
    """Look up the e1/p1/a1 values of a word.

    Parameters
    ----------
    w : str
        Word to look up; compared for equality against the ``word`` column.
    vocab : pandas.DataFrame
        Table with a ``word`` column and ``e1``, ``p1``, ``a1`` columns.

    Returns
    -------
    dotdict
        Keys ``e``, ``p`` and ``a`` holding the ``e1``, ``p1`` and ``a1``
        values of the first row whose word equals ``w``.

    Raises
    ------
    IndexError
        If no row matches ``w``.
    """
    matches = vocab[vocab.word == w]
    if matches.empty:
        # .iloc[0] on an empty frame fails with an opaque "single positional
        # indexer is out-of-bounds"; keep the exception type but name the word.
        raise IndexError('word {!r} not found in vocabulary'.format(w))

    _epa = matches.iloc[0][['e1', 'p1', 'a1']].values
    return dotdict({'e': _epa[0], 'p': _epa[1], 'a': _epa[2]})

In [110]:
def comp_distance(x, y):
    """Euclidean norm of ``x - y`` taken along axis 1 (one distance per row)."""
    return np.linalg.norm(x - y, axis=1)


def get_sentiments(subj='mother', verb='hug', obj='child', emo_display=5):
    """Compute the vector ``tau`` for a subject-verb-object event and rank the
    basic emotions by distance to two slices of it.

    Relies on the notebook-level ``vocab``, ``M``, ``epa_emotions`` and
    ``emotions``.

    Parameters
    ----------
    subj, verb, obj : str
        Words looked up in ``vocab`` through ``get_epa``.
    emo_display : int
        Accepted but not used by the body; every emotion is always returned.

    Returns
    -------
    tau : numpy.ndarray
        ``np.dot(M / 4., t)``, where ``t`` is the term vector built below.
    result_emotions : list
        Two lists, one for ``tau[0:3]`` and one for ``tau[6:9]``, each holding
        ``[word, distance]`` pairs sorted by increasing distance.
    """
    # Same lookup order as before: subject, object, then verb.
    s, o, v = (get_epa(word, vocab) for word in (subj, obj, verb))

    constant = [1]
    first_order = [s.e, s.p, s.a, v.e, v.p, v.a, o.e, o.p, o.a]
    second_order = [
        s.e*v.e, s.e*v.p, s.e*v.a, s.p*v.e, s.p*v.p, s.p*o.a, s.a*v.a,
        v.e*o.e, v.e*o.p, v.p*o.e, v.p*o.p, v.p*o.a, v.a*o.e, v.a*o.p,
    ]
    third_order = [
        s.e*v.e*o.e, s.e*v.p*o.p, s.p*v.p*o.p, s.p*v.p*o.a, s.a*v.a*o.a,
    ]
    t = np.array(constant + first_order + second_order + third_order)

    scaled_M = M / 4.
    tau = np.dot(scaled_M, t)

    # NOTE(review): tau[0:3] and tau[6:9] are presumably the subject and object
    # triples, mirroring the order of the first-order terms in t — confirm
    # against the row layout of M.
    result_emotions = []
    for start, stop in ((0, 3), (6, 9)):
        distances = comp_distance(epa_emotions, tau[start:stop])
        ranked = [[emotions.iloc[j].word, distances[j]]
                  for j in np.argsort(distances)]
        result_emotions.append(ranked)

    return tau, result_emotions

In [111]:
# Example run with subject 'girlfriend', verb 'molest' and object 'boyfriend'.
tau, result = get_sentiments('girlfriend', 'molest', 'boyfriend')

In [112]:
# Print the tau vector, then display the two ranked emotion lists.
# print() with a single argument gives the same output on Python 2 and 3,
# whereas the bare `print tau` statement is a syntax error on Python 3.
print(tau)
result

[-0.04168841  0.04257797  0.14676558 -0.10063576  0.07954436  0.0747886
  0.03061993 -0.10974096  0.03578582]


[[['surprised', 0.35515848939308775],
  ['disgusted', 0.44426596642146909],
  ['angry', 0.46985818554778985],
  ['fearful', 0.71007018719081483],
  ['happy', 0.71871576094595335],
  ['sad', 0.74866246574496031]],
 [['surprised', 0.39858690843008598],
  ['disgusted', 0.41170044459540839],
  ['angry', 0.50828422026835229],
  ['fearful', 0.65677772762263387],
  ['sad', 0.66112063940172638],
  ['happy', 0.75196170260560857]]]

In [None]:
def get_person_sentiment(person):
    """Placeholder for a per-person sentiment lookup; not implemented yet.

    The draft of this cell stopped mid-statement, which made the cell a syntax
    error. Until the logic is written the function fails loudly when called.

    Parameters
    ----------
    person
        Identifier of the person; the draft was about to compare it with a
        value that was never written down.

    Raises
    ------
    NotImplementedError
        Always.
    """
    # TODO: the unfinished draft began with `if person ==`; fill in the branches.
    raise NotImplementedError('get_person_sentiment is not implemented yet')

In [102]:
# Scratch check: distance from each basic-emotion row to tau[0:3], computed
# inline (the same calculation get_sentiments performs via comp_distance).
# NOTE(review): the recorded output of this cell shows values about 4x larger
# than the scaled epa_emotions displayed elsewhere in this notebook
# (e.g. -1.87 vs -0.4675), which suggests it last ran before the /4 rescaling.
# Re-run from a fresh kernel before trusting it.
a,b =0,3
distances = np.linalg.norm(epa_emotions-tau[a:b], axis=1)
epa_emotions

array([[-1.87, -0.15, -0.14],
       [-1.45, -0.73, -0.25],
       [-2.14, -1.75, -0.11],
       [ 2.47,  1.25,  0.97],
       [-1.88, -1.46, -1.25],
       [ 1.18,  0.44,  0.95]])

In [103]:
# Cosine similarity between each basic-emotion row and the tail of tau
# (elements 6 onward).
vec = tau[6:]
sim = np.dot(epa_emotions, vec) / (np.linalg.norm(epa_emotions, axis=1)*np.linalg.norm(vec))

# Single-argument print() works on both Python 2 and 3; the bare print
# statement used before is a syntax error on Python 3.
print(sim)
# np.argsort sorts ascending, so the most similar emotion is printed last.
print(sim[np.argsort(sim)])
print(emotions.iloc[np.argsort(sim)].word)

[-0.58433439 -0.47341221 -0.26900793  0.5949265  -0.58490151  0.79794589]
[-0.58490151 -0.58433439 -0.47341221 -0.26900793  0.5949265   0.79794589]
1961          sad
1613        angry
1709    disgusted
1748      fearful
1778        happy
2026    surprised
Name: word, dtype: object


In [104]:
# Display epa_emotions again.
# NOTE(review): the recorded output of this cell shows values about 4x larger
# than the scaled array shown elsewhere in this notebook, so it appears to
# predate the /4 rescaling — re-run from a fresh kernel.
epa_emotions

array([[-1.87, -0.15, -0.14],
       [-1.45, -0.73, -0.25],
       [-2.14, -1.75, -0.11],
       [ 2.47,  1.25,  0.97],
       [-1.88, -1.46, -1.25],
       [ 1.18,  0.44,  0.95]])

In [120]:
# Display the current (rescaled) basic-emotion array.
epa_emotions

array([[-0.4675, -0.0375, -0.035 ],
       [-0.3625, -0.1825, -0.0625],
       [-0.535 , -0.4375, -0.0275],
       [ 0.6175,  0.3125,  0.2425],
       [-0.47  , -0.365 , -0.3125],
       [ 0.295 ,  0.11  ,  0.2375]])

In [121]:
# Display the vocabulary rows for the basic emotions (all columns).
emotions

Unnamed: 0,word,e1,p1,a1,e2,p2,a2,tmp
1613,angry,-0.4675,-0.0375,-0.035,-0.4925,0.12,0.16,10 100000000 000
1709,disgusted,-0.3625,-0.1825,-0.0625,-0.475,-0.1475,-0.015,10 100010000 000
1748,fearful,-0.535,-0.4375,-0.0275,-0.5525,-0.4225,-0.04,10 100000000 000
1778,happy,0.6175,0.3125,0.2425,0.76,0.5975,0.43,10 100010000 000
1961,sad,-0.47,-0.365,-0.3125,-0.565,-0.4675,-0.3375,10 100000000 000
2026,surprised,0.295,0.11,0.2375,0.3475,0.1,0.42,10 000100000 000


In [139]:
# Print one line per basic emotion of the form NAME = "e*E+p*P+a*A", using the
# e1/p1/a1 values as coefficients.
# NOTE: the loop rebinds the notebook-level name `vec`.
for w, vec in zip(emotions.word.values, epa_emotions):
    exp = '"{}*E+{}*P+{}*A"'.format(vec[0], vec[1], vec[2])
    # One pre-formatted string keeps the output identical on Python 2 and 3;
    # the multi-argument print statement is a syntax error on Python 3.
    print('{} = {}'.format(w.upper(), exp))

ANGRY = "-0.4675*E+-0.0375*P+-0.035*A"
DISGUSTED = "-0.3625*E+-0.1825*P+-0.0625*A"
FEARFUL = "-0.535*E+-0.4375*P+-0.0275*A"
HAPPY = "0.6175*E+0.3125*P+0.2425*A"
SAD = "-0.47*E+-0.365*P+-0.3125*A"
SURPRISED = "0.295*E+0.11*P+0.2375*A"
