In [1]:
%load_ext autoreload
%autoreload 2

# Semantic Knowledge

This will process the transcripts/annotations (which would be considered autobiographical knowledge) to extract the meaning of the emotion words, i.e. semantic knowledge, as subclass of an nltk naive Bayes classifier
- the features/observations/data are a set of (question, answer) pairs
- the models/hypotheses are the emotion words
- the data gives us P(observation|emotion word)
- the task goal is to calculate P(emotion word|observation)
- the prior P(emotion word) = 1/(# of emotion words), i.e. a uniform, non-informative prior

In [24]:
from emo20q.semanticknowledge import SemanticKnowledge
k = SemanticKnowledge()

In [27]:
print(k.entities())

{'uninterested', 'curiosity', 'caring', 'scared', 'unsure', 'annoyance', 'let down', 'disappointment', 'sorrow', 'dumbfounded', 'aversion', 'insecurity', 'hope', 'admiration', 'jealousy', 'delirium', 'respect', 'conflicted', 'jubilance', 'adoration', 'surprise', 'stress', 'apathy', 'avarice', 'determination', 'relief', 'fear', 'happiness', 'hunger', 'euphoria', 'thrill', 'nostalgia', 'coldness', 'bravery', 'serenity', 'thankfulness', 'pity', 'pessimism', 'remorse', 'glee', 'annoyed', 'infuriation', 'exotic', 'downcast', 'confidence', 'ecstasy', 'obsession', 'tense', 'sleepiness', 'depression', 'frustration', 'relaxation', 'maudlin', 'upset', 'love', 'disgust', 'loneliness', 'exhaustion', 'ambivalence', 'aggravation', 'shyness', 'jilted', 'hopelessness', 'disillusionment', 'joyfulness', 'wariness', 'guilt', 'indifference', 'boredom', 'ambiguity', 'delight', 'accepting', 'maniacal', 'loved', 'gratefulness', 'being appalled', 'educated', 'cheerfulness', 'courage', 'discontentment', 'worry

In [26]:
print(k.most_informative_features(20))

[('e.valence==positive', 'no'), ('e==happiness', 'no'), ('e.valence==negative', 'yes'), ('associated(e,happiness)', 'no'), ('e.activation==high', 'no'), ('e.activation==high', 'other'), ('cause(e,feelGood)', 'other'), ('e.valence==negative', 'no'), ('e.duration==long', 'no'), ('e.valence==positive', 'other'), ('e==happiness', 'yes'), ('e==depression', 'yes'), ('cause(something,e)', 'no'), ('directed(e,otherPerson)', 'yes'), ('directed(e,otherPerson)', 'other'), ('directed(e,otherPerson)', 'no'), ('e.duration==long', 'other'), ('cause(something,e)', 'other'), ('similar(e,happiness)', 'other'), ('e==hope', 'yes')]


## semantic knowledge is subclassed from nltk.classify.naivebayes
https://www.nltk.org/_modules/nltk/classify/naivebayes.html
which in turn is a subclass of the classifier interface:
https://www.nltk.org/api/nltk.classify.html
Also used is FreqDist and ELEProbDist (.5 smoothing) from nltk.probability
- I think that the ELEProbDist is not used 
- in gpdaquestioner, UniformProbDist is used

In [30]:
print("observed label/emotion frequencies:")
print(k._label_freqdist)
for lab in k._label_freqdist:
 print(lab, k._label_freqdist[lab])

observed label frequencies:
<FreqDist with 167 samples and 593 outcomes>
happiness 61
anger 40
sadness 33
love 27
frustration 21
confusion 18
depression 17
jealousy 13
excitement 13
fear 12
euphoria 10
surprise 8
disgust 8
anxiety 8
envy 8
hope 8
nervousness 8
concern 8
elation 7
melancholy 6
regret 6
tiredness 6
boredom 5
relief 5
contentment 5
hatred 5
thrill 4
worry 4
embarrassment 4
guilt 4
loneliness 4
joy 4
fury 4
disappointment 4
relaxation 4
nostalgia 4
calm 3
shame 3
enthusiasm 3
mad 3
pity 3
annoyance 3
pride 3
cheerfulness 3
apathy 3
resentment 3
obsession 3
remorse 3
proud 2
serenity 2
thankfulness 2
bravery 2
gratefulness 2
scared 2
distress 2
affection 2
suffering 2
eagerness 2
shyness 2
misery 2
satisfaction 2
sorrow 2
exhaustion 2
stress 2
dissatisfied 2
lust 2
hostility 2
agitation 2
quixotic 2
conflicted 2
perplexity 2
ambivalence 2
inquisitiveness 2
discontentment 2
angry 2
astounded 2
overwhelmed 2
accepting 2
disillusionment 2
hate 2
silly 1
adoration 1
admiration 

In [32]:
print("observed label/emotion probabilities:")
print(k._label_probdist)
for lab in k._label_freqdist:
 print(lab, k._label_probdist.prob(lab))

observed label frequencies:
<ELEProbDist based on 593 samples>
happiness 0.09090909090909091
anger 0.0598669623059867
sadness 0.04951958610495196
love 0.04065040650406504
frustration 0.031781226903178125
confusion 0.027346637102734665
depression 0.025868440502586843
jealousy 0.019955654101995565
excitement 0.019955654101995565
fear 0.018477457501847747
euphoria 0.015521064301552107
surprise 0.012564671101256468
disgust 0.012564671101256468
anxiety 0.012564671101256468
envy 0.012564671101256468
hope 0.012564671101256468
nervousness 0.012564671101256468
concern 0.012564671101256468
elation 0.011086474501108648
melancholy 0.009608277900960829
regret 0.009608277900960829
tiredness 0.009608277900960829
boredom 0.008130081300813009
relief 0.008130081300813009
contentment 0.008130081300813009
hatred 0.008130081300813009
thrill 0.0066518847006651885
worry 0.0066518847006651885
embarrassment 0.0066518847006651885
guilt 0.0066518847006651885
loneliness 0.0066518847006651885
joy 0.006651884700665

features are a default dict of freqdists
dict\[(emotion label, annotation gloss)\] = {'yes': count, 'no': count} 

In [34]:
print("observed feature frequencies:")
print(k._feature_freqdist)

observed feature frequencies:
defaultdict(<class 'nltk.probability.FreqDist'>, {('jealousy', 'e.valence==positive'): FreqDist({'no': 9}), ('jealousy', 'directed(e,otherPerson)'): FreqDist({'yes': 5, 'no': 2}), ('jealousy', 'e==anger'): FreqDist({'no': 8}), ('jealousy', 'e.duration==long'): FreqDist({'other': 3, 'yes': 3, 'no': 2}), ('jealousy', 'similar(e,anger)'): FreqDist({'yes': 3, 'other': 1}), ('jealousy', 'cause(something,e)'): FreqDist({'yes': 5}), ('jealousy', 'e==regret'): FreqDist({'no': 4}), ('jealousy', 'e==guilt'): FreqDist({'no': 1}), ('jealousy', 'e==frustration'): FreqDist({'no': 7}), ('jealousy', 'e==jealousy'): FreqDist({'yes': 10}), ('boredom', 'e.valence==positive'): FreqDist({'no': 4}), ('boredom', 'e.duration==long'): FreqDist({'yes': 2, 'no': 1}), ('boredom', 'cause(e,inactivity)'): FreqDist({'yes': 1}), ('boredom', 'e==depression'): FreqDist({'no': 4}), ('boredom', 'e==sadness'): FreqDist({'no': 3}), ('boredom', 'directed(e,otherPerson)'): FreqDist({'other': 1, 

looks like some features have value {None: 1}?????

a feature is a question answer pair
features is a dict

In [36]:
features = {}
features["e.valence==positive"] = "no"     # not positive
features["directed(e,otherPerson"] = "yes" # directed at someone else

In [47]:
features = {}
features["e.valence==positive"] = "no"     # not positive
features["directed(e,otherPerson)"] = "yes" # directed at someone else

prior = k.prob_classify(features)
for e in sorted(prior.samples(), key=prior.prob, reverse=True):
    print(e, prior.prob(e))

anger 0.2578312238205985
frustration 0.11603881106909969
jealousy 0.0678924751852567
confusion 0.039663498660860524
sadness 0.034097831636104185
envy 0.0325424458124821
concern 0.02722316207720724
hatred 0.025268252042633145
tiredness 0.02516190418050085
love 0.016455932350997123
anxiety 0.01645201427186595
resentment 0.016341837886696874
disgust 0.01590964017499124
worry 0.01339983062866909
mad 0.013027613111206038
nervousness 0.01265539559374303
obsession 0.010422090488964834
fear 0.01028673866443283
dissatisfied 0.00930543793657575
ambivalence 0.007444350349260604
agitation 0.007444350349260604
misery 0.005955480279408486
depression 0.005862425900042718
regret 0.00541948705426172
disappointment 0.005359932251467639
surprise 0.005206791329997137
guilt 0.005002603434703131
boredom 0.0049132712305119996
melancholy 0.004866478171173789
discontentment 0.004466610209556364
hate 0.004466610209556364
loneliness 0.004019949188600724
annoyance 0.0038909137825468743
fury 0.0035732881676450927


In [50]:
features = {}
features["cause(e,feelGood)"] = "yes"     # not positive
features["cause(otherPerson,e)"] = "yes"  # caused by someone else
features["e.duration==long"] = "no"      # long duration

prior = k.prob_classify(features)
for e in sorted(prior.samples(), key=prior.prob, reverse=True):
    print(e, prior.prob(e))

happiness 0.33731672379402544
excitement 0.06249551064247811
love 0.04638668131358023
frustration 0.04131887874069691
elation 0.034719728134710054
anger 0.0324776869492679
surprise 0.03243051529066329
sadness 0.027257749653653806
euphoria 0.026707483180546172
fear 0.026230563838036434
confusion 0.02352802089714785
nervousness 0.01981864823318309
anxiety 0.01667855072091254
jealousy 0.011537632733995959
contentment 0.011191707237562222
pity 0.008902494393515393
envy 0.0077833236697591865
nostalgia 0.007630709480156061
concern 0.007412689209294463
pride 0.007121995514812317
hatred 0.006994817023476386
joy 0.006867638532140457
relaxation 0.006867638532140449
disgust 0.006726329097322751
astounded 0.006358924566796716
hope 0.006053696187590478
distress 0.005087139653437375
hate 0.005087139653437375
inquisitiveness 0.004769193425097536
hostility 0.0038153547400780305
embarrassment 0.0038153547400780257
relief 0.0037305690791874075
depression 0.0034620811530337676
tiredness 0.003306640774734