## Information-theoretic motivation for using meaning specificity in negation resolution


In [0]:
#utils
def interpolate(raw, value_range, n_bins, eps = 1e-5):
  '''
  Map a raw value onto a zero-based, equal-width bin index.

  :param raw:
    raw value to interpolate
  :param value_range:
    2-tuple (lower bound, upper bound)
  :param n_bins:
    number of equal-width bins that value_range is divided into
  :param eps:
    small offset subtracted from raw before binning, so that a value sitting
    exactly on the upper bound lands in the last bin instead of overflowing
    to index n_bins
  :return:
    int bin index in [0, n_bins - 1]; a raw value outside value_range is
    clamped to the nearest valid bin rather than yielding an out-of-range
    index
  '''
  lower, upper = value_range[0], value_range[1]
  bin_width = (upper - lower) / float(n_bins)
  # int() truncates toward zero, so a value on the lower bound
  # (slightly negative after the eps shift) still maps to bin 0.
  unclamped = int((raw - lower - eps) / bin_width)
  # Out-of-range inputs would otherwise produce indices that downstream
  # tables keyed by range(n_bins) cannot look up.
  return max(0, min(n_bins - 1, unclamped))

def bin_attr(lexicon_raw):
  '''
  Discretise the affirm, negated and spec scores of every lexicon entry.

  :param lexicon_raw:
    iterable of entries exposing .word, .affirm, .negated and .spec
  :return:
    list of `word` tuples (same order as the input) whose affirm, negated
    and spec fields hold the bin indices returned by interpolate()
  '''
  def to_binned(entry):
    # Each score is binned against its own module-level range / bin count.
    return word(
      entry.word,
      interpolate(entry.affirm, affirm_range, N_AFFIRM_BINS),
      interpolate(entry.negated, negated_range, N_NEGATED_BINS),
      interpolate(entry.spec, spec_range, N_SPEC_BINS),
    )

  return [to_binned(entry) for entry in lexicon_raw]

def bin_attr2(lexicon_raw):
  '''
  Discretise affirm, negated, dp, freq and spec for every lexicon entry.

  :param lexicon_raw:
    iterable of entries exposing .word, .affirm, .negated, .spec, .freq
    and .dp
  :return:
    list of `word` tuples built positionally from
    (word, affirm bin, negated bin, dp bin, freq bin, spec bin)
  '''
  # NOTE(review): this passes six positional values to `word`; the `word`
  # namedtuple visible in this notebook declares four fields
  # ("word affirm negated spec") -- confirm which definition this expects.
  def to_binned(entry):
    return word(
      entry.word,
      interpolate(entry.affirm, affirm_range, N_AFFIRM_BINS),
      interpolate(entry.negated, negated_range, N_NEGATED_BINS),
      interpolate(entry.dp, dp_range, N_DP_BINS),
      interpolate(entry.freq, freq_range, N_FREQ_BINS),
      interpolate(entry.spec, spec_range, N_SPEC_BINS),
    )

  return [to_binned(entry) for entry in lexicon_raw]

In [0]:
from itertools import product
from collections import namedtuple, defaultdict
import math



'''
   affirm: valence in the affirmative context
   negated: valence in the negated context
   freq: frequency bins F = {1,2,...,|F|}
   dp: dispersion bins D = {1,2,...,|D|}
   spec: meaning specificity M = {1,2,...,|M|}
'''

# Number of equal-width bins used for each attribute. The inline labels name
# the bins from the low end of the corresponding range to the high end.
# Note: the probability tables below are keyed by range(N_*_BINS), i.e. the
# bin indices actually used in code are zero-based (0 .. N-1).
N_AFFIRM_BINS = 3 # "negative", "neutral", "positive"
N_NEGATED_BINS = 3 # "negative", "neutral", "positive"
N_FREQ_BINS = 3 # "rare", "somewhat frequent", "frequent"
N_DP_BINS = 3 # "few contexts", "some contexts", "every context"
N_SPEC_BINS = 3 # "not specific", "somewhat specific", "very specific"


# (lower bound, upper bound) of the raw, un-binned value of each attribute;
# passed as `value_range` to interpolate().
affirm_range = (-1.0,1.0)
negated_range = (-1.0,1.0)
freq_range = (0,1.0)
dp_range = (0,1.0)
spec_range = (0,10)


In [0]:
#calculate information gain
def information_gain_nga(lexicon, print_enabled = True):
  '''
  (SANITY CHECK) Information gain of the affirmative bin about the negated bin.

  I[N:A] = H[N] - H[N|A]     N: negated valence bin, A: affirmative valence bin
  H[N]   = -sum_n p(n) * log2 p(n)
  H[N|A] = -sum_a p(a) * sum_n p(n|a) * log2 p(n|a)

  p(n)   = count(words with n) / count(words)
  p(a)   = count(words with a) / count(words)
  p(n|a) = count(words with n and a) / count(words with a)

  Every count starts from 1e-20 instead of 0 so that log2 is never
  evaluated at exactly zero for an empty cell.

  :param lexicon:
    sequence of binned entries exposing integer .negated and .affirm
  :param print_enabled:
    when true, print a one-line summary of the three quantities
  :return:
    H[N] - H[N|A] in bits
  '''
  total_words = float(len(lexicon))
  neg_bins = range(N_NEGATED_BINS)
  aff_bins = range(N_AFFIRM_BINS)

  def plogp(p):
    return p * math.log2(p)

  # ---- H[N] ----
  neg_mass = dict.fromkeys(neg_bins, 1e-20)
  for entry in lexicon:
    neg_mass[entry.negated] += 1
  p_n = {n: mass / total_words for n, mass in neg_mass.items()}  # all words have some n

  entropy_n = -sum(plogp(p_n[n]) for n in neg_bins)

  # ---- H[N|A] ----
  aff_mass = dict.fromkeys(aff_bins, 1e-20)
  for entry in lexicon:
    aff_mass[entry.affirm] += 1
  p_a = {a: mass / total_words for a, mass in aff_mass.items()}  # all words have some a

  joint_mass = {pair: 1e-20 for pair in product(neg_bins, aff_bins)}
  aff_count = dict.fromkeys(aff_bins, 0)
  for entry in lexicon:
    joint_mass[(entry.negated, entry.affirm)] += 1
    aff_count[entry.affirm] += 1

  p_n_given_a = {}
  for (n, a), mass in joint_mass.items():
    if aff_count[a] != 0:
      denominator = float(aff_count[a]) + 1e-20
    else:
      # No word carries this a: leave the cell at its tiny initial mass.
      denominator = 1
    p_n_given_a[(n, a)] = mass / denominator

  def inner_sum(a):
    return sum(plogp(p_n_given_a[(n, a)]) for n in neg_bins)

  entropy_nga = -sum(p_a[a] * inner_sum(a) for a in aff_bins)

  if print_enabled:
    print(f"Information gain I[A:N] = {round(entropy_n - entropy_nga,5)} = H[N]({round(entropy_n,5)}) - H[N|A]({round(entropy_nga,5)})")
  return entropy_n - entropy_nga

def information_gain_ngam(lexicon, print_enabled = True):
  '''
  Information gain of meaning specificity about the negated bin, given the
  affirmative bin.

  I[M:N|A] = H[N|A] - H[N|A,M]
  H[N|A]   = -sum_a p(a) * sum_n p(n|a) * log2 p(n|a)
  H[N|A,M] = -sum_m p(m) * sum_a p(a|m) * sum_n p(n|a,m) * log2 p(n|a,m)

  p(m)     = count(words with m) / count(words)
  p(a|m)   = count(words with a and m) / count(words with m)
  p(n|a,m) = count(words with n, a and m) / count(words with a and m)

  :param lexicon:
    sequence of binned entries exposing integer .negated, .affirm and .spec
  :param print_enabled:
    when true, print a one-line summary of the three quantities
  :return:
    H[N|A] - H[N|A,M] in bits
  '''
  total_words = float(len(lexicon))
  neg_bins = range(N_NEGATED_BINS)
  aff_bins = range(N_AFFIRM_BINS)
  spec_bins = range(N_SPEC_BINS)

  def plogp(p):
    return p * math.log2(p)

  # ---- H[N|A] ----
  aff_mass = dict.fromkeys(aff_bins, 1e-20)
  for entry in lexicon:
    aff_mass[entry.affirm] += 1
  p_a = {a: mass / total_words for a, mass in aff_mass.items()}  # all words have some a

  na_mass = {pair: 1e-20 for pair in product(neg_bins, aff_bins)}
  aff_count = dict.fromkeys(aff_bins, 0)
  for entry in lexicon:
    na_mass[(entry.negated, entry.affirm)] += 1
    aff_count[entry.affirm] += 1

  p_n_given_a = {}
  for (n, a), mass in na_mass.items():
    if aff_count[a] != 0:
      denominator = float(aff_count[a]) + 1e-20
    else:
      denominator = 1
    p_n_given_a[(n, a)] = mass / denominator

  def h_given_a(a):
    return sum(plogp(p_n_given_a[(n, a)]) for n in neg_bins)

  entropy_nga = -sum(p_a[a] * h_given_a(a) for a in aff_bins)

  # ---- H[N|A,M] ----
  # NOTE(review): the tables conditioned on M are smoothed with 1e-10 whereas
  # the ones above use 1e-20; kept exactly as found.
  spec_mass = dict.fromkeys(spec_bins, 1e-20)
  for entry in lexicon:
    spec_mass[entry.spec] += 1
  p_m = {m: mass / total_words for m, mass in spec_mass.items()}

  # p(a|m)
  am_mass = {pair: 1e-10 for pair in product(aff_bins, spec_bins)}
  spec_count = dict.fromkeys(spec_bins, 0)
  for entry in lexicon:
    am_mass[(entry.affirm, entry.spec)] += 1
    spec_count[entry.spec] += 1

  p_a_given_m = {}
  for (a, m), mass in am_mass.items():
    if spec_count[m] != 0:
      denominator = float(spec_count[m]) + 1e-10
    else:
      denominator = 1
    p_a_given_m[(a, m)] = mass / denominator

  # p(n|a,m)
  nam_mass = {triple: 1e-10 for triple in product(neg_bins, aff_bins, spec_bins)}
  am_count = {pair: 0 for pair in product(aff_bins, spec_bins)}
  for entry in lexicon:
    nam_mass[(entry.negated, entry.affirm, entry.spec)] += 1
    am_count[(entry.affirm, entry.spec)] += 1

  p_n_given_am = {}
  for (n, a, m), mass in nam_mass.items():
    if am_count[(a, m)] != 0:
      denominator = float(am_count[(a, m)]) + 1e-10
    else:
      # No word has this (a, m) pair: p(n|a,m) stays at its tiny initial mass.
      denominator = 1
    p_n_given_am[(n, a, m)] = mass / denominator

  def h_given_am(a, m):
    return sum(plogp(p_n_given_am[(n, a, m)]) for n in neg_bins)

  def h_given_m(m):
    return sum(p_a_given_m[(a, m)] * h_given_am(a, m) for a in aff_bins)

  entropy_ngam = -sum(p_m[m] * h_given_m(m) for m in spec_bins)

  # information gain
  if print_enabled:
    print(f"Information gain I[M:N|A] = {round(entropy_nga - entropy_ngam,5)} = H[N|A]({round(entropy_nga,5)}) - H[N|A,M]({round(entropy_ngam,5)})")

  return entropy_nga - entropy_ngam
  

In [20]:
# NOTE(review): `word_raw` is declared here but not used in the visible cells;
# the raw entries below are stored in `word` tuples instead.
word_raw = namedtuple("word_raw", "affirm negated freq dp")
# Record type shared by the raw lexicon (float scores) and the binned lexicon
# (integer bin indices) returned by bin_attr().
word = namedtuple("word", "word affirm negated spec")
# Hand-written toy lexicon. Columns: word, affirm, negated, spec.
# The raw scores are binned against affirm_range, negated_range and
# spec_range by bin_attr() below.
lexicon_raw = [     
    word("promise",       .7,       -0.9,        2),
    word("sneaky",       -.6,       -0.3,        7),
    word("hallowed",     -.2,        0.0,        9),
    word("hug",           .8,       -0.7,        2),
    word("spill",        -.7,        0.4,        4),
    word("abundant",      .7,       -0.3,        1),
    word("defective",    -.7,        0.4,        2),
    word("simple",        .6,       -0.5,        1),
    word("enthusiastic",  .8,       -0.6,        4),
    word("premium",       .8,       -0.3,        7)
]

# Bin the raw scores, then report I[N:A] (sanity check) and I[M:N|A];
# both calls print their summary line because print_enabled defaults to True.
lexicon = bin_attr(lexicon_raw)
ignga = information_gain_nga(lexicon)
igngam = information_gain_ngam(lexicon)


Information gain I[A:N] = 0.69546 = H[N](1.52193) - H[N|A](0.82647)
Information gain I[M:N|A] = 0.50195 = H[N|A](0.82647) - H[N|A,M](0.32451)


In [19]:
import random as r

# Running sums of the information gains over all trials; divided by n_trials
# when printed to give the mean per baseline.
baselineA_ig_nga = 0.0
baselineA_ig_ngam = 0.0
baselineB_ig_ngam = 0.0

n_trials = 1000
for _ in range(n_trials):
  #baseline A: keep each word's negated score, replace affirm with a uniform
  #draw from [-1, 1] and spec with a uniform draw from [0, 1] scaled by 10
  lexicon_rand_a = [word(w.word, r.uniform(-1,1), w.negated, r.uniform(0,1)*10) for w in lexicon_raw]

  #baseline B: keep affirm and negated, replace only spec with a random draw
  lexicon_rand_spec = [word(w.word, w.affirm, w.negated, r.uniform(0,1)*10) for w in lexicon_raw]

  # Note: this rebinds the module-level name `lexicon` on every trial.
  lexicon = bin_attr(lexicon_rand_a)
  baselineA_ig_nga += information_gain_nga(lexicon, print_enabled = False)
  baselineA_ig_ngam += information_gain_ngam(lexicon, print_enabled = False)

  lexicon = bin_attr(lexicon_rand_spec)
  baselineB_ig_ngam += information_gain_ngam(lexicon, print_enabled = False)
  
# Report the per-trial averages. No random seed is set in this cell, so the
# printed values vary from run to run.
print(f"Baseline A(random spec and affirm)\n\tI[A:N] : {baselineA_ig_nga/float(n_trials)}\n\tI[M:N|A] : {baselineA_ig_ngam/float(n_trials)}\n")
print(f"Baseline B(random spec):\n\tI[M:N|A] : {baselineB_ig_ngam/float(n_trials)}") 




Baseline A(random spec and affirm)
	I[A:N] : 0.3922487607371325
	I[M:N|A] : 0.5621690303177413

Baseline B(random spec):
	I[M:N|A] : 0.3455262498180381
