# Contextual valence shifting

In [1]:
import pandas as pd
from lemmatization import lemmatize
from lemmatization import pos_tagging
from nltk.tokenize import TweetTokenizer
from contextual_valence_shifting import ContextualValenceShifter

### Dependency parsing

In [2]:
from itertools import chain
from nltk.draw.tree import draw_trees
from nltk.parse import DependencyGraph
from nltk.parse.stanford import StanfordDependencyParser

# Locations of the Stanford parser jars, relative to the notebook's working directory.
path_to_jar = 'stanford_parser/stanford-parser.jar'
path_to_models_jar = 'stanford_parser/stanford-parser-3.9.1-models.jar'
dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)

# parse() is given an already-tokenized sentence and hands back an iterator of
# dependency graphs; only the first graph is inspected here.
result = dependency_parser.parse(['The', 'very', 'brilliant', 'organizer', 'failed', 'to', 'solve', 'the', 'problem'])
dg = next(result)

# Dump every node of the graph: its word, its position in the sentence, the
# relation it carries, and the addresses of its dependents (flattened from the
# per-relation lists stored in node['deps']).
for node in dg.nodes.values():
    print('Word:', node['word'])
    print('Position:', node['address'])
    print('Relation:', node['rel'])
    print('Children:', list(chain.from_iterable(node['deps'].values())))
print('Triples:', list(dg.triples()))

# Build the tree once and print it as a whole. Wrapping it in list() would
# iterate over the root's children only and silently drop the root word.
tree = dg.tree()
print('Tree:', tree)

# NOTE(review): draw_trees opens a separate GUI window, so this line needs a
# display to run — confirm before executing in a headless environment.
draw_trees(tree)

Word: None
Position: 0
Relation: None
Children: [5]
Word: The
Position: 1
Relation: det
Children: []
Word: organizer
Position: 4
Relation: nsubj
Children: [1, 3]
Word: very
Position: 2
Relation: advmod
Children: []
Word: brilliant
Position: 3
Relation: amod
Children: [2]
Word: failed
Position: 5
Relation: root
Children: [4, 7]
Word: to
Position: 6
Relation: mark
Children: []
Word: solve
Position: 7
Relation: xcomp
Children: [6, 9]
Word: the
Position: 8
Relation: det
Children: []
Word: problem
Position: 9
Relation: dobj
Children: [8]
Triples: [(('failed', 'VBD'), 'nsubj', ('organizer', 'NN')), (('organizer', 'NN'), 'det', ('The', 'DT')), (('organizer', 'NN'), 'amod', ('brilliant', 'JJ')), (('brilliant', 'JJ'), 'advmod', ('very', 'RB')), (('failed', 'VBD'), 'xcomp', ('solve', 'VB')), (('solve', 'VB'), 'mark', ('to', 'TO')), (('solve', 'VB'), 'dobj', ('problem', 'NN')), (('problem', 'NN'), 'det', ('the', 'DT'))]
Tree: [Tree('organizer', ['The', Tree('brilliant', ['very'])]), Tree('solve',

### Load lexicon

In [3]:
# Read only CSV columns 0, 1, 2 and 5 of the ratings file; column 0 becomes
# the index and the three remaining columns are given short names below.
lexicon = pd.read_csv(
    'lexicons/Ratings_Warriner_et_al.csv',
    index_col=0,
    usecols=[0, 1, 2, 5],
)
lexicon.columns = ['word', 'valence', 'arousal']

# Show the first rows as a quick sanity check of the loaded lexicon.
print(lexicon.head())

          word  valence  arousal
1     aardvark     6.26     2.41
2      abalone     5.30     2.65
3      abandon     2.84     3.73
4  abandonment     2.63     4.95
5        abbey     5.85     2.20


### Valence shifting 

In [4]:
# Jar locations passed to the shifter together with the lexicon loaded earlier.
path_to_jar = 'stanford_parser/stanford-parser.jar'
path_to_models_jar = 'stanford_parser/stanford-parser-3.9.1-models.jar'

# One shifter instance is built here and reused by all rule demonstrations below.
valence_shifter = ContextualValenceShifter(
    path_to_jar,
    path_to_models_jar,
    lexicon,
)

#### 1. Negation rule
If a word is in a relation with a negation (e.g. not, never, nothing), then the initial valence of the word is shifted, i.e. multiplied by -1.

In [5]:
# Example sentence for the negation rule.
sentence = 'He is not stupid'

# Pipeline: tokenize -> POS-tag -> lemmatize. Element 1 of every entry
# returned by lemmatize() is what gets collected into `lemmas`.
tokenizer = TweetTokenizer()
tokens = tokenizer.tokenize(sentence)
tags = pos_tagging(tokens)
lemmas = [entry[1] for entry in lemmatize(tags)]

# Print the per-lemma valences before and after the shifting rules are applied.
initial_valences = valence_shifter.get_initial_valences_sentence(lemmas)
print(initial_valences)
valences = valence_shifter.change_valence_sentence(lemmas, initial_valences)
print(valences)

[0, 1.6799999999999997, 0, -1.6600000000000001]
[0, 1.6799999999999997, 0, 1.6600000000000001]


#### 2. Intensifiers rule
If there is an intensifier in the tweet, then the valence of the word that is in a relation with the intensifier is increased by multiplying the initial valence by 1.5.

In [6]:
# Example sentence for the intensifier rule.
sentence = 'The film was absolutely awful'

# Pipeline: tokenize -> POS-tag -> lemmatize. Element 1 of every entry
# returned by lemmatize() is what gets collected into `lemmas`.
tokenizer = TweetTokenizer()
tokens = tokenizer.tokenize(sentence)
tags = pos_tagging(tokens)
lemmas = [entry[1] for entry in lemmatize(tags)]

# Print the per-lemma valences before and after the shifting rules are applied.
initial_valences = valence_shifter.get_initial_valences_sentence(lemmas)
print(initial_valences)
valences = valence_shifter.change_valence_sentence(lemmas, initial_valences)
print(valences)

[0, 1.83, 1.6799999999999997, 0, -2.22]
[0, -1.83, -1.6799999999999997, 0, -3.33]


#### 3. Mitigators rule
If there is a mitigator in the tweet, then the valence of the word that is in a relation with the mitigator is decreased by multiplying the initial valence by 0.5.

In [7]:
# Example sentence for the mitigator rule.
sentence = 'By the end of the day we were rather tired'

# Pipeline: tokenize -> POS-tag -> lemmatize. Element 1 of every entry
# returned by lemmatize() is what gets collected into `lemmas`.
tokenizer = TweetTokenizer()
tokens = tokenizer.tokenize(sentence)
tags = pos_tagging(tokens)
lemmas = [entry[1] for entry in lemmatize(tags)]

# Print the per-lemma valences before and after the shifting rules are applied.
initial_valences = valence_shifter.get_initial_valences_sentence(lemmas)
print(initial_valences)
valences = valence_shifter.change_valence_sentence(lemmas, initial_valences)
print(valences)

[0, 0, 0.23000000000000043, 0, 0, 1.8600000000000003, 0, 1.6799999999999997, 0, -0.20999999999999996]
[0, 0, -0.23000000000000043, 0, 0, -1.8600000000000003, 0, -1.6799999999999997, 0, -0.10499999999999998]


#### 4. Negative words rule  
If the word is in a relation with a negative word, then its valence is multiplied by -1 only if the word has positive valence. Otherwise, the valence remains the same.

In [8]:
# Example sentence for the negative-words rule.
sentence = 'The very brilliant organizer failed to solve the problem'

# Pipeline: tokenize -> POS-tag -> lemmatize. Element 1 of every entry
# returned by lemmatize() is what gets collected into `lemmas`.
tokenizer = TweetTokenizer()
tokens = tokenizer.tokenize(sentence)
tags = pos_tagging(tokens)
lemmas = [entry[1] for entry in lemmatize(tags)]

# Print the per-lemma valences before and after the shifting rules are applied.
initial_valences = valence_shifter.get_initial_valences_sentence(lemmas)
print(initial_valences)
valences = valence_shifter.change_valence_sentence(lemmas, initial_valences)
print(valences)

[0, 0, 3.0, 1.8499999999999996, -2.17, 0, 1.7999999999999998, 0, -0.98]
[0, 0, -4.5, -1.8499999999999996, -2.17, 0, -1.7999999999999998, 0, -0.98]


#### 5. Conjunctive adverbs
If there is a conjunctive adverb in the tweet, then the valences are neutralized by multiplication with 0.

In [9]:
# Example sentence for the conjunctive-adverbs rule.
sentence = 'Although Boris is brilliant at math, he is a horrible teacher'

# Pipeline: tokenize -> POS-tag -> lemmatize. Element 1 of every entry
# returned by lemmatize() is what gets collected into `lemmas`.
tokenizer = TweetTokenizer()
tokens = tokenizer.tokenize(sentence)
tags = pos_tagging(tokens)
lemmas = [entry[1] for entry in lemmatize(tags)]

# Print the per-lemma valences before and after the shifting rules are applied.
initial_valences = valence_shifter.get_initial_valences_sentence(lemmas)
print(initial_valences)
valences = valence_shifter.change_valence_sentence(lemmas, initial_valences)
print(valences)

[0, 0, 1.6799999999999997, 3.0, 0, -0.04999999999999982, 0, 0, 1.6799999999999997, 0, -2.17, 2.87]
[0, 0, 1.6799999999999997, -0.0, 0, -0.04999999999999982, 0, 0, 1.6799999999999997, 0, -2.17, -2.87]


In [11]:
# A longer, two-sentence tweet with hashtags, run through the same pipeline.
sentence = "I wouldn't wish anxiety and depression even on the worst of people. It's not fun. #anxiety #depression"

# Tokenize and print the tokens so the valence lists below can be read
# position by position against them.
tokenizer = TweetTokenizer()
tokens = tokenizer.tokenize(sentence)
print(tokens)

# POS-tag and lemmatize; element 1 of every entry returned by lemmatize()
# is what gets collected into `lemmas`.
tags = pos_tagging(tokens)
lemmas = [entry[1] for entry in lemmatize(tags)]

# Print the per-lemma valences before and after the shifting rules are applied.
initial_valences = valence_shifter.get_initial_valences_sentence(lemmas)
print(initial_valences)
valences = valence_shifter.change_valence_sentence(lemmas, initial_valences)
print(valences)

['I', "wouldn't", 'wish', 'anxiety', 'and', 'depression', 'even', 'on', 'the', 'worst', 'of', 'people', '.', "It's", 'not', 'fun', '.', '#anxiety', '#depression']
[0, 0, 2.6399999999999997, -2.12, 0, -2.06, 0, 0, 0, -1.2599999999999998, 0, 1.2000000000000002, 0, 0, 0, 3.869999999999999, 0, -2.12, -2.06]
[0, 0, -2.6399999999999997, -2.12, 0, -2.06, 0, 0, 0, -1.2599999999999998, 0, -1.2000000000000002, 0, 0, 0, 3.869999999999999, 0, -2.12, -2.06]
