# Sentiment Analysis (sub-sentence level)

## Inference pipeline - Proof of concept

## Load imports and models

In [155]:
from flair.data import Sentence
from flair.models import MultiTagger
from flair.models import TextClassifier
from colorama import Fore

In [156]:
# Load the chunking ('chunk-fast') and part-of-speech ('pos-fast') taggers
# together as a single MultiTagger.
tagger = MultiTagger.load(['chunk-fast', 'pos-fast'])

2021-03-11 13:21:24,330 --------------------------------------------------------------------------------
2021-03-11 13:21:24,333 The model key 'chunk-fast' now maps to 'https://huggingface.co/flair/chunk-english-fast' on the HuggingFace ModelHub
2021-03-11 13:21:24,334  - The most current version of the model is automatically downloaded from there.
2021-03-11 13:21:24,337  - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/chunk-fast/en-chunk-conll2000-fast-v0.4.pt)
2021-03-11 13:21:24,338 --------------------------------------------------------------------------------
2021-03-11 13:21:24,684 loading file /Users/ike/.flair/models/chunk-english-fast/be3a207f4993dd6d174d5083341a717d371ec16f721358e7a4d72158ebab28a6.a7f897d05c83e618a8235bbb7ddfca5a79d2daefb8a97c776eb73f97dbaea508
2021-03-11 13:21:26,587 --------------------------------------------------------------------------------
2021-03-11 13:21:26,588 The model key 'po

In [157]:
# Load the 'sentiment' text classifier used to score each phrase.
classifier = TextClassifier.load('sentiment')

2021-03-11 13:21:28,742 loading file /Users/ike/.flair/models/sentiment-en-mix-distillbert_4.pt


## Create input sentence

In [158]:
# Choose the input sentence to analyse; swap in one of the commented-out
# alternatives below to try a different example.
text = 'I love Bamboo HR, but interviews make me nervous'

# text = 'I think sentiment analysis is really cool but maybe too cool'
# text = 'I like the Samsung smart watch because it is sleek and durable'

# Wrap the raw string in a flair Sentence so the taggers can annotate it.
sentence = Sentence(text)

## Process text

In [159]:
# Run the chunk and POS taggers over the sentence (the loaded models are
# 'chunk-fast' and 'pos-fast', not an NER model).
# The predicted labels are attached to `sentence` itself.
tagger.predict(sentence)

In [160]:
# Check the prediction: print the sentence together with its token labels.
print(sentence)

Sentence: "I love Bamboo HR , but interviews make me nervous"   [− Tokens: 10  − Token-Labels: "I <S-NP/PRP> love <S-VP/VBP> Bamboo <B-NP/NNP> HR <E-NP/NNP> , <,> but <CC> interviews <S-NP/NNS> make <S-VP/VBP> me <S-NP/PRP> nervous <S-ADJP/JJ>"]


In [161]:
def get_conjunctions(sentence):
    '''
    Locate the conjunctions in a POS-tagged sentence.

    sentence: object whose get_spans('pos-fast') yields spans with a .tag
    Returns a 3-tuple:
        pos             - list with the tag of every 'pos-fast' span, in order
        has_conjunction - True when at least one tag is 'CC'
        breaks          - list of the indices in pos whose tag is 'CC'
    '''
    pos = []
    breaks = []
    # Single pass: record every tag and remember where the 'CC' tags sit.
    for index, span in enumerate(sentence.get_spans('pos-fast')):
        tag = span.tag
        pos.append(tag)
        if tag == 'CC':
            breaks.append(index)
    has_conjunction = 'CC' in pos
    return pos, has_conjunction, breaks

In [162]:
# Extract the POS tags and the positions of the conjunctions (tag 'CC').
pos, has_conjunction, breaks = get_conjunctions(sentence)

In [163]:
# Show the tag list, the conjunction flag and the break indices, one per line.
print(pos, has_conjunction, breaks, sep='\n')

['PRP', 'VBP', 'NNP', 'NNP', ',', 'CC', 'NNS', 'VBP', 'PRP', 'JJ']
True
[5]


## Break sentence into pieces

In [164]:
def combine_spans(spans):
    '''
    Join the original text of each span into one string.

    spans: iterable of objects exposing to_original_text()
    Returns the texts in input order, separated by single spaces
    (an empty string when there are no spans).
    '''
    pieces = []
    for span in spans:
        pieces.append(span.to_original_text())
    return ' '.join(pieces)

In [165]:
# Break the sentence into alternating phrase / conjunction pieces.
# Heuristic: split at every conjunction index listed in `breaks`.

parts = []
spans = sentence.get_spans('pos-fast')
current_break = 0  # index of the first span of the phrase being collected

for next_cc in breaks:
    before_cc = spans[current_break:next_cc]
    cc = spans[next_cc]
    # Skip empty phrases (conjunction at the very start, or two in a row).
    if before_cc:
        parts.append({'type': 'phrase', 'text': combine_spans(before_cc)})
    parts.append({'type': 'conjunction', 'text': cc.text})
    # Resume *after* the conjunction so it is not repeated in the next phrase.
    current_break = next_cc + 1

# Whatever follows the last conjunction -- or the whole sentence when
# `breaks` is empty -- forms the final phrase.
last_part = spans[current_break:]
if last_part:
    parts.append({'type': 'phrase', 'text': combine_spans(last_part)})

In [166]:
# Inspect the pieces produced by the split.
parts

[{'type': 'phrase', 'text': 'I love Bamboo HR ,'},
 {'type': 'conjunction', 'text': 'but'},
 {'type': 'phrase', 'text': 'interviews make me nervous'}]

## Annotate sentence pieces

In [167]:
# Annotate each piece of the sentence in place:
#   - phrases get a sentiment label and the classifier's confidence,
#   - conjunctions get a flag saying whether they reverse the sentiment.

for part in parts:
    if part['type'] == 'phrase':
        # Use a separate name so the tagged `sentence` from the earlier cells
        # is not overwritten (re-running the split cell would otherwise
        # operate on the last phrase instead of the full sentence).
        phrase_sentence = Sentence(part['text'])
        classifier.predict(phrase_sentence)
        top_label = phrase_sentence.to_dict()['labels'][0]
        part['sentiment'] = top_label['value']
        # NOTE: despite its name, the 'labels' key stores the confidence score;
        # color_text() reads it under this key, so the name is kept.
        part['labels'] = top_label['confidence']
    elif part['type'] == 'conjunction':
        # Compare case-insensitively so a sentence-initial "But" is recognised.
        conjunction = part['text'].lower()
        # Exact comparison: the previous `in ('but')` was a substring test
        # against the string 'but' (the parentheses did not make a tuple).
        if conjunction == 'but':
            part['reverse'] = True
        elif conjunction in ('and', 'or'):
            part['reverse'] = False
        else:
            part['reverse'] = None
            print('Error.. unknown conjunction')

In [168]:
# Inspect the pieces again, now with their annotations.
parts

[{'type': 'phrase',
  'text': 'I love Bamboo HR ,',
  'sentiment': 'POSITIVE',
  'labels': 0.9989123344421387},
 {'type': 'conjunction', 'text': 'but', 'reverse': True},
 {'type': 'phrase',
  'text': 'interviews make me nervous',
  'sentiment': 'NEGATIVE',
  'labels': 0.913139283657074}]

## Show color coded sentence

In [169]:
def color_text(parts):
    '''
    Build a color-coded string from annotated sentence pieces.

    parts: list of dicts, each with a 'text' key; pieces whose 'sentiment'
           is 'POSITIVE' or 'NEGATIVE' must also carry a numeric 'labels'
           value, which is shown rounded to 3 decimals in square brackets.
    Returns one string in which POSITIVE pieces are prefixed with Fore.GREEN,
    NEGATIVE pieces with Fore.RED and all other pieces with Fore.BLACK;
    every piece is followed by a single space.
    '''
    rendered = []
    for piece in parts:
        mood = piece.get('sentiment')
        if mood == 'POSITIVE':
            color = Fore.GREEN
        elif mood == 'NEGATIVE':
            color = Fore.RED
        else:
            # No (recognised) sentiment: plain text, no score.
            rendered.append(Fore.BLACK + piece['text'] + ' ')
            continue
        score = str(round(piece['labels'], 3))
        rendered.append(color + piece['text'] + ' [' + score + ']' + ' ')
    return ''.join(rendered)

In [170]:
# Render the annotated pieces as one color-coded line.
print(color_text(parts))

[32mI love Bamboo HR , [0.999] [30mbut [31minterviews make me nervous [0.913] 
