In [1]:
# Inference pipeline (Test)

In [45]:
from flair.data import Sentence
from flair.models import MultiTagger
from flair.models import TextClassifier
from colorama import Fore

In [3]:
# Load the fast chunking ('chunk-fast') and part-of-speech ('pos-fast') models
# into a single MultiTagger so one predict() call applies both label sets.
# Per the log output below, the models are downloaded on first use.
tagger = MultiTagger.load(['chunk-fast', 'pos-fast'])

2021-03-11 07:37:38,686 --------------------------------------------------------------------------------
2021-03-11 07:37:38,688 The model key 'chunk-fast' now maps to 'https://huggingface.co/flair/chunk-english-fast' on the HuggingFace ModelHub
2021-03-11 07:37:38,690  - The most current version of the model is automatically downloaded from there.
2021-03-11 07:37:38,691  - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/chunk-fast/en-chunk-conll2000-fast-v0.4.pt)
2021-03-11 07:37:38,692 --------------------------------------------------------------------------------


Downloading:   0%|          | 0.00/75.2M [00:00<?, ?B/s]

2021-03-11 07:38:10,072 loading file /Users/ike/.flair/models/chunk-english-fast/be3a207f4993dd6d174d5083341a717d371ec16f721358e7a4d72158ebab28a6.a7f897d05c83e618a8235bbb7ddfca5a79d2daefb8a97c776eb73f97dbaea508
2021-03-11 07:38:11,679 --------------------------------------------------------------------------------
2021-03-11 07:38:11,680 The model key 'pos-fast' now maps to 'https://huggingface.co/flair/pos-english-fast' on the HuggingFace ModelHub
2021-03-11 07:38:11,681  - The most current version of the model is automatically downloaded from there.
2021-03-11 07:38:11,684  - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/pos-fast/en-pos-ontonotes-fast-v0.5.pt)
2021-03-11 07:38:11,685 --------------------------------------------------------------------------------


Downloading:   0%|          | 0.00/75.3M [00:00<?, ?B/s]

2021-03-11 07:38:41,082 loading file /Users/ike/.flair/models/pos-english-fast/36f7923039eed4c66e4275927daaff6cd275997d61d238355fb1fe0338fe10a1.ff87e5b4e47fdb42a0c00237d9506c671db773e0a7932179ace82e584383a1b8


In [4]:
# Load the pre-trained 'sentiment' text classifier.
# Per the log output below, the model file is downloaded and copied to the
# local flair cache when it is not already cached.
classifier = TextClassifier.load('sentiment')

2021-03-11 07:38:43,166 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /var/folders/qx/wpls85r17mg43_mg7lys9h9w0000gs/T/tmpuhjzuqwx


100%|██████████| 265512723/265512723 [03:57<00:00, 1117355.02B/s]

2021-03-11 07:42:41,400 copying /var/folders/qx/wpls85r17mg43_mg7lys9h9w0000gs/T/tmpuhjzuqwx to cache at /Users/ike/.flair/models/sentiment-en-mix-distillbert_4.pt





2021-03-11 07:42:43,254 removing temp file /var/folders/qx/wpls85r17mg43_mg7lys9h9w0000gs/T/tmpuhjzuqwx
2021-03-11 07:42:43,430 loading file /Users/ike/.flair/models/sentiment-en-mix-distillbert_4.pt


Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [55]:
# Build the Sentence that the rest of the notebook analyses.
# The commented-out assignments are alternative example inputs; uncomment one
# (and comment out the active line) to try a different sentence.
text = 'I love Bamboo HR, but I really hate interviews.'
# text = 'I think she is really cool but maybe too cool'
# text = 'I like the Samsung smart watch because it is sleek and durable'
sentence = Sentence(text)

In [56]:
# Run the chunk and POS taggers over the sentence (this is not NER: the
# MultiTagger above was loaded with 'chunk-fast' and 'pos-fast').
# The return value is unused; the labels are read back from `sentence` afterwards.
tagger.predict(sentence)

In [57]:
# Inspect the tagged sentence: the printed form lists every token together
# with the chunk/POS labels added by the taggers.
print(sentence)

Sentence: "I love Bamboo HR , but I really hate interviews ."   [− Tokens: 11  − Token-Labels: "I <S-NP/PRP> love <S-VP/VBP> Bamboo <B-NP/NNP> HR <E-NP/NNP> , <,> but <CC> I <S-NP/PRP> really <S-ADVP/RB> hate <S-VP/VBP> interviews <S-NP/NNS> . <.>"]


In [58]:
def get_conjunctions(sentence):
    """Locate the coordinating conjunctions in a POS-tagged sentence.

    Args:
        sentence: object exposing ``get_spans('pos-fast')``, whose returned
            items each carry a ``tag`` attribute.

    Returns:
        A ``(pos, has_conjunction, breaks)`` tuple where ``pos`` is the list
        of tags in span order, ``has_conjunction`` is True when at least one
        tag equals ``'CC'``, and ``breaks`` lists the indices of those tags.
    """
    tags = [token_span.tag for token_span in sentence.get_spans('pos-fast')]
    cc_positions = [index for index, tag in enumerate(tags) if tag == 'CC']
    # A non-empty index list is exactly the "'CC' is among the tags" condition.
    return tags, bool(cc_positions), cc_positions

In [59]:
# Extract the POS tags, whether any 'CC' tag is present, and the indices of
# the 'CC' tags for the tagged sentence.
pos, has_conjunction, breaks = get_conjunctions(sentence)

In [60]:
# Show the tag list, the conjunction flag and the conjunction indices, one per line.
print(pos, has_conjunction, breaks, sep='\n')

['PRP', 'VBP', 'NNP', 'NNP', ',', 'CC', 'PRP', 'RB', 'VBP', 'NNS', '.']
True
[5]


In [61]:
def combine_spans(spans):
    """Return the spans' original text joined by single spaces.

    Args:
        spans: iterable of objects exposing ``to_original_text()``.

    Returns:
        One string with each span's original text separated by a space
        (the empty string when ``spans`` is empty).
    """
    return ' '.join(span.to_original_text() for span in spans)

In [62]:
# Split the tagged sentence into alternating 'phrase' and 'conjunction'
# segments, using the conjunction indices collected in `breaks`.
parts = []
spans = sentence.get_spans('pos-fast')
current_break = 0  # index of the first span of the phrase being collected

for next_cc in breaks:
    before_cc = spans[current_break:next_cc]
    cc = spans[next_cc]
    # An empty slice (conjunction at the very start, or two adjacent
    # conjunctions) carries no text, so no phrase is emitted for it.
    if before_cc:
        parts.append({'type': 'phrase', 'text': combine_spans(before_cc)})
    parts.append({'type': 'conjunction', 'text': cc.text})
    # Resume *after* the conjunction; restarting at `next_cc` would make the
    # next phrase begin with the conjunction token itself.
    current_break = next_cc + 1

# Whatever follows the last conjunction, or the whole sentence when `breaks`
# is empty (indexing breaks[-1] would raise IndexError in that case).
last_part = spans[current_break:]
if last_part:
    parts.append({'type': 'phrase', 'text': combine_spans(last_part)})

In [63]:
# Display the phrase/conjunction segments produced by the split above.
parts

[{'type': 'phrase', 'text': 'I love Bamboo HR ,'},
 {'type': 'conjunction', 'text': 'but'},
 {'type': 'phrase', 'text': 'I really hate interviews .'}]

In [64]:
# Annotate every segment in place: phrases get the classifier's top label
# and its confidence; conjunctions get a 'reverse' flag.
for part in parts:
    if part['type'] == 'phrase':
        # Use a dedicated name so the tagged `sentence` built in the earlier
        # cells is not overwritten, which keeps those cells safe to re-run.
        phrase_sentence = Sentence(part['text'])
        classifier.predict(phrase_sentence)
        top_label = phrase_sentence.to_dict()['labels'][0]
        part['sentiment'] = top_label['value']
        # Despite its name, the 'labels' key holds the confidence of the top
        # label; the name is kept because color_text reads it.
        part['labels'] = top_label['confidence']
    elif part['type'] == 'conjunction':
        # Compare case-insensitively so a capitalised conjunction still matches.
        conjunction = part['text'].lower()
        # `x in ('but')` is a substring test against the string 'but' (the
        # parentheses do not create a tuple), so compare for equality instead.
        if conjunction == 'but':
            part['reverse'] = True
        elif conjunction in ('and', 'or'):
            part['reverse'] = False
        else:
            part['reverse'] = None
            print('Error.. unknown conjunction')

In [65]:
# Display the segments again, now carrying the sentiment/confidence and
# 'reverse' annotations added by the loop above.
parts

[{'type': 'phrase',
  'text': 'I love Bamboo HR ,',
  'sentiment': 'POSITIVE',
  'labels': 0.9989123344421387},
 {'type': 'conjunction', 'text': 'but', 'reverse': True},
 {'type': 'phrase',
  'text': 'I really hate interviews .',
  'sentiment': 'NEGATIVE',
  'labels': 0.9963917136192322}]

In [76]:
def color_text(parts):
    """Render the segments as one string with ANSI foreground colours.

    Segments whose 'sentiment' is 'POSITIVE' are green and 'NEGATIVE' ones
    are red; both are followed by their 'labels' value rounded to three
    decimals in square brackets. Every other segment is emitted in black
    without a score. Each segment is followed by a single space.

    Args:
        parts: iterable of dicts with a 'text' key and, for scored segments,
            'sentiment' and 'labels' keys.

    Returns:
        The coloured string, terminated with ``Fore.RESET`` so the last
        colour does not leak into whatever is printed next. An empty
        ``parts`` yields the empty string.
    """
    sentiment_colors = {'POSITIVE': Fore.GREEN, 'NEGATIVE': Fore.RED}
    output = ''
    for part in parts:
        color = sentiment_colors.get(part.get('sentiment'))
        if color is not None:
            output += color + part['text'] + ' [' + str(round(part['labels'], 3)) + ']'
        else:
            # Missing or unrecognised sentiment: plain text, no score.
            output += Fore.BLACK + part['text']
        output += ' '
    # Restore the default foreground colour once anything has been emitted.
    return output + Fore.RESET if output else output

In [77]:
# Print the colour-coded rendering of the annotated segments.
print(color_text(parts))

[32mI love Bamboo HR , [0.999] [30mbut [31mI really hate interviews . [0.996] 
