https://github.com/robinvanschaik/interpret-flair

In [123]:
import numpy as np
import torch
from flair.models import TextClassifier
from flair.data import Sentence
from interpretation_package.flair_model_wrapper import ModelWrapper
from interpretation_package.interpret_flair import interpret_sentence, visualize_attributions
from captum.attr import LayerIntegratedGradients, TokenReferenceBase, visualization
from transformers import AutoTokenizer
import torch.nn as nn
import re

In [2]:
import torch
print(torch.__version__)

1.6.0


In [47]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [4]:
classifier = TextClassifier.load('sentiment')

2021-01-14 19:46:00,914 loading file /home/joey/.flair/models/sentiment-en-mix-distillbert_3.1.pt


HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=442.0), HTML(value='')))




HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=231508.0), HTML(value='')))




In [121]:
classifier

TextClassifier(
  (document_embeddings): TransformerDocumentEmbeddings(
    (model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0): TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768, out_features=768, bias=True)
            )
            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (ffn

In [120]:
classifier.document_embeddings.model.embeddings

Embeddings(
  (word_embeddings): Embedding(30522, 768, padding_idx=0)
  (position_embeddings): Embedding(512, 768)
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [223]:
classifier.document_embeddings.model.embeddings.word_embeddings.weight[0,:] = 0

In [224]:
classifier.document_embeddings.model.embeddings.word_embeddings.weight

Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0132, -0.0673, -0.0161,  ..., -0.0227, -0.0554, -0.0260],
        [-0.0176, -0.0709, -0.0144,  ..., -0.0246, -0.0596, -0.0232],
        ...,
        [-0.0231, -0.0588, -0.0105,  ..., -0.0195, -0.0262, -0.0212],
        [-0.0490, -0.0561, -0.0047,  ..., -0.0107, -0.0180, -0.0219],
        [-0.0065, -0.0915, -0.0025,  ..., -0.0151, -0.0504,  0.0460]],
       device='cuda:0', grad_fn=<CopySlices>)

In [8]:
s = Sentence("I like apple.")
s.labels

[]

In [225]:
classifier.predict(s)
s.labels

[POSITIVE (0.9976)]

In [226]:
classifier.forward([s])

tensor([[-2.6498,  3.3708]], device='cuda:0', grad_fn=<AddmmBackward>)

In [124]:
softmax = nn.Softmax(dim=1)
softmax(classifier.forward([s]))

tensor([[0.0024, 0.9976]], device='cuda:0', grad_fn=<SoftmaxBackward>)

In [18]:
model_name = classifier.document_embeddings.get_names()[0].split('transformer-document-')[-1]
model_name

'distilbert-base-uncased'

In [24]:
print(classifier.label_dictionary)

Dictionary with 2 tags: NEGATIVE, POSITIVE


In [26]:
initial_cls_token = classifier.document_embeddings.initial_cls_token
initial_cls_token

True

In [21]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer

PreTrainedTokenizer(name_or_path='distilbert-base-uncased', vocab_size=30522, model_max_len=512, is_fast=False, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})

In [16]:
classifier.decoder

Linear(in_features=768, out_features=2, bias=True)

In [227]:
flair_model_wrapper = ModelWrapper(classifier)

  if p.grad is not None:


In [228]:
torch_classifier = classifier.document_embeddings.model
torch_classifier

DistilBertModel(
  (embeddings): Embeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (layer): ModuleList(
      (0): TransformerBlock(
        (attention): MultiHeadSelfAttention(
          (dropout): Dropout(p=0.1, inplace=False)
          (q_lin): Linear(in_features=768, out_features=768, bias=True)
          (k_lin): Linear(in_features=768, out_features=768, bias=True)
          (v_lin): Linear(in_features=768, out_features=768, bias=True)
          (out_lin): Linear(in_features=768, out_features=768, bias=True)
        )
        (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (ffn): FFN(
          (dropout): Dropout(p=0.1, inplace=False)
          (lin1): Linear(in_features=768, out_features=3072, bias=True)
          (lin2): Linear(i

In [106]:
tokenizer("[PAD] [PAD] [UNK] [MASK] [CLS], PAD good")

{'input_ids': [101, 0, 0, 100, 103, 101, 1010, 11687, 2204, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [229]:
pad_token_id = tokenizer.pad_token_id

In [230]:
tokenized_sentence = s.to_tokenized_string()
tokenizer_max_length = flair_model_wrapper.tokenizer.model_max_length
input_ids = flair_model_wrapper.tokenizer.encode(tokenized_sentence,
                                                     add_special_tokens=False,
                                                     max_length=tokenizer_max_length,
                                                     truncation=True,
                                                     return_tensors="pt")
input_ids = input_ids.to(device)
ref_base_line = torch.full_like(input_ids, pad_token_id)

In [231]:
input_ids

tensor([[1045, 2066, 6207, 1012]], device='cuda:0')

In [232]:
lig = LayerIntegratedGradients(flair_model_wrapper, flair_model_wrapper.model.embeddings)

In [240]:

attributions_ig, delta = lig.attribute(input_ids, ref_base_line, \
                                           n_steps=500, return_convergence_delta=True, target = 0)
word_attributions, attribution_score = summarize_attributions(attributions_ig)
word_attributions, attribution_score

(tensor([-0.3926,  0.8961, -0.0821, -0.1901], device='cuda:0',
        dtype=torch.float64),
 tensor(0.2313, device='cuda:0', dtype=torch.float64))

In [247]:
attributions_ig, delta = lig.attribute(input_ids, n_steps=500, return_convergence_delta=True, target = 0)
word_attributions, attribution_score = summarize_attributions(attributions_ig)
word_attributions, attribution_score

(tensor([-0.3926,  0.8961, -0.0821, -0.1901], device='cuda:0',
        dtype=torch.float64),
 tensor(0.2313, device='cuda:0', dtype=torch.float64))

In [235]:
ref_base_line

tensor([[0, 0, 0, 0]], device='cuda:0')

In [236]:

attributions_ig, delta = lig.attribute(ref_base_line, ref_base_line, \
                                           n_steps=500, return_convergence_delta=True, target = 0)
word_attributions, attribution_score = summarize_attributions(attributions_ig)
word_attributions, attribution_score

(tensor([nan, nan, nan, nan], device='cuda:0', dtype=torch.float64),
 tensor(nan, device='cuda:0', dtype=torch.float64))

In [237]:
softmax = nn.Softmax(dim=1)

In [238]:
flair_model_wrapper.forward(ref_base_line)

tensor([[ 0.8581, -0.0681]], device='cuda:0', grad_fn=<ViewBackward>)

In [239]:
softmax(flair_model_wrapper.forward(ref_base_line))

tensor([[0.7163, 0.2837]], device='cuda:0', grad_fn=<SoftmaxBackward>)

In [246]:
def summarize_attributions(attributions):
    """
    Helper function for calculating word attributions.
    Inputs:
    attributions_ig: integrated gradients attributions.
    Ouputs:
    word_attributions: the attributions score per token.
    attribution_score: the attribution score of the entire document w.r.t. ground label.
    """
    attributions = attributions.sum(dim=-1).squeeze(0)
    if torch.norm(attributions) != 0:
        attributions = attributions / torch.norm(attributions)
    attribution_score = attributions.sum()

    return attributions, attribution_score

In [244]:
word_attributions, attribution_score = summarize_attributions(attributions_ig)
word_attributions, attribution_score

(tensor([-2.9920e-05,  6.8299e-05, -6.2555e-06, -1.4491e-05], device='cuda:0',
        dtype=torch.float64),
 tensor(1.7632e-05, device='cuda:0', dtype=torch.float64))

In [None]:
visualization_list.append(
    viz.VisualizationDataRecord(word_attributions=word_attributions,
                                pred_prob=prediction_confidence,
                                pred_class=pred_label,
                                true_class=target_label,
                                attr_class=target_label,
                                attr_score=attribution_score,
                                raw_input=readable_tokens,
                                convergence_score=delta)
                    )

In [79]:
hidden_states = torch_classifier(input_ids = inputs['input_ids'])[-1]

In [80]:
len(torch_classifier(input_ids = inputs['input_ids']))

2

In [81]:
index_of_CLS_token = 0
hidden_states[-1][0][index_of_CLS_token]

tensor([ 2.4782e-01,  4.6331e-01, -3.5577e-02, -1.8669e-01,  1.7973e-01,
        -1.9919e-01, -2.4486e-01,  1.5706e+00, -4.0698e-01, -6.6301e-01,
         1.0541e-01, -7.9162e-03,  1.6807e-01,  2.4336e-01, -5.3501e-02,
        -2.4856e-02,  8.3887e-01,  6.8829e-01,  3.6021e-01, -1.2238e-01,
        -2.3724e-01,  6.1203e-03, -1.9478e-01,  7.0956e-01,  8.7734e-01,
        -8.0866e-02,  2.6147e-01, -6.0051e-03,  1.2680e-01, -1.8052e-01,
         5.0912e-01, -3.3010e-02, -3.4040e-02, -1.0994e-01,  2.9609e-02,
        -2.4005e-02,  2.9749e-01, -1.9397e-01, -6.5620e-01, -8.9805e-02,
         5.7421e-02, -3.6933e-01,  4.3783e-01,  8.7728e-02, -6.7565e-01,
        -5.8307e-01, -1.1735e+00,  7.9791e-01,  4.3002e-01, -1.0930e-01,
         1.4903e-01,  6.4313e-02, -2.9422e-01, -2.0270e-01,  9.0752e-01,
         6.7988e-01, -6.5809e-01, -1.7571e-01, -7.5000e-01, -1.0229e+00,
         4.3186e-01, -4.7129e-01, -1.6961e-01, -7.8068e-02,  4.2653e-01,
        -2.3797e-01, -1.7146e-01,  5.4161e-02, -7.5

In [82]:
cls_embeddings_all_layers = \
            [hidden_states[-1][0][index_of_CLS_token]]
output_embeddings = torch.cat(cls_embeddings_all_layers)

In [83]:
label_scores = classifier.decoder(output_embeddings)
label_scores

tensor([-2.3427,  2.9683], device='cuda:0', grad_fn=<AddBackward0>)

In [84]:
label_scores_resized = torch.reshape(label_scores, (1, 2))
label_scores_resized

tensor([[-2.3427,  2.9683]], device='cuda:0', grad_fn=<ViewBackward>)

In [146]:
token_reference = TokenReferenceBase(reference_token_idx=0)


In [147]:
lig = LayerIntegratedGradients(flair_model_wrapper, flair_model_wrapper.model.embeddings)

In [7]:
print(flair_model_wrapper.label_dictionary.get_item_for_index(1))

target_label = flair_model_wrapper.label_dictionary.get_item_for_index(1)

POSITIVE


In [9]:
target_index = flair_model_wrapper.label_dictionary.get_idx_for_item(target_label)
target_index

1

In [143]:
text = "In the 1990s, when a youthful Son Masayoshi, a Japanese entrepreneur, was pursuing acquisitions in his home country, he sought advice from a banker eight years his junior called Mikitani Hiroshi. They shared a lot in common: both had studied in America (Mr Son at the University of California, Berkeley, Mr Mikitani at Harvard Business School); they had a common interest in the internet; and they were both baseball mad. In the decades since, both men have blazed past a stifling corporate hierarchy to become two of Japan’s leading tech billionaires. Mr Mikitani, who says in an interview that he did not even know the word “entrepreneur” when he enrolled at Harvard, pioneered e-commerce in Japan via Rakuten, which is now a sprawling tech conglomerate worth $14bn. Mr Son’s SoftBank, after spectacular investments in early internet stocks, muscled into Japan’s telecoms industry. They have both invested heavily in Silicon Valley. They also each own baseball teams named after birds of prey; the SoftBank Hawks and the Rakuten Golden Eagles."

In [144]:
from segtok.segmenter import split_single
sentences = [Sentence(sent, use_tokenizer=True) for sent in split_single(text)]
sentences

[Sentence: "In the 1990s , when a youthful Son Masayoshi , a Japanese entrepreneur , was pursuing acquisitions in his home country , he sought advice from a banker eight years his junior called Mikitani Hiroshi ."   [− Tokens: 36],
 Sentence: "They shared a lot in common : both had studied in America ( Mr Son at the University of California , Berkeley , Mr Mikitani at Harvard Business School ) ; they had a common interest in the internet ; and they were both baseball mad ."   [− Tokens: 47],
 Sentence: "In the decades since , both men have blazed past a stifling corporate hierarchy to become two of Japan ’s leading tech billionaires ."   [− Tokens: 24],
 Sentence: "Mr Mikitani , who says in an interview that he did not even know the word “ entrepreneur ” when he enrolled at Harvard , pioneered e-commerce in Japan via Rakuten , which is now a sprawling tech conglomerate worth $ 14bn ."   [− Tokens: 43],
 Sentence: "Mr Son ’s SoftBank , after spectacular investments in early internet sto

In [None]:
lig.attribute()

In [10]:
visualization_list = []

In [11]:
readable_tokens, word_attributions, delta = interpret_sentence(flair_model_wrapper,
                                                                lig,
                                                                sentence,
                                                                target_label,
                                                                visualization_list,
                                                                n_steps=500,
                                                                estimation_method="gausslegendre",
                                                                internal_batch_size=3)

pred:  1 ( 0.99 ) , delta:  tensor([0.9865], device='cuda:0', dtype=torch.float64)


In [12]:
visualize_attributions(visualization_list)

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
POSITIVE,POSITIVE (0.99),POSITIVE,1.15,"in the 1990s , when a youthful son mas ##ayo ##shi , a japanese entrepreneur , was pursuing acquisitions in his home country , he sought advice from a banker eight years his junior called mi ##kit ##ani hiroshi . they shared a lot in common : both had studied in america ( mr son at the university of california , berkeley , mr mi ##kit ##ani at harvard business school ) ; they had a common interest in the internet ; and they were both baseball mad . in the decades since , both men have blazed past a st ##if ##ling corporate hierarchy to become two of japan ’ s leading tech billionaire ##s . mr mi ##kit ##ani , who says in an interview that he did not even know the word “ entrepreneur ” when he enrolled at harvard , pioneered e - commerce in japan via ra ##ku ##ten , which is now a sprawling tech conglomerate worth $ 14 ##bn . mr son ’ s soft ##bank , after spectacular investments in early internet stocks , muscled into japan ’ s telecom ##s industry . they have both invested heavily in silicon valley . they also each own baseball teams named after birds of prey ; the soft ##bank hawks and the ra ##ku ##ten golden eagles ."
,,,,


In [19]:
visualization_list = []
sentences = ["couldnt agree more",
             "couldnt agree more.",
             "could n't agree more",
             "could n't agree more."
            ]
target_labels = ['POSITIVE',
                 'NEGATIVE',
                 'POSITIVE',
                 'POSITIVE'
                ]
for sentence, target_label in zip(sentences, target_labels):
    interpret_sentence(flair_model_wrapper,
                        lig,
                        sentence,
                        target_label,
                        visualization_list,
                        n_steps=500,
                        estimation_method="gausslegendre",
                        internal_batch_size=3)

pred:  1 ( 0.93 ) , delta:  tensor([0.5771], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.54 ) , delta:  tensor([1.3511], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.99 ) , delta:  tensor([0.6212], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.89 ) , delta:  tensor([0.0386], device='cuda:0', dtype=torch.float64)


In [20]:
visualize_attributions(visualization_list)

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
POSITIVE,POSITIVE (0.93),POSITIVE,-0.31,couldn ##t agree more
,,,,
NEGATIVE,POSITIVE (0.54),NEGATIVE,0.4,couldn ##t agree more .
,,,,
POSITIVE,POSITIVE (0.99),POSITIVE,0.49,could n ' t agree more
,,,,
POSITIVE,POSITIVE (0.89),POSITIVE,0.03,could n ' t agree more .
,,,,


In [34]:
visualization_list = []
sentences = ["Such a great show ! pad pad", 
             "It's a great day", 
             "It's a great day!",
             "It's a great day?",
             "It's a great day!!!",
             "It's a GREAT day.",
             "It's not a great day.",
             "It's absolutely not a great day!",
            ]
target_labels = ['POSITIVE',
                 'POSITIVE',
                 'POSITIVE',
                 'POSITIVE',
                 'POSITIVE',
                 'POSITIVE',
                 'NEGATIVE',
                 'NEGATIVE',
                ]
for sentence, target_label in zip(sentences, target_labels):
    interpret_sentence(flair_model_wrapper,
                        lig,
                        sentence,
                        target_label,
                        visualization_list,
                        n_steps=500,
                        estimation_method="gausslegendre",
                        internal_batch_size=3)

pred:  1 ( 1.00 ) , delta:  tensor([1.4830], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.94 ) , delta:  tensor([0.0631], device='cuda:0', dtype=torch.float64)
pred:  1 ( 1.00 ) , delta:  tensor([1.2953], device='cuda:0', dtype=torch.float64)
pred:  0 ( 0.84 ) , delta:  tensor([1.2559], device='cuda:0', dtype=torch.float64)
pred:  1 ( 1.00 ) , delta:  tensor([1.5507], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.99 ) , delta:  tensor([0.6045], device='cuda:0', dtype=torch.float64)
pred:  0 ( 1.00 ) , delta:  tensor([4.6325], device='cuda:0', dtype=torch.float64)
pred:  0 ( 1.00 ) , delta:  tensor([5.0241], device='cuda:0', dtype=torch.float64)


In [37]:
s = Sentence(sentence)
classifier.predict(s)
score = float(re.findall(r'\((.*)\)',str(s.get_labels()[0]))[0])
label = re.findall(r'(.*)\s\(.*\)',str(s.get_labels()[0]))[0]

In [38]:
score,label

(0.9976, 'POSITIVE')

In [16]:
str(s.get_labels()[0])

'POSITIVE (0.9976)'

In [35]:
visualize_attributions(visualization_list)

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
POSITIVE,POSITIVE (1.00),POSITIVE,1.27,such a great show ! pad pad
,,,,
POSITIVE,POSITIVE (0.94),POSITIVE,0.04,it ' s a great day
,,,,
POSITIVE,POSITIVE (1.00),POSITIVE,0.56,it ' s a great day !
,,,,
POSITIVE,NEGATIVE (0.84),POSITIVE,-1.24,it ' s a great day ?
,,,,
POSITIVE,POSITIVE (1.00),POSITIVE,0.59,it ' s a great day ! ! !
,,,,


In [11]:
sentences = ["It's a great day.", 
             "It's a great day", 
             "It's a great day!",
             "It's a great day?",
             "It's a great day!!!",
             "It's a GREAT day.",
             "IT'S A great DAY.",
             "IT'S A GREAT DAY.",
             "It's a great day. It's a great day.",
             "It's absolutely a great day.",
             "It's awfully a great day.",
             "It's ABSOLUTELY a great day.",
             "It's not a great day.",
             "It's absolutely not a great day!",
             "It's ABSOLUTELY not a great day.",
             "iT'S nOT a gREaT Day.",
             "It'snotagreatday.",
             "It's not a great day :D",
             "It's not a great day :(",
             "It is not a great day.",
             "'It's not a great day.'",
             "(It's not a great day.)",
             "It's a great day. It's a bad day.",
             "It's a great day. (It's not.)",
             "Here is my number: (850)-100-1000",
             "Here is my number (850)-100-1000",
             "Please check out https://data.tallahassee.com/",
             "Contact me at jz17d@my.fsu.edu",
             "couldnt agree more",
             "couldnt agree more.",
             "could n't agree more",
             "could n't agree more.",
             "Today sucks",
             "Today sux",
             "Kinda sux today! But I'll get by",
             "This restaurant is so good! Couldn't agree more!",
             "Never seen such a bad movie before! Couldn't agree more!",
             "How about meeting tomorrow at 10? Sounds good.",
             "The plan is great! I will talk to you later.",
             "Thoughts on this revision?",
             "Im sorry to hear this happened. Rob and Eric, can you please address this with the tenants?",
             "Don, let me know what our plan will be so that we can market accordingly.",
             ]

In [12]:
visualization_list = []
target_label = 'POSITIVE'
for sentence in sentences:
    interpret_sentence(flair_model_wrapper,
                        lig,
                        sentence,
                        target_label,
                        visualization_list,
                        n_steps=500,
                        estimation_method="gausslegendre",
                        internal_batch_size=3)

pred:  1 ( 0.99 ) , delta:  tensor([0.6045], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.94 ) , delta:  tensor([0.0631], device='cuda:0', dtype=torch.float64)
pred:  1 ( 1.00 ) , delta:  tensor([1.2953], device='cuda:0', dtype=torch.float64)
pred:  0 ( 0.84 ) , delta:  tensor([1.2559], device='cuda:0', dtype=torch.float64)
pred:  1 ( 1.00 ) , delta:  tensor([1.5507], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.99 ) , delta:  tensor([0.6045], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.99 ) , delta:  tensor([0.6045], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.99 ) , delta:  tensor([0.6045], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.99 ) , delta:  tensor([0.7523], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.99 ) , delta:  tensor([0.9053], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.94 ) , delta:  tensor([0.3735], device='cuda:0', dtype=torch.float64)
pred:  1 ( 0.99 ) , delta:  tensor([0.9053], device='cuda:0', dtype=torch.float64)
pred

In [13]:
visualize_attributions(visualization_list)

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
POSITIVE,POSITIVE (0.99),POSITIVE,0.24,it ' s a great day .
,,,,
POSITIVE,POSITIVE (0.94),POSITIVE,0.04,it ' s a great day
,,,,
POSITIVE,POSITIVE (1.00),POSITIVE,0.56,it ' s a great day !
,,,,
POSITIVE,NEGATIVE (0.84),POSITIVE,-1.24,it ' s a great day ?
,,,,
POSITIVE,POSITIVE (1.00),POSITIVE,0.59,it ' s a great day ! ! !
,,,,


In [21]:
# testing

In [22]:
target_index = flair_model_wrapper.label_dictionary.get_idx_for_item(target_label)

# In order maintain consistency with Flair, we apply the same tokenization
# steps.
flair_sentence = Sentence(sentence)

tokenized_sentence = flair_sentence.to_tokenized_string()

tokenizer_max_length = flair_model_wrapper.tokenizer.model_max_length

# This calculates the token input IDs tensor for the model.
input_ids = flair_model_wrapper.tokenizer.encode(tokenized_sentence,
                                                 add_special_tokens=False,
                                                 max_length=tokenizer_max_length,
                                                 truncation=True,
                                                 return_tensors="pt")
input_ids = input_ids.to(device)
# Create a baseline by creating a tensor of equal length
# containing the padding token tensor id.
pad_token_id = flair_model_wrapper.tokenizer.pad_token_id

ref_base_line = torch.full_like(input_ids, pad_token_id)

# Convert back to tokens as the model requires.
# As some words might get split up. e.g. Caroll to Carol l.
all_tokens = flair_model_wrapper.tokenizer.convert_ids_to_tokens(input_ids[0])

# The tokenizer in the model adds a special character
# in front of every sentence.
readable_tokens = [token.replace("▁", "") for token in all_tokens]

# The input IDs are passed to the embedding layer of the model.
# It is better to return the logits for Captum.
# https://github.com/pytorch/captum/issues/355#issuecomment-619610044
# Thus we calculate the softmax afterwards.
# For now, I take the first dimension and run this sentence, per sentence.

model_outputs = flair_model_wrapper(input_ids)

In [27]:
tokenized_sentence

"could n't agree more ."

In [26]:
input_ids

tensor([[2071, 1050, 1005, 1056, 5993, 2062, 1012]], device='cuda:0')

In [23]:
model_outputs

tensor([[-0.5925,  1.4772]], device='cuda:0', grad_fn=<ViewBackward>)

In [24]:
softmax = torch.nn.functional.softmax(model_outputs[0], dim=0)

# Return the confidence and the class ID of the top predicted class.
conf, idx = torch.max(softmax, 0)

In [25]:
conf

tensor(0.8879, device='cuda:0', grad_fn=<MaxBackward0>)