### Sequence Classification

In [30]:
from transformers import pipeline
classifier = pipeline('sentiment-analysis', 'bert-base-cased-finetuned-mrpc')
classifier('I love it here')

[{'label': 'LABEL_0', 'score': 0.790879487991333}]

### Paraphrase Detection

In [31]:
>>> from transformers import AutoTokenizer, AutoModelForSequenceClassification
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
>>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc")

In [35]:
classifier = pipeline('fill-mask', "bert-base-cased-finetuned-mrpc")

Some weights of the model checkpoint at bert-base-cased-finetuned-mrpc were not used when initializing BertForMaskedLM: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForMaskedLM were not initialized from the model checkpoint at bert-base-cased-finetuned-mrpc and are newly initialized: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
You

In [36]:
classifier("Austin is the capital of [MASK]")

[{'sequence': 'Austin is the capital of answered',
  'score': 0.0024273262824863195,
  'token': 3845,
  'token_str': 'answered'},
 {'sequence': 'Austin is the capital ofeking',
  'score': 0.0020825688261538744,
  'token': 25819,
  'token_str': '##eking'},
 {'sequence': 'Austin is the capital ofender',
  'score': 0.0020735478028655052,
  'token': 15981,
  'token_str': '##ender'},
 {'sequence': 'Austin is the capital ofξ',
  'score': 0.0018538099247962236,
  'token': 28350,
  'token_str': '##ξ'},
 {'sequence': 'Austin is the capital ofzer',
  'score': 0.0017360658384859562,
  'token': 6198,
  'token_str': '##zer'}]

In [19]:
>>> classes = ["not paraphrase", "is paraphrase"]

>>> sequence_0 = "The company HuggingFace is based in New York City"
>>> sequence_1 = "Apples are especially bad for your health"
>>> sequence_2 = "HuggingFace's headquarters are situated in Manhattan"

In [20]:
>>> paraphrase = tokenizer(sequence_0, sequence_2, return_tensors="pt")
>>> not_paraphrase = tokenizer(sequence_0, sequence_1, return_tensors="pt")

In [21]:
paraphrase.input_ids.shape

torch.Size([1, 22])

In [22]:
>>> paraphrase_classification_logits = model(**paraphrase).logits
>>> not_paraphrase_classification_logits = model(**not_paraphrase).logits

In [23]:
>>> paraphrase_results = torch.softmax(paraphrase_classification_logits, dim=1).tolist()[0]
>>> not_paraphrase_results = torch.softmax(not_paraphrase_classification_logits, dim=1).tolist()[0]

In [24]:
>>> for i in range(len(classes)):
...     print(f"{classes[i]}: {int(round(paraphrase_results[i] * 100))}%")


>>> # Should not be paraphrase
>>> for i in range(len(classes)):
...     print(f"{classes[i]}: {int(round(not_paraphrase_results[i] * 100))}%")

not paraphrase: 50%
is paraphrase: 50%
not paraphrase: 50%
is paraphrase: 50%


In [26]:
from transformers import DistilBertTokenizer, DistilBertModel
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertModel.from_pretrained("distilbert-base-uncased")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

In [27]:
output

BaseModelOutput(last_hidden_state=tensor([[[ 4.4062e-04, -2.6241e-01, -1.0192e-01,  ..., -6.2764e-02,
           2.7584e-01,  3.7014e-01],
         [ 7.2233e-01,  1.6449e-01,  4.0025e-01,  ...,  1.9161e-01,
           4.0458e-01, -5.8094e-02],
         [ 2.8198e-01, -1.7430e-01,  3.9076e-02,  ...,  2.7681e-02,
           1.1886e-01,  9.1439e-01],
         ...,
         [ 6.8016e-01,  7.9713e-02,  8.3603e-01,  ..., -4.8959e-01,
          -2.5017e-01, -2.3518e-01],
         [ 3.8105e-02, -8.1751e-01, -3.4076e-01,  ...,  4.4815e-01,
           9.6725e-02, -2.0311e-01],
         [ 3.5750e-01,  1.9968e-01,  1.7437e-01,  ...,  1.5028e-01,
          -2.3665e-01,  5.4390e-02]]], grad_fn=<NativeLayerNormBackward>), hidden_states=None, attentions=None)

## Extractive Question Answering

In [1]:
from transformers import pipeline
nlp = pipeline("question-answering")
context = r"""
Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune
a model on a SQuAD task, you may leverage the examples/question-answering/run_squad.py script.
"""

In [2]:
result = nlp(question=["What is extractive question answering?", "How do I fine tune?"],
             context=context)

In [105]:
result

[{'score': 0.6225805878639221,
  'start': 34,
  'end': 95,
  'answer': 'the task of extracting an answer from a text given a question'},
 {'score': 0.46403419971466064,
  'start': 231,
  'end': 254,
  'answer': 'a model on a SQuAD task'}]

Many questions

In [3]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
text = r"""
🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
TensorFlow 2.0 and PyTorch.
"""
questions = [
    "How many pretrained models are available in 🤗 Transformers? Foo Bar.",
    "What does 🤗 Transformers provide?",
    "🤗 Transformers provides interoperability between which frameworks?"
]


In [4]:
inp = tokenizer(text=questions, 
                text_pair=text, 
                add_special_tokens=True, 
                padding=True, 
                truncation=True,
                return_tensors="pt")
inp.input_ids.shape

torch.Size([1, 114])

In [5]:
list(zip(['foo', 'bar'], ['baz']))


[('foo', 'baz')]

In [170]:
tokenizer.convert_ids_to_tokens(inp.input_ids[0])

['[CLS]',
 'how',
 'many',
 'pre',
 '##train',
 '##ed',
 'models',
 'are',
 'available',
 'in',
 '[UNK]',
 'transformers',
 '?',
 'foo',
 'bar',
 '.',
 '[SEP]',
 '[SEP]']

In [171]:
inp = tokenizer(questions[0], text, add_special_tokens=True, padding=True, return_tensors="pt")
tokenizer.convert_ids_to_tokens(inp.input_ids[0])

['[CLS]',
 'how',
 'many',
 'pre',
 '##train',
 '##ed',
 'models',
 'are',
 'available',
 'in',
 '[UNK]',
 'transformers',
 '?',
 'foo',
 'bar',
 '.',
 '[SEP]',
 '[UNK]',
 'transformers',
 '(',
 'formerly',
 'known',
 'as',
 'p',
 '##yt',
 '##or',
 '##ch',
 '-',
 'transformers',
 'and',
 'p',
 '##yt',
 '##or',
 '##ch',
 '-',
 'pre',
 '##train',
 '##ed',
 '-',
 'bert',
 ')',
 'provides',
 'general',
 '-',
 'purpose',
 'architecture',
 '##s',
 '(',
 'bert',
 ',',
 'gp',
 '##t',
 '-',
 '2',
 ',',
 'roberta',
 ',',
 'xl',
 '##m',
 ',',
 'di',
 '##sti',
 '##lbert',
 ',',
 'xl',
 '##net',
 '…',
 ')',
 'for',
 'natural',
 'language',
 'understanding',
 '(',
 'nl',
 '##u',
 ')',
 'and',
 'natural',
 'language',
 'generation',
 '(',
 'nl',
 '##g',
 ')',
 'with',
 'over',
 '32',
 '+',
 'pre',
 '##train',
 '##ed',
 'models',
 'in',
 '100',
 '+',
 'languages',
 'and',
 'deep',
 'inter',
 '##oper',
 '##ability',
 'between',
 'tensor',
 '##flow',
 '2',
 '.',
 '0',
 'and',
 'p',
 '##yt',
 '##or',
 '#

In [172]:
inp.input_ids.shape

torch.Size([1, 114])

In [113]:
import torch

start = torch.argmax(torch.softmax(out.start_logits, dim=-1), dim=-1).tolist()
end = torch.argmax(torch.softmax(out.end_logits, dim=-1), dim=-1).tolist()

In [114]:
inp.input_ids.tolist()

[[101,
  2129,
  2116,
  3653,
  23654,
  2098,
  4275,
  2024,
  2800,
  1999,
  100,
  19081,
  1029,
  102,
  102],
 [101, 2054, 2515, 100, 19081, 3073, 1029, 102, 100, 102, 0, 0, 0, 0, 0],
 [101,
  100,
  19081,
  3640,
  6970,
  25918,
  8010,
  2090,
  2029,
  7705,
  2015,
  1029,
  102,
  102,
  0]]

In [115]:
for q, i, s,e in zip(questions, inp.input_ids.tolist(), start,end):
    print(f'question: {q}')
    print(f'span: {s, e}')
    print(f'answer: {i[s:e + 1]}\n')

question: How many pretrained models are available in 🤗 Transformers?
span: (11, 11)
answer: [19081]

question: What does 🤗 Transformers provide?
span: (8, 7)
answer: []

question: 🤗 Transformers provides interoperability between which frameworks?
span: (2, 2)
answer: [19081]



In [87]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
text = r"""
🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
TensorFlow 2.0 and PyTorch.
"""
questions = [
    "How many pretrained models are available in 🤗 Transformers?",
    "What does 🤗 Transformers provide?",
    "🤗 Transformers provides interoperability between which frameworks?",
]
for question in questions:
    inputs = tokenizer(question, text, add_special_tokens=True, return_tensors="pt")
    input_ids = inputs["input_ids"].tolist()[0]
    outputs = model(**inputs)
    answer_start_scores = outputs.start_logits
    answer_end_scores = outputs.end_logits
    answer_start = torch.argmax(
        answer_start_scores
    )  # Get the most likely beginning of answer with the argmax of the score
    answer_end = torch.argmax(answer_end_scores) + 1  # Get the most likely end of answer with the argmax of the score
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
    print(f"Question: {question}")
    print(f"Answer: {answer}")

Question: How many pretrained models are available in 🤗 Transformers?
Answer: over 32 +
Question: What does 🤗 Transformers provide?
Answer: general - purpose architectures
Question: 🤗 Transformers provides interoperability between which frameworks?
Answer: tensorflow 2. 0 and pytorch


In [121]:
i = tokenizer(questions[0], text, add_special_tokens=True, return_tensors="pt")
outputs = model(**i)
answer_start = torch.argmax(outputs.start_logits)
answer_end = torch.argmax(outputs.end_logits) + 1

In [134]:
i

{'input_ids': tensor([[  101,  2129,  2116,  3653, 23654,  2098,  4275,  2024,  2800,  1999,
           100, 19081,  1029,   102,   100, 19081,  1006,  3839,  2124,  2004,
          1052, 22123,  2953,  2818,  1011, 19081,  1998,  1052, 22123,  2953,
          2818,  1011,  3653, 23654,  2098,  1011, 14324,  1007,  3640,  2236,
          1011,  3800,  4294,  2015,  1006, 14324,  1010, 14246,  2102,  1011,
          1016,  1010, 23455,  1010, 28712,  2213,  1010,  4487, 16643, 23373,
          1010, 28712,  7159,  1529,  1007,  2005,  3019,  2653,  4824,  1006,
         17953,  2226,  1007,  1998,  3019,  2653,  4245,  1006, 17953,  2290,
          1007,  2007,  2058,  3590,  1009,  3653, 23654,  2098,  4275,  1999,
          2531,  1009,  4155,  1998,  2784,  6970, 25918,  8010,  2090, 23435,
         12314,  1016,  1012,  1014,  1998,  1052, 22123,  2953,  2818,  1012,
           102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [122]:
answer_start, answer_end

(tensor(82), tensor(85))

In [133]:
tokenizer.convert_tokens_to_string(
    tokenizer.convert_ids_to_tokens(i.input_ids.tolist()[0])
)

'[CLS] how many pretrained models are available in [UNK] transformers? [SEP] [UNK] transformers ( formerly known as pytorch - transformers and pytorch - pretrained - bert ) provides general - purpose architectures ( bert, gpt - 2, roberta, xlm, distilbert, xlnet … ) for natural language understanding ( nlu ) and natural language generation ( nlg ) with over 32 + pretrained models in 100 + languages and deep interoperability between tensorflow 2. 0 and pytorch. [SEP]'

tensor([[  101,  2129,  2116,  3653, 23654,  2098,  4275,  2024,  2800,  1999,
           100, 19081,  1029,   102,   100, 19081,  1006,  3839,  2124,  2004,
          1052, 22123,  2953,  2818,  1011, 19081,  1998,  1052, 22123,  2953,
          2818,  1011,  3653, 23654,  2098,  1011, 14324,  1007,  3640,  2236,
          1011,  3800,  4294,  2015,  1006, 14324,  1010, 14246,  2102,  1011,
          1016,  1010, 23455,  1010, 28712,  2213,  1010,  4487, 16643, 23373,
          1010, 28712,  7159,  1529,  1007,  2005,  3019,  2653,  4824,  1006,
         17953,  2226,  1007,  1998,  3019,  2653,  4245,  1006, 17953,  2290,
          1007,  2007,  2058,  3590,  1009,  3653, 23654,  2098,  4275,  1999,
          2531,  1009,  4155,  1998,  2784,  6970, 25918,  8010,  2090, 23435,
         12314,  1016,  1012,  1014,  1998,  1052, 22123,  2953,  2818,  1012,
           102]])

### Masked Language Modeling

In [6]:
from transformers import pipeline
nlp = pipeline("fill-mask")

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=480.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=331070498.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355863.0, style=ProgressStyle(descript…




In [9]:
nlp.tokenizer.mask_token

'<mask>'

In [10]:
nlp(f"The quick brown fox {nlp.tokenizer.mask_token} over the hen.")

[{'sequence': 'The quick brown fox leaps over the hen.',
  'score': 0.11760932207107544,
  'token': 32564,
  'token_str': ' leaps'},
 {'sequence': 'The quick brown fox watches over the hen.',
  'score': 0.09140879660844803,
  'token': 11966,
  'token_str': ' watches'},
 {'sequence': 'The quick brown fox jumps over the hen.',
  'score': 0.0617658793926239,
  'token': 13855,
  'token_str': ' jumps'},
 {'sequence': 'The quick brown fox takes over the hen.',
  'score': 0.05134272575378418,
  'token': 1239,
  'token_str': ' takes'},
 {'sequence': 'The quick brown fox leapt over the hen.',
  'score': 0.028735965490341187,
  'token': 34042,
  'token_str': ' leapt'}]

The long way

In [44]:
from transformers import AutoModelWithLMHead, AutoTokenizer
import torch
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
model = AutoModelWithLMHead.from_pretrained("distilbert-base-cased")
sequence = f"Distilled models are smaller than the models they mimic. Using them instead of the large versions would help {tokenizer.mask_token} our carbon footprint."
input = tokenizer.encode(sequence, return_tensors="pt")
mask_token_index = torch.where(input == tokenizer.mask_token_id)[1]
token_logits = model(input).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
top_5_tokens



[4851, 2773, 9711, 18134, 4607]

In [42]:
tokenizer.encode(sequence, return_tensors="pt")

tensor([[  101, 12120,  2050,  8683,  1181,  3584,  1132,  2964,  1190,  1103,
          3584,  1152, 27180,   119,  7993,  1172,  1939,  1104,  1103,  1415,
          3827,  1156,  1494,   103,  1412,  6302,  2555, 10988,   119,   102]])

In [45]:
mask_token_index

tensor([23])

In [43]:
from transformers import AutoModelWithLMHead, AutoTokenizer
import torch
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
model = AutoModelWithLMHead.from_pretrained("distilbert-base-cased")
sequence = f"Distilled models are smaller than the models they mimic. Using them instead of the large versions would help {tokenizer.mask_token} our carbon footprint."
input = tokenizer(sequence, return_tensors="pt")
mask_token_index = torch.where(input == tokenizer.mask_token_id)[1]
token_logits = model(input).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
top_5_tokens

{'input_ids': tensor([[  101, 12120,  2050,  8683,  1181,  3584,  1132,  2964,  1190,  1103,
          3584,  1152, 27180,   119,  7993,  1172,  1939,  1104,  1103,  1415,
          3827,  1156,  1494,   103,  1412,  6302,  2555, 10988,   119,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1]])}

# Casusal Language Modeling

In [39]:
from transformers import AutoModelWithLMHead, AutoTokenizer
model = AutoModelWithLMHead.from_pretrained("xlnet-base-cased")
tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
# Padding text helps XLNet with short prompts - proposed by Aman Rusia in https://github.com/rusiaaman/XLNet-gen#methodology
PADDING_TEXT = """In 1991, the remains of Russian Tsar Nicholas II and his family
(except for Alexei and Maria) are discovered.
The voice of Nicholas's young son, Tsarevich Alexei Nikolaevich, narrates the
remainder of the story. 1883 Western Siberia,
a young Grigori Rasputin is asked by his father and a group of men to perform magic.
Rasputin has a vision and denounces one of the men as a horse thief. Although his
father initially slaps him for making such an accusation, Rasputin watches as the
man is chased outside and beaten. Twenty years later, Rasputin sees a vision of
the Virgin Mary, prompting him to become a priest. Rasputin quickly becomes famous,
with people, even a bishop, begging for his blessing. <eod> </s> <eos>"""
prompt = "Today the weather is really nice and I am planning on "
inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
prompt_length = len(tokenizer.decode(inputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=True))
outputs = model.generate(inputs, max_length=250, do_sample=True, top_p=0.95, top_k=60)
generated = prompt + tokenizer.decode(outputs[0])[prompt_length:]

In [28]:
generated

'Today the weather is really nice and I am planning on  on it an an an an an an an an an an an an an an an an an an an an an an an an an an an an and an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an an a an An an an an an an an an an a an an an an an an an an an an an an an an an an an and an an an An an an an was an an an an an an an an is an an an an an an an an an an an ans an an an an an an An and an an an an be an an an an an an an an an an an a an an an an an an an an an an an an an an an an an ans an An an an an ans an an an, an an an an an an an'

In [26]:
outputs

tensor([[   67,  2840,    19,    18,  1484,    20,   965, 29077,  8719,  1273,
            21,    45,   273,    17,    10, 15048,    28, 27511,    21,  4185,
            11,    41,  2444,     9,    32,  1025,    20,  8719,    26,    23,
           673,   966,    19, 29077, 20643, 27511, 20822, 20643,    19,    17,
          6616, 17511,    18,  8978,    20,    18,   777,     9, 19233,  1527,
         17669,    19,    24,   673,    17, 28756,   150, 12943,  4354,   153,
            27,   442,    37,    45,   668,    21,    24,   256,    20,   416,
            22,  2771,  4901,     9, 12943,  4354,   153,    51,    24,  3004,
            21, 28142,    23,    65,    20,    18,   416,    34,    24,  2958,
         22947,     9,  1177,    45,   668,  3097, 13768,    23,   103,    28,
           441,   148,    48, 20522,    19, 12943,  4354,   153, 12860,    34,
            18,   326,    27, 17492,   684,    21,  6709,     9,  8585,   123,
           266,    19, 12943,  4354,   153,  6872,  

## NER

In [31]:
from transformers import pipeline
nlp = pipeline("ner")
sequence = """Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO, \
therefore very close to the Manhattan Bridge which is visible from the window."""

In [33]:
for t in nlp(sequence):
    print(t)

{'word': 'Hu', 'score': 0.999578595161438, 'entity': 'I-ORG', 'index': 1, 'start': 0, 'end': 2}
{'word': '##gging', 'score': 0.9909763932228088, 'entity': 'I-ORG', 'index': 2, 'start': 2, 'end': 7}
{'word': 'Face', 'score': 0.9982224702835083, 'entity': 'I-ORG', 'index': 3, 'start': 8, 'end': 12}
{'word': 'Inc', 'score': 0.9994880557060242, 'entity': 'I-ORG', 'index': 4, 'start': 13, 'end': 16}
{'word': 'New', 'score': 0.9994345307350159, 'entity': 'I-LOC', 'index': 11, 'start': 40, 'end': 43}
{'word': 'York', 'score': 0.9993196129798889, 'entity': 'I-LOC', 'index': 12, 'start': 44, 'end': 48}
{'word': 'City', 'score': 0.9993793964385986, 'entity': 'I-LOC', 'index': 13, 'start': 49, 'end': 53}
{'word': 'D', 'score': 0.9862582683563232, 'entity': 'I-LOC', 'index': 19, 'start': 79, 'end': 80}
{'word': '##UM', 'score': 0.9514269232749939, 'entity': 'I-LOC', 'index': 20, 'start': 80, 'end': 82}
{'word': '##BO', 'score': 0.933659017086029, 'entity': 'I-LOC', 'index': 21, 'start': 82, 'end':