In [1]:
import torch
from transformers import BertTokenizer, BertModel
from scipy.spatial.distance import cosine
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#The sentence
#txt = "I need to visit the bank tomorrow; after that, we'll set up a tent by the river bank, just across from the bank where my friend works."
#txt = "After stealing money from the bank vault, the bank robber was seen fishing on the Mississippi river bank."
txt = "I need to visit the financial bank tomorrow; after that, we'll set up a tent by the river bank, just across from the bank building where my friend works."

#Add the special tokens
wrangled_txt = '[CLS] ' + txt + ' [SEP]'

#tokenization
tokenized_txt = tokenizer.tokenize(wrangled_txt)

print(tokenized_txt)

['[CLS]', 'i', 'need', 'to', 'visit', 'the', 'financial', 'bank', 'tomorrow', ';', 'after', 'that', ',', 'we', "'", 'll', 'set', 'up', 'a', 'tent', 'by', 'the', 'river', 'bank', ',', 'just', 'across', 'from', 'the', 'bank', 'building', 'where', 'my', 'friend', 'works', '.', '[SEP]']


In [3]:
#get the ids of the tokens
ids_tokens = tokenizer.convert_tokens_to_ids(tokenized_txt)

#Display the tokens
for t in zip(tokenized_txt, ids_tokens):
    print('{:<12} {:>8,}'.format(t[0], t[1]))

[CLS]             101
i               1,045
need            2,342
to              2,000
visit           3,942
the             1,996
financial       3,361
bank            2,924
tomorrow        4,826
;               1,025
after           2,044
that            2,008
,               1,010
we              2,057
'               1,005
ll              2,222
set             2,275
up              2,039
a               1,037
tent            9,311
by              2,011
the             1,996
river           2,314
bank            2,924
,               1,010
just            2,074
across          2,408
from            2,013
the             1,996
bank            2,924
building        2,311
where           2,073
my              2,026
friend          2,767
works           2,573
.               1,012
[SEP]             102


In [4]:
segments_ids = [1] * len(tokenized_txt)
#Convert the token IDs and segment IDs into tensors.

token_tensor = torch.tensor([ids_tokens])
segment_tensor = torch.tensor([segments_ids])

In [None]:
# Load pre-trained model with the weights
model = BertModel.from_pretrained('bert-base-uncased', output_hidden_states = True, return_dict = False)
# Put the model in "evaluation" mode, meaning feed-forward operation.
model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [6]:
#https://huggingface.co/docs/transformers/model_doc/bert#bertmodel
#The input is of the shape (batch_size, sequence_length)
#Compute the output
with torch.no_grad():
    outputs = model(token_tensor, segment_tensor)
hidden_states = outputs[2]

In [7]:
#The first one is initial embeddings
print ("Number of layers:", len(hidden_states))
layer_ptr = 0

print ("Number of batches:", len(hidden_states[layer_ptr]))
batch_ptr = 0

print ("Number of tokens:", len(hidden_states[layer_ptr][batch_ptr]))
token_ptr = 0

print ("Number of hidden units:", len(hidden_states[layer_ptr][batch_ptr][token_ptr]))

Number of layers: 13
Number of batches: 1
Number of tokens: 37
Number of hidden units: 768


In [8]:
#Concatenate all the layers
token_embeddings = torch.stack(hidden_states, dim=0)

#remove the batch dimension
token_embeddings = torch.squeeze(token_embeddings, dim=1)
print(token_embeddings.shape)

torch.Size([13, 37, 768])


In [9]:
# Swap dimensions 0 and 1 so that each word contains the 13 layer hidden states
token_embeddings = token_embeddings.permute(1,0,2)

token_embeddings.size()

torch.Size([37, 13, 768])

In [10]:
#sum the last four layers
token_vectors_sum = []

# token_embeddings is a [35 x 13 x 768] tensor.

# For each token in the sentence...
for token in token_embeddings:

    # `token` is a [12 x 768] tensor

    # Sum the vectors from the last four layers.
    sum_vector = torch.sum(token[-4:], dim=0)

    # Use `sum_vec` to represent `token`.
    token_vectors_sum.append(sum_vector)

print ('Shape is: %d x %d' % (len(token_vectors_sum), len(token_vectors_sum[0])))


Shape is: 37 x 768


In [11]:
#Display the token
for i, t in enumerate(tokenized_txt):
  print (i, t)

0 [CLS]
1 i
2 need
3 to
4 visit
5 the
6 financial
7 bank
8 tomorrow
9 ;
10 after
11 that
12 ,
13 we
14 '
15 ll
16 set
17 up
18 a
19 tent
20 by
21 the
22 river
23 bank
24 ,
25 just
26 across
27 from
28 the
29 bank
30 building
31 where
32 my
33 friend
34 works
35 .
36 [SEP]


In [12]:
token_vectors = torch.stack(token_vectors_sum)

In [13]:
#compare the word bank in 7, 23, and 29
#txt = "I need to visit the financial bank tomorrow; after that, we'll set up a tent by the river bank, just across from the bank building where my friend works."

same_bank_word = 1 - cosine(token_vectors[7], token_vectors[29])
diff_bank_word1 = 1 - cosine(token_vectors[7], token_vectors[23])
diff_bank_word2 = 1 - cosine(token_vectors[23], token_vectors[29])

print('Vector similarity for  *similar*  meanings:  %.2f' % same_bank_word)
print('Vector similarity for *different* meanings:  %.2f' % diff_bank_word1)
print('Vector similarity for *different* meanings:  %.2f' % diff_bank_word2)

Vector similarity for  *similar*  meanings:  0.78
Vector similarity for *different* meanings:  0.66
Vector similarity for *different* meanings:  0.68


In [46]:
def bert_get_context_in_statue(txt, model):
    wrangled_txt = '[CLS] ' + txt + ' [SEP]'
    tokenized_txt = tokenizer.tokenize(wrangled_txt)
    ids_tokens = tokenizer.convert_tokens_to_ids(tokenized_txt)
    segments_ids = [1] * len(tokenized_txt)
    #Convert the token IDs and segment IDs into tensors.

    token_tensor = torch.tensor([ids_tokens])
    segment_tensor = torch.tensor([segments_ids])

    # Put the model in "evaluation" mode, meaning feed-forward operation.
    model.eval()

    with torch.no_grad():
        outputs = model(token_tensor, segment_tensor)
    hidden_states = outputs[2]

    #Concatenate all the layers
    token_embeddings = torch.stack(hidden_states, dim=0)

    #remove the batch dimension
    token_embeddings = torch.squeeze(token_embeddings, dim=1)
    print(token_embeddings.shape)

    # Swap dimensions 0 and 1 so that each word contains the 13 layer hidden states
    token_embeddings = token_embeddings.permute(1,0,2)

    token_embeddings.size()

    #sum the last four layers
    token_vectors_sum = []

    # token_embeddings is a [35 x 13 x 768] tensor.

    # For each token in the sentence...
    for token in token_embeddings:

        # `token` is a [12 x 768] tensor

        # Sum the vectors from the last four layers.
        sum_vector = torch.sum(token[-4:], dim=0)

        # Use `sum_vec` to represent `token`.
        token_vectors_sum.append(sum_vector)

    print ('Shape is: %d x %d' % (len(token_vectors_sum), len(token_vectors_sum[0])))

    return(torch.stack(token_vectors_sum))


In [None]:
#compare the word bank in 7, 23, and 29
#txt = "I need to visit the financial bank tomorrow; after that, we'll set up a tent by the river bank, just across from the bank building where my friend works."

same_bank_word = 1 - cosine(token_vectors[7], token_vectors[29])
diff_bank_word1 = 1 - cosine(token_vectors[7], token_vectors[23])
diff_bank_word2 = 1 - cosine(token_vectors[23], token_vectors[29])

print('Vector similarity for  *similar*  meanings:  %.2f' % same_bank_word)
print('Vector similarity for *different* meanings:  %.2f' % diff_bank_word1)
print('Vector similarity for *different* meanings:  %.2f' % diff_bank_word2)

## What about the language of the Clean Art Act (thinking back to Chevron)

- https://www.law.cornell.edu/uscode/text/42/7411

In [106]:
uscode_42_7411='''
42 U.S. Code § 7411 - Standards of performance for new stationary sources
(a)Definitions
For purposes of this section:
(1)The term “standard of performance” means a standard for emissions of air pollutants which reflects the degree of emission limitation achievable through the application of the best system of emission reduction which (taking into account the cost of achieving such reduction and any nonair quality health and environmental impact and energy requirements) the Administrator determines has been adequately demonstrated.
(2)The term “new source” means any stationary source, the construction or modification of which is commenced after the publication of regulations (or, if earlier, proposed regulations) prescribing a standard of performance under this section which will be applicable to such source.
(3)The term “stationary source” means any building, structure, facility, or installation which emits or may emit any air pollutant. Nothing in subchapter II of this chapter relating to nonroad engines shall be construed to apply to stationary internal combustion engines.
(4)The term “modification” means any physical change in, or change in the method of operation of, a stationary source which increases the amount of any air pollutant emitted by such source or which results in the emission of any air pollutant not previously emitted.
(5)The term “owner or operator” means any person who owns, leases, operates, controls, or supervises a stationary source.
(6)The term “existing source” means any stationary source other than a new source.
(7)The term “technological system of continuous emission reduction” means—
(A)a technological process for production or operation by any source which is inherently low-polluting or nonpolluting, or
(B)a technological system for continuous reduction of the pollution generated by a source before such pollution is emitted into the ambient air, including precombustion cleaning or treatment of fuels.
(8)A conversion to coal (A) by reason of an order under section 2(a) of the Energy Supply and Environmental Coordination Act of 1974 [15 U.S.C. 792(a)] or any amendment thereto, or any subsequent enactment which supersedes such Act [15 U.S.C. 791 et seq.], or (B) which qualifies under section 7413(d)(5)(A)(ii)
'''
# clean up text to get more hits
uscode_42_7411=uscode_42_7411.replace('“','')
uscode_42_7411=uscode_42_7411.replace('”','')
uscode_42_7411=uscode_42_7411.replace(',','')
uscode_42_7411=uscode_42_7411.replace('sources','source')

print(uscode_42_7411)


42 U.S. Code § 7411 - Standards of performance for new stationary source
(a)Definitions
For purposes of this section:
(1)The term standard of performance means a standard for emissions of air pollutants which reflects the degree of emission limitation achievable through the application of the best system of emission reduction which (taking into account the cost of achieving such reduction and any nonair quality health and environmental impact and energy requirements) the Administrator determines has been adequately demonstrated.
(2)The term new source means any stationary source the construction or modification of which is commenced after the publication of regulations (or if earlier proposed regulations) prescribing a standard of performance under this section which will be applicable to such source.
(3)The term stationary source means any building structure facility or installation which emits or may emit any air pollutant. Nothing in subchapter II of this chapter relating to nonroa

In [103]:
cleanairact_token_vectors = bert_get_context_in_statue(uscode_42_7411, 
                                                       BertModel.from_pretrained('bert-base-uncased', 
                                                                                 output_hidden_states = True, 
                                                                                 return_dict = False))

torch.Size([13, 472, 768])
Shape is: 472 x 768


In [104]:
ambiguous_term='source'

In [105]:
# okay I probably want to get the index of each occurrence of the ambiguous term
ambiguous_term = "source"
ambiguous_term_indexes = [i for i, token in enumerate(uscode_42_7411.split()) if token == ambiguous_term]
ambiguous_term_indexes

[12, 81, 85, 122, 177, 189, 224, 228, 252, 272]

In [115]:
# apply cosine similarity to find similar meanings for the first index relative to each subsequent. i.e. the title to the statute compared to the language
#[12, 81, 85, 122, 177, 189, 224, 228, 252, 272]

for i in [12, 81, 85, 122, 177, 189, 224, 228, 252, 272]:
    if i == 12:
        continue
    same_source_word = 1 - cosine(cleanairact_token_vectors[12], cleanairact_token_vectors[i])
    print('Vector similarity for  *similar*  meanings:  %.2f' % same_source_word)
    # print three tokens before and after each index for the term in a single line, starting and ending with ...
    tokens = uscode_42_7411.split()
    start = max(0, i - 3)
    end = min(len(tokens), i + 4)
    context = ' '.join(tokens[start:end])
    print(f"... {context} ...")
    print('')


Vector similarity for  *similar*  meanings:  0.34
... (2)The term new source means any stationary ...

Vector similarity for  *similar*  meanings:  0.36
... means any stationary source the construction or ...

Vector similarity for  *similar*  meanings:  0.33
... (3)The term stationary source means any building ...

Vector similarity for  *similar*  meanings:  0.32
... of a stationary source which increases the ...

Vector similarity for  *similar*  meanings:  0.20
... emitted by such source or which results ...

Vector similarity for  *similar*  meanings:  0.40
... (6)The term existing source means any stationary ...

Vector similarity for  *similar*  meanings:  0.30
... means any stationary source other than a ...

Vector similarity for  *similar*  meanings:  0.27
... operation by any source which is inherently ...

Vector similarity for  *similar*  meanings:  0.32
... generated by a source before such pollution ...



## Using AWS Bedrock with DeepSeek

- passing the prompt to the model for a similar AI LLM based task
- this compares later to the bert pipeline for filling in the blanks
- is really just exploratory on how embedding models work, and how they along with some AI techniques can be used for various NLP tasks in `getout-of-text3`

### examples

- In finding the ordinary meaning of words, namely the ambiguous text of importance in a statutory interpretation that is up for debate, there are various techniques we can employ to disambiguate the text and extract its intended meaning, including tradition KWIC (COCA), Embedding (LEGAL-BERT), and AI LLMs (DeepSeek on AWS Bedrock)

In [120]:
import boto3

session = boto3.Session(profile_name='atn-developer')

bedrock = session.client(
    "bedrock-runtime",
    region_name="us-east-1"
)

response = bedrock.invoke_model(
    modelId="us.deepseek.r1-v1:0",
    contentType="application/json",
    accept="application/json",
    body='{"prompt": "Please analyze the masked sentence to fill the mask: \\"To modify means we should [MASK] significant changes.\\"", "max_tokens": 256}',
)

deepseek = response['body'].read()

In [165]:
def get_bedrock_summary(ambiguous_term,statutory_text,bedrock_session):
    prompt_text="You are an AI LLM assistant who is knowledgeable about legal statutes.\
         Your task is to provide a summary of the statutory text related to the ambiguous term.\
         Please ensure that your summary is clear and concise. \
         The goal of course is to identify clear and ordinary meaning of the term, in the statutory context, without any bias.\
         Subsequent analysis will perform NLP tasks to further extract the ordinary meaning, but this is an AI LLM assistant's interpretation for reference. \
         MAX_RESPONSE_TOKENS = 512\
         Term is: {}\
         Statutory text is: {}".format(ambiguous_term, statutory_text)

    print(prompt_text)

    print('Passing to bedrock...')

    # Convert the body dictionary to a JSON string and encode it
    body = json.dumps({
        "prompt": prompt_text,
        "max_tokens": 512
    }).encode("utf-8")

    response = bedrock_session.invoke_model(
        modelId="us.deepseek.r1-v1:0",
        contentType="application/json",
        accept="application/json",
        body=body
    )

    return response['body'].read()
    


In [166]:
resp = get_bedrock_summary(ambiguous_term, uscode_42_7411,bedrock)

You are an AI LLM assistant who is knowledgeable about legal statutes.         Your task is to provide a summary of the statutory text related to the ambiguous term.         Please ensure that your summary is clear and concise.          The goal of course is to identify clear and ordinary meaning of the term, in the statutory context, without any bias.         Subsequent analysis will perform NLP tasks to further extract the ordinary meaning, but this is an AI LLM assistant's interpretation for reference.          MAX_RESPONSE_TOKENS = 512         Term is: source         Statutory text is: 
42 U.S. Code § 7411 - Standards of performance for new stationary source
(a)Definitions
For purposes of this section:
(1)The term standard of performance means a standard for emissions of air pollutants which reflects the degree of emission limitation achievable through the application of the best system of emission reduction which (taking into account the cost of achieving such reduction and any no

In [167]:
import json

deepseek_dict = json.loads(resp.decode())

#deepseek_dict['choices'][0]['text']
# print with line wrap
# I want to print with a line break in the ['text']
text = deepseek_dict['choices'][0]['text']
for i in range(0, len(text), 100):
    print(text[i:i+100])

</think>

Okay, let's tackle this. The user wants a summary of the statutory text related to the ter
m "source" in 42 U.S. Code § 7411. First, I need to find where "source" is defined in the statute. L
ooking at the definitions in subsection (a), the term "stationary source" is defined in (3). 

The d
efinition says a stationary source is any building, structure, facility, or installation that emits 
or may emit air pollutants. Also, there's a note that subchapter II about nonroad engines doesn't ap
ply to stationary internal combustion engines. So, "source" here refers to fixed emitters like build
ings or facilities.

But wait, the user mentioned the term is "source," but the statute uses "statio
nary source." I should check if "source" alone is used elsewhere. In (2), "new source" is defined as
 a stationary source constructed or modified after regulations are published. Similarly, "existing s
ource" in (6) refers to stationary sources that aren't new. So, in this context, "source" i

In [None]:
# excel logic to 
329 U.S. 64	

# format to https://tile.loc.gov/storage-services/service/ll/usrep/usrep329/usrep32964/usrep32964.pdf
https://tile.loc.gov/storage-services/service/ll/usrep/usrep{}/usrep{}{}/usrep{}{}.pdf


In [135]:
import json

deepseek_dict = json.loads(deepseek.decode())
#print(deepseek_dict)

#deepseek_dict['choices'][0]['text']
# print with line wrap
# I want to print with a line break in the ['text']
text = deepseek_dict['choices'][0]['text']
for i in range(0, len(text), 100):
    print(text[i:i+100])

 Please provide the answer in the format: [Answer] "answer".

Okay, let's see. The sentence is "To m
odify means we should [MASK] significant changes." I need to find the right word to replace [MASK]. 
The verb here should fit with "significant changes" and make sense with "modify." 

First, "modify" 
means to make partial or minor changes. So the sentence is explaining that modifying involves doing 
something to significant changes. Wait, that might not make sense. Maybe the sentence is saying that
 when we modify, we should do something with significant changes. Hmm. Maybe the intended meaning is
 that modifying requires making significant changes. But "modify" usually implies smaller changes. M
aybe there's a contradiction here. Or perhaps the sentence is trying to say that to modify something
, you need to implement or create significant changes. 

Let me think of possible verbs. Common coll
ocations with "changes" include "make," "implement," "introduce," "create," "bring about," 

In [1]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="nlpaueb/legal-bert-base-uncased")

  from .autonotebook import tqdm as notebook_tqdm
Device set to use mps:0


In [2]:
pipe("To modify means we should [MASK] significant changes")

[{'score': 0.14109571278095245,
  'token': 468,
  'token_str': 'make',
  'sequence': 'to modify means we should make significant changes'},
 {'score': 0.1251792460680008,
  'token': 321,
  'token_str': 'see',
  'sequence': 'to modify means we should see significant changes'},
 {'score': 0.07972630113363266,
  'token': 247,
  'token_str': 'have',
  'sequence': 'to modify means we should have significant changes'},
 {'score': 0.06908190250396729,
  'token': 4908,
  'token_str': 'expect',
  'sequence': 'to modify means we should expect significant changes'},
 {'score': 0.058650076389312744,
  'token': 594,
  'token_str': 'report',
  'sequence': 'to modify means we should report significant changes'}]

In [74]:
pipe("That bike is a [MASK] that is not permitted in the park.")

[{'score': 0.1371982991695404,
  'token': 4672,
  'token_str': 'commodity',
  'sequence': 'that bike is a commodity that is not permitted in the park.'},
 {'score': 0.08565253764390945,
  'token': 4175,
  'token_str': 'game',
  'sequence': 'that bike is a game that is not permitted in the park.'},
 {'score': 0.07633555680513382,
  'token': 424,
  'token_str': 'product',
  'sequence': 'that bike is a product that is not permitted in the park.'},
 {'score': 0.0717778131365776,
  'token': 446,
  'token_str': 'service',
  'sequence': 'that bike is a service that is not permitted in the park.'},
 {'score': 0.062396444380283356,
  'token': 1343,
  'token_str': 'vehicle',
  'sequence': 'that bike is a vehicle that is not permitted in the park.'}]

In [8]:
from transformers import AutoTokenizer, BertForMaskedLM
import torch

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
model = BertForMaskedLM.from_pretrained("google-bert/bert-base-uncased")

# Tokenize the input sentence with the mask token
inputs = tokenizer("The capital of France is <mask>.", return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

# Retrieve index of <mask>
mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]

# Predict the token for <mask>
predicted_token_id = logits[0, mask_token_index].argmax(axis=-1)
print("Predicted token:", tokenizer.decode(predicted_token_id))

# Create labels tensor matching the input shape
labels = inputs.input_ids.clone()
labels[0, mask_token_index] = tokenizer("Paris", add_special_tokens=False)["input_ids"][0]
labels[labels != tokenizer.mask_token_id] = -100  # Mask all tokens except the label

# Compute the loss
outputs = model(**inputs, labels=labels)
print("Loss:", round(outputs.loss.item(), 2))

Some weights of the model checkpoint at google-bert/bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Predicted token: 
Loss: nan


In [9]:
from transformers import pipeline

classifier = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    dtype=torch.float16,
    device=0
)

result = classifier("I love using Hugging Face Transformers!")
print(result)

Device set to use mps:0


[{'label': 'POSITIVE', 'score': 0.9971315860748291}]


In [15]:
from transformers import pipeline

classifier = pipeline(
    task="fill-mask",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    dtype=torch.float16,
    device=0
)

result = classifier("I love using a blue [MASK] when I'm on the tennis court!")
print(result)

Some weights of DistilBertForMaskedLM were not initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english and are newly initialized: ['vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_transform.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use mps:0


[{'score': 0.0021086677443236113, 'token': 20211, 'token_str': '##eyer', 'sequence': "i love using a blueeyer when i'm on the tennis court!"}, {'score': 0.0018039444694295526, 'token': 13936, 'token_str': '##uded', 'sequence': "i love using a blueuded when i'm on the tennis court!"}, {'score': 0.0016544635873287916, 'token': 24960, 'token_str': 'marrow', 'sequence': "i love using a blue marrow when i'm on the tennis court!"}, {'score': 0.0015833282377570868, 'token': 29114, 'token_str': 'accountants', 'sequence': "i love using a blue accountants when i'm on the tennis court!"}, {'score': 0.0015499064465984702, 'token': 16256, 'token_str': '##duction', 'sequence': "i love using a blueduction when i'm on the tennis court!"}]


In [10]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="nlpaueb/legal-bert-base-uncased")
pipe("To modify means we should [MASK] significant changes.")

Device set to use mps:0


[{'score': 0.5017262101173401,
  'token': 468,
  'token_str': 'make',
  'sequence': 'to modify means we should make significant changes.'},
 {'score': 0.04991573467850685,
  'token': 1262,
  'token_str': 'consider',
  'sequence': 'to modify means we should consider significant changes.'},
 {'score': 0.04759223759174347,
  'token': 321,
  'token_str': 'see',
  'sequence': 'to modify means we should see significant changes.'},
 {'score': 0.044308967888355255,
  'token': 4908,
  'token_str': 'expect',
  'sequence': 'to modify means we should expect significant changes.'},
 {'score': 0.037930168211460114,
  'token': 2600,
  'token_str': 'identify',
  'sequence': 'to modify means we should identify significant changes.'}]